diff --git a/.gitignore b/.gitignore
index d54d21b802be8..3624d12269612 100644
--- a/.gitignore
+++ b/.gitignore
@@ -65,6 +65,7 @@ scalastyle.txt
 scalastyle-output.xml
 R-unit-tests.log
 R/unit-tests.out
+python/lib/pyspark.zip
 
 # For Hive
 metastore_db/
diff --git a/.rat-excludes b/.rat-excludes
index 2238a5b68e359..c0f81b57fe09d 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -30,10 +30,12 @@ spark-env.sh.template
 log4j-defaults.properties
 bootstrap-tooltip.js
 jquery-1.11.1.min.js
+d3.min.js
+dagre-d3.min.js
+graphlib-dot.min.js
 sorttable.js
 vis.min.js
 vis.min.css
-vis.map
 .*avsc
 .*txt
 .*json
@@ -71,5 +73,12 @@ logs
 .*scalastyle-output.xml
 .*dependency-reduced-pom.xml
 known_translations
+json_expectation
+local-1422981759269/*
+local-1422981780767/*
+local-1425081759269/*
+local-1426533911241/*
+local-1426633911242/*
+local-1430917381534/*
 DESCRIPTION
 NAMESPACE
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 0000000000000..8c994048a2457
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1,14711 @@
+Spark Change Log
+----------------
+
+Release 1.4.0
+
+  [HOTFIX] Revert "[SPARK-7092] Update spark scala version to 2.11.6"
+  Patrick Wendell
+  2015-05-19 02:28:41 -0700
+  Commit: 31f5d53
+
+  Revert "Preparing Spark release v1.4.0-rc1"
+  Patrick Wendell
+  2015-05-19 02:27:14 -0700
+  Commit: 586ede6
+
+  Revert "Preparing development version 1.4.1-SNAPSHOT"
+  Patrick Wendell
+  2015-05-19 02:27:07 -0700
+  Commit: e7309ec
+
+  Fixing a few basic typos in the Programming Guide.
+  Mike Dusenberry
+  2015-05-19 08:59:45 +0100
+  Commit: 0748263, github.com/apache/spark/pull/6240
+
+  Preparing development version 1.4.1-SNAPSHOT
+  Patrick Wendell
+  2015-05-19 07:13:24 +0000
+  Commit: a1d896b
+
+  Preparing Spark release v1.4.0-rc1
+  Patrick Wendell
+  2015-05-19 07:13:24 +0000
+  Commit: 79fb01a
+
+  Updating CHANGES.txt for Spark 1.4
+  Patrick Wendell
+  2015-05-19 00:12:20 -0700
+  Commit: 30bf333
+
+  Revert "Preparing Spark release v1.4.0-rc1"
+  Patrick Wendell
+  2015-05-19 00:10:39 -0700
+  Commit: b0c63d2
+
+  Revert "Preparing development version 1.4.1-SNAPSHOT"
+  Patrick Wendell
+  2015-05-19 00:10:37 -0700
+  Commit: 198a186
+
+  [SPARK-7581] [ML] [DOC] User guide for spark.ml PolynomialExpansion
+  Xusen Yin
+  2015-05-19 00:06:33 -0700
+  Commit: 38a3fc8, github.com/apache/spark/pull/6113
+
+  [HOTFIX] Fixing style failures in Kinesis source
+  Patrick Wendell
+  2015-05-19 00:02:06 -0700
+  Commit: de60c2e
+
+  Preparing development version 1.4.1-SNAPSHOT
+  Patrick Wendell
+  2015-05-19 06:06:41 +0000
+  Commit: 40190ce
+
+  Preparing Spark release v1.4.0-rc1
+  Patrick Wendell
+  2015-05-19 06:06:40 +0000
+  Commit: 38ccef3
+
+  Revert "Preparing Spark release v1.4.0-rc1"
+  Patrick Wendell
+  2015-05-18 23:06:15 -0700
+  Commit: 152b029
+
+  Revert "Preparing development version 1.4.1-SNAPSHOT"
+  Patrick Wendell
+  2015-05-18 23:06:13 -0700
+  Commit: 4d098bc
+
+  [HOTFIX]: Java 6 Build Breaks
+  Patrick Wendell
+  2015-05-19 06:00:13 +0000
+  Commit: be1fc93
+
+  Preparing development version 1.4.1-SNAPSHOT
+  Patrick Wendell
+  2015-05-19 05:01:11 +0000
+  Commit: 758ca74
+
+  Preparing Spark release v1.4.0-rc1
+  Patrick Wendell
+  2015-05-19 05:01:11 +0000
+  Commit: e8e97e3
+
+  [SPARK-7687] [SQL] DataFrame.describe() should cast all aggregates to String
+  Josh Rosen
+  2015-05-18 21:53:44 -0700
+  Commit: 99436bd, github.com/apache/spark/pull/6218
+
+  CHANGES.txt and changelist updaets for Spark 1.4.
+ Patrick Wendell + 2015-05-18 21:44:13 -0700 + Commit: 914ecd0 + + [SPARK-7150] SparkContext.range() and SQLContext.range() + Daoyuan Wang , Davies Liu + 2015-05-18 21:43:12 -0700 + Commit: 7fcbb2c, github.com/apache/spark/pull/6081 + + Version updates for Spark 1.4.0 + Patrick Wendell + 2015-05-18 21:38:37 -0700 + Commit: 9d0b7fb + + [SPARK-7681] [MLLIB] Add SparseVector support for gemv + Liang-Chi Hsieh + 2015-05-18 21:32:36 -0700 + Commit: dd9f873, github.com/apache/spark/pull/6209 + + [SPARK-7692] Updated Kinesis examples + Tathagata Das + 2015-05-18 18:24:15 -0700 + Commit: 9c48548, github.com/apache/spark/pull/6249 + + [SPARK-7621] [STREAMING] Report Kafka errors to StreamingListeners + jerluc + 2015-05-18 18:13:29 -0700 + Commit: 9188ad8, github.com/apache/spark/pull/6204 + + [SPARK-7624] Revert #4147 + Davies Liu + 2015-05-18 16:55:45 -0700 + Commit: 60cb33d, github.com/apache/spark/pull/6172 + + [SQL] Fix serializability of ORC table scan + Michael Armbrust + 2015-05-18 15:24:31 -0700 + Commit: f8f23c4, github.com/apache/spark/pull/6247 + + [SPARK-7501] [STREAMING] DAG visualization: show DStream operations + Andrew Or + 2015-05-18 14:33:33 -0700 + Commit: a475cbc, github.com/apache/spark/pull/6034 + + [HOTFIX] Fix ORC build break + Michael Armbrust + 2015-05-18 14:04:04 -0700 + Commit: ba502ab, github.com/apache/spark/pull/6244 + + [SPARK-7658] [STREAMING] [WEBUI] Update the mouse behaviors for the timeline graphs + zsxwing + 2015-05-18 13:34:43 -0700 + Commit: 39add3d, github.com/apache/spark/pull/6168 + + [SPARK-6216] [PYSPARK] check python version of worker with driver + Davies Liu + 2015-05-18 12:55:13 -0700 + Commit: a833209, github.com/apache/spark/pull/6203 + + [SPARK-7673] [SQL] WIP: HadoopFsRelation and ParquetRelation2 performance optimizations + Cheng Lian + 2015-05-18 12:45:37 -0700 + Commit: 3962348, github.com/apache/spark/pull/6225 + + [SPARK-7567] [SQL] [follow-up] Use a new flag to set output committer based on mapreduce apis + Yin Huai + 2015-05-18 12:17:10 -0700 + Commit: a385f4b, github.com/apache/spark/pull/6130 + + [SPARK-7269] [SQL] Incorrect analysis for aggregation(use semanticEquals) + Wenchen Fan + 2015-05-18 12:08:28 -0700 + Commit: d6f5f37, github.com/apache/spark/pull/6173 + + [SPARK-7631] [SQL] treenode argString should not print children + scwf + 2015-05-18 12:05:14 -0700 + Commit: dbd4ec8, github.com/apache/spark/pull/6144 + + [SPARK-2883] [SQL] ORC data source for Spark SQL + Zhan Zhang , Cheng Lian + 2015-05-18 12:03:27 -0700 + Commit: 65d71bd, github.com/apache/spark/pull/6194 + + [SPARK-7380] [MLLIB] pipeline stages should be copyable in Python + Xiangrui Meng , Joseph K. 
Bradley + 2015-05-18 12:02:18 -0700 + Commit: cf4e04a, github.com/apache/spark/pull/6088 + + [SQL] [MINOR] [THIS] use private for internal field in ScalaUdf + Wenchen Fan + 2015-05-18 12:01:30 -0700 + Commit: 7d44c01, github.com/apache/spark/pull/6235 + + [SPARK-7570] [SQL] Ignores _temporary during partition discovery + Cheng Lian + 2015-05-18 11:59:44 -0700 + Commit: c7623a2, github.com/apache/spark/pull/6091 + + [SPARK-6888] [SQL] Make the jdbc driver handling user-definable + Rene Treffer + 2015-05-18 11:55:36 -0700 + Commit: b41301a, github.com/apache/spark/pull/5555 + + [SPARK-7627] [SPARK-7472] DAG visualization: style skipped stages + Andrew Or + 2015-05-18 10:59:35 -0700 + Commit: a0ae8ce, github.com/apache/spark/pull/6171 + + [SPARK-7272] [MLLIB] User guide for PMML model export + Vincenzo Selvaggio + 2015-05-18 08:46:33 -0700 + Commit: a95d4e1, github.com/apache/spark/pull/6219 + + [SPARK-6657] [PYSPARK] Fix doc warnings + Xiangrui Meng + 2015-05-18 08:35:14 -0700 + Commit: 2c94ffe, github.com/apache/spark/pull/6221 + + [SPARK-7299][SQL] Set precision and scale for Decimal according to JDBC metadata instead of returned BigDecimal + Liang-Chi Hsieh + 2015-05-18 01:10:55 -0700 + Commit: 0e7cd8f, github.com/apache/spark/pull/5833 + + [SPARK-7694] [MLLIB] Use getOrElse for getting the threshold of LR model + Shuo Xiang + 2015-05-17 21:16:52 -0700 + Commit: 0b6bc8a, github.com/apache/spark/pull/6224 + + [SPARK-7693][Core] Remove "import scala.concurrent.ExecutionContext.Implicits.global" + zsxwing + 2015-05-17 20:37:19 -0700 + Commit: 2a42d2d, github.com/apache/spark/pull/6223 + + [SQL] [MINOR] use catalyst type converter in ScalaUdf + Wenchen Fan + 2015-05-17 16:51:57 -0700 + Commit: be66d19, github.com/apache/spark/pull/6182 + + [SPARK-6514] [SPARK-5960] [SPARK-6656] [SPARK-7679] [STREAMING] [KINESIS] Updates to the Kinesis API + Tathagata Das + 2015-05-17 16:49:07 -0700 + Commit: e0632ff, github.com/apache/spark/pull/6147 + + [SPARK-7491] [SQL] Allow configuration of classloader isolation for hive + Michael Armbrust + 2015-05-17 12:43:15 -0700 + Commit: a855608, github.com/apache/spark/pull/6167 + + [SPARK-7686] [SQL] DescribeCommand is assigned wrong output attributes in SparkStrategies + Josh Rosen + 2015-05-17 11:59:28 -0700 + Commit: 53d6ab5, github.com/apache/spark/pull/6217 + + [SPARK-7660] Wrap SnappyOutputStream to work around snappy-java bug + Josh Rosen + 2015-05-17 09:30:49 -0700 + Commit: 6df71eb, github.com/apache/spark/pull/6176 + + [SPARK-7669] Builds against Hadoop 2.6+ get inconsistent curator depend… + Steve Loughran + 2015-05-17 17:03:11 +0100 + Commit: 0feb3de, github.com/apache/spark/pull/6191 + + [SPARK-7447] [SQL] Don't re-merge Parquet schema when the relation is deserialized + Liang-Chi Hsieh + 2015-05-17 15:42:21 +0800 + Commit: 898be62, github.com/apache/spark/pull/6012 + + [MINOR] Add 1.3, 1.3.1 to master branch EC2 scripts + Shivaram Venkataraman + 2015-05-17 00:12:20 -0700 + Commit: 0ed376a, github.com/apache/spark/pull/6215 + + [MINOR] [SQL] Removes an unreachable case clause + Cheng Lian + 2015-05-16 23:20:09 -0700 + Commit: 671a6bc, github.com/apache/spark/pull/6214 + + [SPARK-7654][SQL] Move JDBC into DataFrame's reader/writer interface. 
+ Reynold Xin + 2015-05-16 22:01:53 -0700 + Commit: 17e0786, github.com/apache/spark/pull/6210 + + [SPARK-7655][Core] Deserializing value should not hold the TaskSchedulerImpl lock + zsxwing + 2015-05-16 21:03:22 -0700 + Commit: 8494910, github.com/apache/spark/pull/6195 + + [SPARK-7654][MLlib] Migrate MLlib to the DataFrame reader/writer API. + Reynold Xin + 2015-05-16 15:03:57 -0700 + Commit: bd057f8, github.com/apache/spark/pull/6211 + + [BUILD] update jblas dependency version to 1.2.4 + Matthew Brandyberry + 2015-05-16 18:17:48 +0100 + Commit: 8bde352, github.com/apache/spark/pull/6199 + + [HOTFIX] [SQL] Fixes DataFrameWriter.mode(String) + Cheng Lian + 2015-05-16 20:55:10 +0800 + Commit: 856619d, github.com/apache/spark/pull/6212 + + [SPARK-7655][Core][SQL] Remove 'scala.concurrent.ExecutionContext.Implicits.global' in 'ask' and 'BroadcastHashJoin' + zsxwing + 2015-05-16 00:44:29 -0700 + Commit: ad5b0b1, github.com/apache/spark/pull/6200 + + [SPARK-7672] [CORE] Use int conversion in translating kryoserializer.buffer.mb to kryoserializer.buffer + Nishkam Ravi , nishkamravi2 , nravi + 2015-05-16 08:24:21 +0100 + Commit: e7607e5, github.com/apache/spark/pull/6198 + + [SPARK-4556] [BUILD] binary distribution assembly can't run in local mode + Sean Owen + 2015-05-16 08:18:41 +0100 + Commit: 1fc3560, github.com/apache/spark/pull/6186 + + [SPARK-7671] Fix wrong URLs in MLlib Data Types Documentation + FavioVazquez + 2015-05-16 08:07:03 +0100 + Commit: 7e3f9fe, github.com/apache/spark/pull/6196 + + [SPARK-7654][SQL] DataFrameReader and DataFrameWriter for input/output API + Reynold Xin + 2015-05-15 22:00:31 -0700 + Commit: 9da55b5, github.com/apache/spark/pull/6175 + + [SPARK-7473] [MLLIB] Add reservoir sample in RandomForest + AiHe + 2015-05-15 20:42:35 -0700 + Commit: f41be8f, github.com/apache/spark/pull/5988 + + [SPARK-7543] [SQL] [PySpark] split dataframe.py into multiple files + Davies Liu + 2015-05-15 20:09:15 -0700 + Commit: 8164fbc, github.com/apache/spark/pull/6201 + + [SPARK-7073] [SQL] [PySpark] Clean up SQL data type hierarchy in Python + Davies Liu + 2015-05-15 20:05:26 -0700 + Commit: 61806f6, github.com/apache/spark/pull/6206 + + [SPARK-7575] [ML] [DOC] Example code for OneVsRest + Ram Sriharsha + 2015-05-15 19:33:20 -0700 + Commit: 04323ba, github.com/apache/spark/pull/6115 + + [SPARK-7563] OutputCommitCoordinator.stop() should only run on the driver + Josh Rosen + 2015-05-15 18:06:01 -0700 + Commit: ed75cc0, github.com/apache/spark/pull/6197 + + [SPARK-7676] Bug fix and cleanup of stage timeline view + Kay Ousterhout + 2015-05-15 17:45:14 -0700 + Commit: 6f78d03, github.com/apache/spark/pull/6202 + + [SPARK-7556] [ML] [DOC] Add user guide for spark.ml Binarizer, including Scala, Java and Python examples + Liang-Chi Hsieh + 2015-05-15 15:05:04 -0700 + Commit: e847d86, github.com/apache/spark/pull/6116 + + [SPARK-7677] [STREAMING] Add Kafka modules to the 2.11 build. + Iulian Dragos + 2015-05-15 14:57:29 -0700 + Commit: 31e6404, github.com/apache/spark/pull/6149 + + [SPARK-7226] [SPARKR] Support math functions in R DataFrame + qhuang + 2015-05-15 14:06:16 -0700 + Commit: 9ef6d74, github.com/apache/spark/pull/6170 + + [SPARK-7296] Add timeline visualization for stages in the UI. 
+ Kousuke Saruta + 2015-05-15 13:54:09 -0700 + Commit: a5f7b3b, github.com/apache/spark/pull/5843 + + [SPARK-7504] [YARN] NullPointerException when initializing SparkContext in YARN-cluster mode + ehnalis + 2015-05-15 12:14:02 -0700 + Commit: 7dc0ff3, github.com/apache/spark/pull/6083 + + [SPARK-7664] [WEBUI] DAG visualization: Fix incorrect link paths of DAG. + Kousuke Saruta + 2015-05-15 11:54:13 -0700 + Commit: e319719, github.com/apache/spark/pull/6184 + + [SPARK-5412] [DEPLOY] Cannot bind Master to a specific hostname as per the documentation + Sean Owen + 2015-05-15 11:30:19 -0700 + Commit: fe3c734, github.com/apache/spark/pull/6185 + + [CORE] Protect additional test vars from early GC + Tim Ellison + 2015-05-15 11:27:24 -0700 + Commit: 866e4b5, github.com/apache/spark/pull/6187 + + [SPARK-7233] [CORE] Detect REPL mode once + Oleksii Kostyliev , Oleksii Kostyliev + 2015-05-15 11:19:56 -0700 + Commit: c58b9c6, github.com/apache/spark/pull/5835 + + [SPARK-7651] [MLLIB] [PYSPARK] GMM predict, predictSoft should raise error on bad input + FlytxtRnD + 2015-05-15 10:43:18 -0700 + Commit: dfdae58, github.com/apache/spark/pull/6180 + + [SPARK-7668] [MLLIB] Preserve isTransposed property for Matrix after calling map function + Liang-Chi Hsieh + 2015-05-15 10:03:29 -0700 + Commit: d1f5651, github.com/apache/spark/pull/6188 + + [SPARK-7503] [YARN] Resources in .sparkStaging directory can't be cleaned up on error + Kousuke Saruta + 2015-05-15 11:37:34 +0100 + Commit: a17a0ee, github.com/apache/spark/pull/6026 + + [SPARK-7591] [SQL] Partitioning support API tweaks + Cheng Lian + 2015-05-15 16:20:49 +0800 + Commit: bcb2c5d, github.com/apache/spark/pull/6150 + + [SPARK-6258] [MLLIB] GaussianMixture Python API parity check + Yanbo Liang + 2015-05-15 00:18:39 -0700 + Commit: c0bb974, github.com/apache/spark/pull/6087 + + [SPARK-7650] [STREAMING] [WEBUI] Move streaming css and js files to the streaming project + zsxwing + 2015-05-14 23:51:41 -0700 + Commit: 0ba99f0, github.com/apache/spark/pull/6160 + + [CORE] Remove unreachable Heartbeat message from Worker + Kan Zhang + 2015-05-14 23:50:50 -0700 + Commit: 6742b4e, github.com/apache/spark/pull/6163 + + [HOTFIX] Add workaround for SPARK-7660 to fix JavaAPISuite failures. + Josh Rosen + 2015-05-14 23:17:41 -0700 + Commit: 1206a55 + + [SQL] When creating partitioned table scan, explicitly create UnionRDD. 
+ Yin Huai + 2015-05-15 12:04:26 +0800 + Commit: 7aa269f, github.com/apache/spark/pull/6162 + + [SPARK-7098][SQL] Make the WHERE clause with timestamp show consistent result + Liang-Chi Hsieh + 2015-05-14 20:49:21 -0700 + Commit: bac4522, github.com/apache/spark/pull/5682 + + [SPARK-7548] [SQL] Add explode function for DataFrames + Michael Armbrust + 2015-05-14 19:49:44 -0700 + Commit: 778a054, github.com/apache/spark/pull/6107 + + [SPARK-7619] [PYTHON] fix docstring signature + Xiangrui Meng + 2015-05-14 18:16:22 -0700 + Commit: a238c23, github.com/apache/spark/pull/6161 + + [SPARK-7648] [MLLIB] Add weights and intercept to GLM wrappers in spark.ml + Xiangrui Meng + 2015-05-14 18:13:58 -0700 + Commit: f91bb57, github.com/apache/spark/pull/6156 + + [SPARK-7645] [STREAMING] [WEBUI] Show milliseconds in the UI if the batch interval < 1 second + zsxwing + 2015-05-14 16:58:36 -0700 + Commit: 79983f1, github.com/apache/spark/pull/6154 + + [SPARK-7649] [STREAMING] [WEBUI] Use window.localStorage to store the status rather than the url + zsxwing + 2015-05-14 16:57:33 -0700 + Commit: 3358485, github.com/apache/spark/pull/6158 + + [SPARK-7643] [UI] use the correct size in RDDPage for storage info and partitions + Xiangrui Meng + 2015-05-14 16:56:32 -0700 + Commit: 8d8876d, github.com/apache/spark/pull/6157 + + [SPARK-7598] [DEPLOY] Add aliveWorkers metrics in Master + Rex Xiong + 2015-05-14 16:55:31 -0700 + Commit: 894214f, github.com/apache/spark/pull/6117 + + Make SPARK prefix a variable + tedyu + 2015-05-14 15:26:35 -0700 + Commit: fceaffc, github.com/apache/spark/pull/6153 + + [SPARK-7278] [PySpark] DateType should find datetime.datetime acceptable + ksonj + 2015-05-14 15:10:58 -0700 + Commit: a49a145, github.com/apache/spark/pull/6057 + + [SQL][minor] rename apply for QueryPlanner + Wenchen Fan + 2015-05-14 10:25:18 -0700 + Commit: aa8a0f9, github.com/apache/spark/pull/6142 + + [SPARK-7249] Updated Hadoop dependencies due to inconsistency in the versions + FavioVazquez + 2015-05-14 15:22:58 +0100 + Commit: 67ed0aa, github.com/apache/spark/pull/5786 + + [SPARK-7568] [ML] ml.LogisticRegression doesn't output the right prediction + DB Tsai + 2015-05-14 01:26:08 -0700 + Commit: 58534b0, github.com/apache/spark/pull/6109 + + [SPARK-7407] [MLLIB] use uid + name to identify parameters + Xiangrui Meng + 2015-05-14 01:22:15 -0700 + Commit: e45cd9f, github.com/apache/spark/pull/6019 + + [SPARK-7595] [SQL] Window will cause resolve failed with self join + linweizhong + 2015-05-14 00:23:27 -0700 + Commit: c80e0cf, github.com/apache/spark/pull/6114 + + [SPARK-7620] [ML] [MLLIB] Removed calling size, length in while condition to avoid extra JVM call + DB Tsai + 2015-05-13 22:23:21 -0700 + Commit: 9ab4db2, github.com/apache/spark/pull/6137 + + [SPARK-7612] [MLLIB] update NB training to use mllib's BLAS + Xiangrui Meng + 2015-05-13 21:27:17 -0700 + Commit: 82f387f, github.com/apache/spark/pull/6128 + + [HOT FIX #6125] Do not wait for all stages to start rendering + Andrew Or + 2015-05-13 21:04:13 -0700 + Commit: 2d4a961, github.com/apache/spark/pull/6138 + + [HOTFIX] Use 'new Job' in fsBasedParquet.scala + zsxwing + 2015-05-13 17:58:29 -0700 + Commit: d518c03, github.com/apache/spark/pull/6136 + + [SPARK-6752] [STREAMING] [REVISED] Allow StreamingContext to be recreated from checkpoint and existing SparkContext + Tathagata Das + 
2015-05-13 17:33:15 -0700 + Commit: aec8394, github.com/apache/spark/pull/6096 + + [SPARK-7601] [SQL] Support Insert into JDBC Datasource + Venkata Ramana Gollamudi + 2015-05-13 17:24:04 -0700 + Commit: 820aaa6, github.com/apache/spark/pull/6121 + + [SPARK-7081] Faster sort-based shuffle path using binary processing cache-aware sort + Josh Rosen + 2015-05-13 17:07:31 -0700 + Commit: c53ebea, github.com/apache/spark/pull/5868 + + [SPARK-7356] [STREAMING] Fix flakey tests in FlumePollingStreamSuite using SparkSink's batch CountDownLatch. + Hari Shreedharan + 2015-05-13 16:43:30 -0700 + Commit: 6c0644a, github.com/apache/spark/pull/5918 + + [STREAMING] [MINOR] Keep streaming.UIUtils private + Andrew Or + 2015-05-13 16:31:24 -0700 + Commit: e499a1e, github.com/apache/spark/pull/6134 + + [SPARK-7502] DAG visualization: gracefully handle removed stages + Andrew Or + 2015-05-13 16:29:52 -0700 + Commit: 895d46a, github.com/apache/spark/pull/6132 + + [SPARK-7464] DAG visualization: highlight the same RDDs on hover + Andrew Or + 2015-05-13 16:29:10 -0700 + Commit: 4b4f10b, github.com/apache/spark/pull/6100 + + [SPARK-7399] Spark compilation error for scala 2.11 + Andrew Or + 2015-05-13 16:28:37 -0700 + Commit: e6b8cef, github.com/apache/spark/pull/6129 + + [SPARK-7608] Clean up old state in RDDOperationGraphListener + Andrew Or + 2015-05-13 16:27:48 -0700 + Commit: ec34230, github.com/apache/spark/pull/6125 + + [SQL] Move some classes into packages that are more appropriate. + Reynold Xin + 2015-05-13 16:15:31 -0700 + Commit: acd872b, github.com/apache/spark/pull/6108 + + [SPARK-7303] [SQL] push down project if possible when the child is sort + scwf + 2015-05-13 16:13:48 -0700 + Commit: d5c52d9, github.com/apache/spark/pull/5838 + + [SPARK-7382] [MLLIB] Feature Parity in PySpark for ml.classification + Burak Yavuz + 2015-05-13 15:13:09 -0700 + Commit: 51230f2, github.com/apache/spark/pull/6106 + + [SPARK-7545] [MLLIB] Added check in Bernoulli Naive Bayes to make sure that both training and predict features have values of 0 or 1 + leahmcguire + 2015-05-13 14:13:19 -0700 + Commit: d9fb905, github.com/apache/spark/pull/6073 + + [SPARK-7593] [ML] Python Api for ml.feature.Bucketizer + Burak Yavuz + 2015-05-13 13:21:36 -0700 + Commit: 11911b0, github.com/apache/spark/pull/6124 + + [SPARK-7551][DataFrame] support backticks for DataFrame attribute resolution + Wenchen Fan + 2015-05-13 12:47:48 -0700 + Commit: 3a60bcb, github.com/apache/spark/pull/6074 + + [SPARK-7567] [SQL] Migrating Parquet data source to FSBasedRelation + Cheng Lian + 2015-05-13 11:04:10 -0700 + Commit: 90f304b, github.com/apache/spark/pull/6090 + + [SPARK-7589] [STREAMING] [WEBUI] Make "Input Rate" in the Streaming page consistent with other pages + zsxwing + 2015-05-13 10:01:26 -0700 + Commit: 10007fb, github.com/apache/spark/pull/6102 + + [SPARK-6734] [SQL] Add UDTF.close support in Generate + Cheng Hao + 2015-05-14 00:14:59 +0800 + Commit: 42cf4a2, github.com/apache/spark/pull/5383 + + [MINOR] [SQL] Removes debugging println + Cheng Lian + 2015-05-13 23:40:13 +0800 + Commit: d78f0e1, github.com/apache/spark/pull/6123 + + [SQL] In InsertIntoFSBasedRelation.insert, log cause before abort job/task. 
+ Yin Huai + 2015-05-13 23:36:19 +0800 + Commit: 9ca28d9, github.com/apache/spark/pull/6105 + + [SPARK-7599] [SQL] Don't restrict customized output committers to be subclasses of FileOutputCommitter + Cheng Lian + 2015-05-13 07:35:55 -0700 + Commit: cb1fe81, github.com/apache/spark/pull/6118 + + [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path + Masayoshi TSUZUKI , Kousuke Saruta + 2015-05-13 09:43:40 +0100 + Commit: bfdecac, github.com/apache/spark/pull/5447 + + [SPARK-7526] [SPARKR] Specify ip of RBackend, MonitorServer and RRDD Socket server + linweizhong + 2015-05-12 23:55:44 -0700 + Commit: 7bd5274, github.com/apache/spark/pull/6053 + + [SPARK-7482] [SPARKR] Rename some DataFrame API methods in SparkR to match their counterparts in Scala. + Sun Rui + 2015-05-12 23:52:30 -0700 + Commit: b18f1c6, github.com/apache/spark/pull/6007 + + [SPARK-7566][SQL] Add type to HiveContext.analyzer + Santiago M. Mola + 2015-05-12 23:44:21 -0700 + Commit: 6ff3379, github.com/apache/spark/pull/6086 + + [SPARK-7321][SQL] Add Column expression for conditional statements (when/otherwise) + Reynold Xin , kaka1992 + 2015-05-12 21:43:34 -0700 + Commit: 219a904, github.com/apache/spark/pull/6072 + + [SPARK-7588] Document all SQL/DataFrame public methods with @since tag + Reynold Xin + 2015-05-12 18:37:02 -0700 + Commit: bdd5db9, github.com/apache/spark/pull/6101 + + [HOTFIX] Use the old Job API to support old Hadoop versions + zsxwing + 2015-05-13 08:33:24 +0800 + Commit: 2cc3301, github.com/apache/spark/pull/6095 + + [SPARK-7572] [MLLIB] do not import Param/Params under pyspark.ml + Xiangrui Meng + 2015-05-12 17:15:39 -0700 + Commit: 08ec1af, github.com/apache/spark/pull/6094 + + [SPARK-7554] [STREAMING] Throw exception when an active/stopped StreamingContext is used to create DStreams and output operations + Tathagata Das + 2015-05-12 17:07:21 -0700 + Commit: bb81b15, github.com/apache/spark/pull/6099 + + [SPARK-7528] [MLLIB] make RankingMetrics Java-friendly + Xiangrui Meng + 2015-05-12 16:53:47 -0700 + Commit: 6c292a2, github.com/apache/spark/pull/6098 + + [SPARK-7553] [STREAMING] Added methods to maintain a singleton StreamingContext + Tathagata Das + 2015-05-12 16:44:14 -0700 + Commit: 91fbd93, github.com/apache/spark/pull/6070 + + [SPARK-7573] [ML] OneVsRest cleanups + Joseph K. Bradley + 2015-05-12 16:42:30 -0700 + Commit: 612247f, github.com/apache/spark/pull/6097 + + [SPARK-7557] [ML] [DOC] User guide for spark.ml HashingTF, Tokenizer + Joseph K. Bradley + 2015-05-12 16:39:56 -0700 + Commit: d080df1, github.com/apache/spark/pull/6093 + + [SPARK-7496] [MLLIB] Update Programming guide with Online LDA + Yuhao Yang + 2015-05-12 15:12:29 -0700 + Commit: fe34a59, github.com/apache/spark/pull/6046 + + [SPARK-7406] [STREAMING] [WEBUI] Add tooltips for "Scheduling Delay", "Processing Time" and "Total Delay" + zsxwing + 2015-05-12 14:41:21 -0700 + Commit: 221375e, github.com/apache/spark/pull/5952 + + [SPARK-7571] [MLLIB] rename Math to math + Xiangrui Meng + 2015-05-12 14:39:03 -0700 + Commit: 2555517, github.com/apache/spark/pull/6092 + + [SPARK-7484][SQL]Support jdbc connection properties + Venkata Ramana Gollamudi + 2015-05-12 14:37:23 -0700 + Commit: 32819fc, github.com/apache/spark/pull/6009 + + [SPARK-7559] [MLLIB] Bucketizer should include the right most boundary in the last bucket. 
+ Xiangrui Meng + 2015-05-12 14:24:26 -0700 + Commit: 98ccd93, github.com/apache/spark/pull/6075 + + [SPARK-7569][SQL] Better error for invalid binary expressions + Michael Armbrust + 2015-05-12 13:36:55 -0700 + Commit: c68485e, github.com/apache/spark/pull/6089 + + [SPARK-7015] [MLLIB] [WIP] Multiclass to Binary Reduction: One Against All + Ram Sriharsha + 2015-05-12 13:35:12 -0700 + Commit: fd16709, github.com/apache/spark/pull/5830 + + [SPARK-2018] [CORE] Upgrade LZF library to fix endian serialization p… + Tim Ellison + 2015-05-12 20:48:26 +0100 + Commit: eadda92, github.com/apache/spark/pull/6077 + + [SPARK-7487] [ML] Feature Parity in PySpark for ml.regression + Burak Yavuz + 2015-05-12 12:17:05 -0700 + Commit: 432694c, github.com/apache/spark/pull/6016 + + [HOT FIX #6076] DAG visualization: curve the edges + Andrew Or + 2015-05-12 12:06:30 -0700 + Commit: ce6c400 + + [SPARK-7276] [DATAFRAME] speed up DataFrame.select by collapsing Project + Wenchen Fan + 2015-05-12 11:51:55 -0700 + Commit: 8be43f8, github.com/apache/spark/pull/5831 + + [SPARK-7500] DAG visualization: move cluster labeling to dagre-d3 + Andrew Or + 2015-05-12 11:17:59 -0700 + Commit: a236104, github.com/apache/spark/pull/6076 + + [DataFrame][minor] support column in field accessor + Wenchen Fan + 2015-05-12 10:37:57 -0700 + Commit: ec89286, github.com/apache/spark/pull/6080 + + [SPARK-3928] [SPARK-5182] [SQL] Partitioning support for the data sources API + Cheng Lian + 2015-05-13 01:32:28 +0800 + Commit: d232813, github.com/apache/spark/pull/5526 + + [DataFrame][minor] cleanup unapply methods in DataTypes + Wenchen Fan + 2015-05-12 10:28:40 -0700 + Commit: a9d84a9, github.com/apache/spark/pull/6079 + + [SPARK-6876] [PySpark] [SQL] add DataFrame na.replace in pyspark + Daoyuan Wang + 2015-05-12 10:23:41 -0700 + Commit: 653db0a, github.com/apache/spark/pull/6003 + + [SPARK-7532] [STREAMING] StreamingContext.start() made to logWarning and not throw exception + Tathagata Das + 2015-05-12 08:48:24 -0700 + Commit: 2bbb685, github.com/apache/spark/pull/6060 + + [SPARK-7467] Dag visualization: treat checkpoint as an RDD operation + Andrew Or + 2015-05-12 01:40:55 -0700 + Commit: 5601632, github.com/apache/spark/pull/6004 + + [SPARK-7485] [BUILD] Remove pyspark files from assembly. + Marcelo Vanzin + 2015-05-12 01:39:21 -0700 + Commit: afe54b7, github.com/apache/spark/pull/6022 + + [MINOR] [PYSPARK] Set PYTHONPATH to python/lib/pyspark.zip rather than python/pyspark + linweizhong + 2015-05-12 01:36:27 -0700 + Commit: 4092a2e, github.com/apache/spark/pull/6047 + + [SPARK-7534] [CORE] [WEBUI] Fix the Stage table when a stage is missing + zsxwing + 2015-05-12 01:34:33 -0700 + Commit: af374ed, github.com/apache/spark/pull/6061 + + [SPARK-6994][SQL] Update docs for fetching Row fields by name + vidmantas zemleris + 2015-05-11 22:29:24 -0700 + Commit: 6523fb8, github.com/apache/spark/pull/6030 + + [SQL] Rename Dialect -> ParserDialect. + Reynold Xin + 2015-05-11 22:06:56 -0700 + Commit: c6b8148, github.com/apache/spark/pull/6071 + + [SPARK-7435] [SPARKR] Make DataFrame.show() consistent with that of Scala and pySpark + Joshi , Rekha Joshi + 2015-05-11 21:02:34 -0700 + Commit: 835a770, github.com/apache/spark/pull/5989 + + [SPARK-7509][SQL] DataFrame.drop in Python for dropping columns. 
+ Reynold Xin + 2015-05-11 20:04:36 -0700 + Commit: ed40ab5, github.com/apache/spark/pull/6068 + + [SPARK-7437] [SQL] Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly + Zhongshuai Pei <799203320@qq.com>, DoingDone9 <799203320@qq.com> + 2015-05-11 19:22:44 -0700 + Commit: c30982d, github.com/apache/spark/pull/5972 + + [SPARK-7411] [SQL] Support SerDe for HiveQl in CTAS + Cheng Hao + 2015-05-11 19:21:16 -0700 + Commit: 1a664a0, github.com/apache/spark/pull/5963 + + [SPARK-7324] [SQL] DataFrame.dropDuplicates + Reynold Xin + 2015-05-11 19:15:14 -0700 + Commit: 8a9d234, github.com/apache/spark/pull/6066 + + [SPARK-7530] [STREAMING] Added StreamingContext.getState() to expose the current state of the context + Tathagata Das + 2015-05-11 18:53:50 -0700 + Commit: c16b47f, github.com/apache/spark/pull/6058 + + [SPARK-5893] [ML] Add bucketizer + Xusen Yin , Joseph K. Bradley + 2015-05-11 18:41:22 -0700 + Commit: f188815, github.com/apache/spark/pull/5980 + + Updated DataFrame.saveAsTable Hive warning to include SPARK-7550 ticket. + Reynold Xin + 2015-05-11 18:10:45 -0700 + Commit: e1e599d, github.com/apache/spark/pull/6067 + + [SPARK-7462][SQL] Update documentation for retaining grouping columns in DataFrames. + Reynold Xin + 2015-05-11 18:07:12 -0700 + Commit: eaa6116, github.com/apache/spark/pull/6062 + + [SPARK-7084] improve saveAsTable documentation + madhukar + 2015-05-11 17:04:11 -0700 + Commit: 0dbfe16, github.com/apache/spark/pull/5654 + + [SQL] Show better error messages for incorrect join types in DataFrames. + Reynold Xin + 2015-05-11 17:02:11 -0700 + Commit: 0ff34f80, github.com/apache/spark/pull/6064 + + Update Documentation: leftsemi instead of semijoin + LCY Vincent + 2015-05-11 14:48:10 -0700 + Commit: 788503a, github.com/apache/spark/pull/5944 + + [STREAMING] [MINOR] Close files correctly when iterator is finished in streaming WAL recovery + jerryshao + 2015-05-11 14:38:58 -0700 + Commit: 9e226e1, github.com/apache/spark/pull/6050 + + [SPARK-7516] [Minor] [DOC] Replace depreciated inferSchema() with createDataFrame() + gchen + 2015-05-11 14:37:18 -0700 + Commit: 1538b10, github.com/apache/spark/pull/6041 + + [SPARK-7508] JettyUtils-generated servlets to log & report all errors + Steve Loughran + 2015-05-11 13:35:06 -0700 + Commit: 779174a, github.com/apache/spark/pull/6033 + + [SPARK-7462] By default retain group by columns in aggregate + Reynold Xin , Shivaram Venkataraman + 2015-05-11 11:35:16 -0700 + Commit: 9c35f02, github.com/apache/spark/pull/5996 + + [SPARK-7361] [STREAMING] Throw unambiguous exception when attempting to start multiple StreamingContexts in the same JVM + Tathagata Das + 2015-05-11 10:58:56 -0700 + Commit: 11648fa, github.com/apache/spark/pull/5907 + + [SPARK-7522] [EXAMPLES] Removed angle brackets from dataFormat option + Bryan Cutler + 2015-05-11 09:23:47 -0700 + Commit: c234d78, github.com/apache/spark/pull/6049 + + [SPARK-6092] [MLLIB] Add RankingMetrics in PySpark/MLlib + Yanbo Liang + 2015-05-11 09:14:20 -0700 + Commit: 017f9fa, github.com/apache/spark/pull/6044 + + [SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doesn't work all the time + Wesley Miao , Wesley + 2015-05-11 12:20:06 +0100 + Commit: da1be15, github.com/apache/spark/pull/5871 + + [SPARK-7519] [SQL] fix minor bugs in thrift server UI + tianyi + 2015-05-11 14:08:15 +0800 + 
Commit: fff3c86, github.com/apache/spark/pull/6048 + + [SPARK-7512] [SPARKR] Fix RDD's show method to use getJRDD + Shivaram Venkataraman + 2015-05-10 19:49:42 -0700 + Commit: 5f227fd, github.com/apache/spark/pull/6035 + + [SPARK-7427] [PYSPARK] Make sharedParams match in Scala, Python + Glenn Weidner + 2015-05-10 19:18:32 -0700 + Commit: 051864e, github.com/apache/spark/pull/6023 + + [SPARK-5521] PCA wrapper for easy transform vectors + Kirill A. Korinskiy , Joseph K. Bradley + 2015-05-10 13:34:00 -0700 + Commit: 193ff69, github.com/apache/spark/pull/4304 + + [SPARK-7431] [ML] [PYTHON] Made CrossValidatorModel call parent init in PySpark + Joseph K. Bradley + 2015-05-10 13:29:27 -0700 + Commit: d49b72c, github.com/apache/spark/pull/5968 + + [MINOR] [SQL] Fixes variable name typo + Cheng Lian + 2015-05-10 21:26:36 +0800 + Commit: fd87b2a, github.com/apache/spark/pull/6038 + + [SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC connector + Oleg Sidorkin + 2015-05-10 01:31:34 -0700 + Commit: 5c40403, github.com/apache/spark/pull/6032 + + [SPARK-6091] [MLLIB] Add MulticlassMetrics in PySpark/MLlib + Yanbo Liang + 2015-05-10 00:57:14 -0700 + Commit: fe46374, github.com/apache/spark/pull/6011 + + [SPARK-7475] [MLLIB] adjust ldaExample for online LDA + Yuhao Yang + 2015-05-09 15:40:46 -0700 + Commit: e96fc86, github.com/apache/spark/pull/6000 + + [BUILD] Reference fasterxml.jackson.version in sql/core/pom.xml + tedyu + 2015-05-09 13:19:07 -0700 + Commit: 5110f3e, github.com/apache/spark/pull/6031 + + Upgrade version of jackson-databind in sql/core/pom.xml + tedyu + 2015-05-09 10:41:30 -0700 + Commit: 6c5b9ff, github.com/apache/spark/pull/6028 + + [STREAMING] [DOCS] Fix wrong url about API docs of StreamingListener + dobashim + 2015-05-09 10:14:46 +0100 + Commit: 5dbc7bb, github.com/apache/spark/pull/6024 + + [SPARK-7403] [WEBUI] Link URL in objects on Timeline View is wrong in case of running on YARN + Kousuke Saruta + 2015-05-09 10:10:29 +0100 + Commit: 869a52d, github.com/apache/spark/pull/5947 + + [SPARK-7438] [SPARK CORE] Fixed validation of relativeSD in countApproxDistinct + Vinod K C + 2015-05-09 10:03:15 +0100 + Commit: b0460f4, github.com/apache/spark/pull/5974 + + [SPARK-7498] [ML] removed varargs annotation from Params.setDefaults + Joseph K. Bradley + 2015-05-08 21:55:54 -0700 + Commit: 25972d3, github.com/apache/spark/pull/6021 + + [SPARK-7262] [ML] Binary LogisticRegression with L1/L2 (elastic net) using OWLQN in new ML package + DB Tsai + 2015-05-08 21:43:05 -0700 + Commit: 80bbe72, github.com/apache/spark/pull/5967 + + [SPARK-7375] [SQL] Avoid row copying in exchange when sort.serializeMapOutputs takes effect + Josh Rosen + 2015-05-08 22:09:55 -0400 + Commit: 21212a2, github.com/apache/spark/pull/5948 + + [SPARK-7231] [SPARKR] Changes to make SparkR DataFrame dplyr friendly. 
+ Shivaram Venkataraman + 2015-05-08 18:29:57 -0700 + Commit: 448ff33, github.com/apache/spark/pull/6005 + + [SPARK-7451] [YARN] Preemption of executors is counted as failure causing Spark job to fail + Ashwin Shankar + 2015-05-08 17:51:00 -0700 + Commit: 959c7b6, github.com/apache/spark/pull/5993 + + [SPARK-7488] [ML] Feature Parity in PySpark for ml.recommendation + Burak Yavuz + 2015-05-08 17:24:32 -0700 + Commit: 85cab34, github.com/apache/spark/pull/6015 + + [SPARK-7237] Clean function in several RDD methods + tedyu + 2015-05-08 17:16:38 -0700 + Commit: 45b6215, github.com/apache/spark/pull/5959 + + [SPARK-7469] [SQL] DAG visualization: show SQL query operators + Andrew Or + 2015-05-08 17:15:10 -0700 + Commit: cafffd0, github.com/apache/spark/pull/5999 + + [SPARK-6955] Perform port retries at NettyBlockTransferService level + Aaron Davidson + 2015-05-08 17:13:55 -0700 + Commit: 1eae476, github.com/apache/spark/pull/5575 + + updated ec2 instance types + Brendan Collins + 2015-05-08 15:59:34 -0700 + Commit: 6e35cb5, github.com/apache/spark/pull/6014 + + [SPARK-5913] [MLLIB] Python API for ChiSqSelector + Yanbo Liang + 2015-05-08 15:48:39 -0700 + Commit: ab48df3, github.com/apache/spark/pull/5939 + + [SPARK-4699] [SQL] Make caseSensitive configurable in spark sql analyzer + Jacky Li , wangfei , scwf + 2015-05-08 15:25:54 -0700 + Commit: 21bd722, github.com/apache/spark/pull/5806 + + [SPARK-7390] [SQL] Only merge other CovarianceCounter when its count is greater than zero + Liang-Chi Hsieh + 2015-05-08 14:41:16 -0700 + Commit: 5205eb4, github.com/apache/spark/pull/5931 + + [SPARK-7378] [CORE] Handle deep links to unloaded apps. + Marcelo Vanzin + 2015-05-08 14:12:58 -0700 + Commit: 3024f6b, github.com/apache/spark/pull/5922 + + [MINOR] [CORE] Allow History Server to read kerberos opts from config file. 
+ Marcelo Vanzin + 2015-05-08 14:10:27 -0700 + Commit: 3da5f8b, github.com/apache/spark/pull/5998 + + [SPARK-7466] DAG visualization: fix orphan nodes + Andrew Or + 2015-05-08 14:09:39 -0700 + Commit: ca2f1c5, github.com/apache/spark/pull/6002 + + [MINOR] Defeat early garbage collection of test suite variable + Tim Ellison + 2015-05-08 14:08:52 -0700 + Commit: f734c58, github.com/apache/spark/pull/6010 + + [SPARK-7489] [SPARK SHELL] Spark shell crashes when compiled with scala 2.11 + vinodkc + 2015-05-08 14:07:53 -0700 + Commit: 3b7fb7a, github.com/apache/spark/pull/6013 + + [WEBUI] Remove debug feature for vis.js + Kousuke Saruta + 2015-05-08 14:06:37 -0700 + Commit: 1dde3b3, github.com/apache/spark/pull/5994 + + [MINOR] Ignore python/lib/pyspark.zip + zsxwing + 2015-05-08 14:06:02 -0700 + Commit: ab0caa0, github.com/apache/spark/pull/6017 + + [SPARK-7490] [CORE] [Minor] MapOutputTracker.deserializeMapStatuses: close input streams + Evan Jones + 2015-05-08 22:00:39 +0100 + Commit: 6230809, github.com/apache/spark/pull/5982 + + [SPARK-6627] Finished rename to ShuffleBlockResolver + Kay Ousterhout + 2015-05-08 12:24:06 -0700 + Commit: 82be68f, github.com/apache/spark/pull/5764 + + [SPARK-7133] [SQL] Implement struct, array, and map field accessor + Wenchen Fan + 2015-05-08 11:49:38 -0700 + Commit: f8468c4, github.com/apache/spark/pull/5744 + + [SPARK-7298] Harmonize style of new visualizations + Matei Zaharia + 2015-05-08 14:41:42 -0400 + Commit: 0b2c252, github.com/apache/spark/pull/5942 + + [SPARK-7436] Fixed instantiation of custom recovery mode factory and added tests + Jacek Lewandowski + 2015-05-08 11:38:09 -0700 + Commit: 89d9487, github.com/apache/spark/pull/5976 + + [SPARK-6824] Fill the docs for DataFrame API in SparkR + hqzizania , qhuang + 2015-05-08 11:25:04 -0700 + Commit: 4f01f5b, github.com/apache/spark/pull/5969 + + [SPARK-7474] [MLLIB] update ParamGridBuilder doctest + Xiangrui Meng + 2015-05-08 11:16:04 -0700 + Commit: 75fed0c, github.com/apache/spark/pull/6001 + + [SPARK-7383] [ML] Feature Parity in PySpark for ml.features + Burak Yavuz + 2015-05-08 11:14:39 -0700 + Commit: 85e1154, github.com/apache/spark/pull/5991 + + [SPARK-3454] separate json endpoints for data in the UI + Imran Rashid + 2015-05-08 16:54:32 +0100 + Commit: 532bfda, github.com/apache/spark/pull/5940 + + [SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH + Lianhui Wang + 2015-05-08 08:44:46 -0500 + Commit: acf4bc1, github.com/apache/spark/pull/5580 + + [SPARK-7392] [CORE] bugfix: Kryo buffer size cannot be larger than 2M + Zhang, Liye + 2015-05-08 09:10:58 +0100 + Commit: f5e9678, github.com/apache/spark/pull/5934 + + [SPARK-7232] [SQL] Add a Substitution batch for spark sql analyzer + wangfei + 2015-05-07 22:55:42 -0700 + Commit: bb5872f, github.com/apache/spark/pull/5776 + + [SPARK-7470] [SQL] Spark shell SQLContext crashes without hive + Andrew Or + 2015-05-07 22:32:13 -0700 + Commit: 1a3e9e9, github.com/apache/spark/pull/5997 + + [SPARK-6986] [SQL] Use Serializer2 in more cases. 
+ Yin Huai + 2015-05-07 20:59:42 -0700 + Commit: 9d0d289, github.com/apache/spark/pull/5849 + + [SPARK-7452] [MLLIB] fix bug in topBykey and update test + Shuo Xiang + 2015-05-07 20:55:08 -0700 + Commit: 28d4238, github.com/apache/spark/pull/5990 + + [SPARK-6908] [SQL] Use isolated Hive client + Michael Armbrust + 2015-05-07 19:36:24 -0700 + Commit: 05454fd, github.com/apache/spark/pull/5876 + + [SPARK-7305] [STREAMING] [WEBUI] Make BatchPage show friendly information when jobs are dropped by SparkListener + zsxwing + 2015-05-07 17:34:44 -0700 + Commit: 2e8a141, github.com/apache/spark/pull/5840 + + [SPARK-7450] Use UNSAFE.getLong() to speed up BitSetMethods#anySet() + tedyu + 2015-05-07 16:53:59 -0700 + Commit: 99897fe, github.com/apache/spark/pull/5897 + + [SPARK-2155] [SQL] [WHEN D THEN E] [ELSE F] add CaseKeyWhen for "CASE a WHEN b THEN c * END" + Wenchen Fan + 2015-05-07 16:26:49 -0700 + Commit: 622a0c5, github.com/apache/spark/pull/5979 + + [SPARK-5281] [SQL] Registering table on RDD is giving MissingRequirementError + Iulian Dragos + 2015-05-07 16:24:11 -0700 + Commit: 9fd25f7, github.com/apache/spark/pull/5981 + + [SPARK-7277] [SQL] Throw exception if the property mapred.reduce.tasks is set to -1 + Liang-Chi Hsieh + 2015-05-07 16:22:45 -0700 + Commit: 7064ea0, github.com/apache/spark/pull/5811 + + [SQL] [MINOR] make star and multialias extend NamedExpression + scwf + 2015-05-07 16:21:24 -0700 + Commit: 2425e4d, github.com/apache/spark/pull/5928 + + [SPARK-6948] [MLLIB] compress vectors in VectorAssembler + Xiangrui Meng + 2015-05-07 15:45:37 -0700 + Commit: 475143a, github.com/apache/spark/pull/5985 + + [SPARK-5726] [MLLIB] Elementwise (Hadamard) Vector Product Transformer + Octavian Geagla , Joseph K. Bradley + 2015-05-07 14:49:55 -0700 + Commit: 76e58b5, github.com/apache/spark/pull/4580 + + [SPARK-7328] [MLLIB] [PYSPARK] Pyspark.mllib.linalg.Vectors: Missing items + MechCoder + 2015-05-07 14:02:05 -0700 + Commit: 4436e26, github.com/apache/spark/pull/5872 + + [SPARK-7347] DAG visualization: add tooltips to RDDs + Andrew Or + 2015-05-07 12:29:56 -0700 + Commit: 1b742a4, github.com/apache/spark/pull/5957 + + [SPARK-7391] DAG visualization: auto expand if linked from another viz + Andrew Or + 2015-05-07 12:29:18 -0700 + Commit: 800c0fc, github.com/apache/spark/pull/5958 + + [SPARK-7373] [MESOS] Add docker support for launching drivers in mesos cluster mode. + Timothy Chen + 2015-05-07 12:23:16 -0700 + Commit: 226033c, github.com/apache/spark/pull/5917 + + [SPARK-7399] [SPARK CORE] Fixed compilation error in scala 2.11 + Tijo Thomas + 2015-05-07 12:21:09 -0700 + Commit: d4e31bf, github.com/apache/spark/pull/5966 + + [SPARK-5213] [SQL] Remove the duplicated SparkSQLParser + Cheng Hao + 2015-05-07 12:09:54 -0700 + Commit: 2b0c423, github.com/apache/spark/pull/5965 + + [SPARK-7116] [SQL] [PYSPARK] Remove cache() causing memory leak + ksonj + 2015-05-07 12:04:19 -0700 + Commit: 86f141c, github.com/apache/spark/pull/5973 + + [SPARK-1442] [SQL] [FOLLOW-UP] Address minor comments in Window Function PR (#5604). 
+ Yin Huai + 2015-05-07 11:46:49 -0700 + Commit: 9dcf4f7, github.com/apache/spark/pull/5945 + + [SPARK-6093] [MLLIB] Add RegressionMetrics in PySpark/MLlib + Yanbo Liang + 2015-05-07 11:18:32 -0700 + Commit: ef835dc, github.com/apache/spark/pull/5941 + + [SPARK-7118] [Python] Add the coalesce Spark SQL function available in PySpark + Olivier Girardot + 2015-05-07 10:58:35 -0700 + Commit: 3038b26, github.com/apache/spark/pull/5698 + + [SPARK-7388] [SPARK-7383] wrapper for VectorAssembler in Python + Burak Yavuz , Xiangrui Meng + 2015-05-07 10:25:41 -0700 + Commit: 6b9737a, github.com/apache/spark/pull/5930 + + [SPARK-7330] [SQL] avoid NPE at jdbc rdd + Daoyuan Wang + 2015-05-07 10:05:01 -0700 + Commit: 84ee348, github.com/apache/spark/pull/5877 + + [SPARK-7429] [ML] Params cleanups + Joseph K. Bradley + 2015-05-07 01:28:44 -0700 + Commit: 91ce131, github.com/apache/spark/pull/5960 + + [SPARK-7421] [MLLIB] OnlineLDA cleanups + Joseph K. Bradley + 2015-05-07 01:12:14 -0700 + Commit: a038c51, github.com/apache/spark/pull/5956 + + [SPARK-7035] Encourage __getitem__ over __getattr__ on column access in the Python DataFrame API + ksonj + 2015-05-07 01:02:00 -0700 + Commit: b929a75, github.com/apache/spark/pull/5971 + + [SPARK-7295][SQL] bitwise operations for DataFrame DSL + Shiti + 2015-05-07 01:00:29 -0700 + Commit: 703211b, github.com/apache/spark/pull/5867 + + [SPARK-7217] [STREAMING] Add configuration to control the default behavior of StreamingContext.stop() implicitly calling SparkContext.stop() + Tathagata Das + 2015-05-07 00:24:44 -0700 + Commit: cb13c98, github.com/apache/spark/pull/5929 + + [SPARK-7430] [STREAMING] [TEST] General improvements to streaming tests to increase debuggability + Tathagata Das + 2015-05-07 00:21:10 -0700 + Commit: 065d114, github.com/apache/spark/pull/5961 + + [SPARK-5938] [SPARK-5443] [SQL] Improve JsonRDD performance + Nathan Howell + 2015-05-06 22:56:53 -0700 + Commit: 2337ccc1, github.com/apache/spark/pull/5801 + + [SPARK-6812] [SPARKR] filter() on DataFrame does not work as expected. + Sun Rui + 2015-05-06 22:48:16 -0700 + Commit: 4948f42, github.com/apache/spark/pull/5938 + + [SPARK-7432] [MLLIB] disable cv doctest + Xiangrui Meng + 2015-05-06 22:29:07 -0700 + Commit: fb4967b, github.com/apache/spark/pull/5962 + + [SPARK-7405] [STREAMING] Fix the bug that ReceiverInputDStream doesn't report InputInfo + zsxwing + 2015-05-06 18:07:00 -0700 + Commit: d6e76cb, github.com/apache/spark/pull/5950 + + [HOT FIX] For DAG visualization #5954 + Andrew Or + 2015-05-06 18:02:08 -0700 + Commit: 85a644b + + [SPARK-7371] [SPARK-7377] [SPARK-7408] DAG visualization addendum (#5729) + Andrew Or + 2015-05-06 17:52:34 -0700 + Commit: 76e8344, github.com/apache/spark/pull/5954 + + [SPARK-7396] [STREAMING] [EXAMPLE] Update KafkaWordCountProducer to use new Producer API + jerryshao + 2015-05-06 17:44:43 -0700 + Commit: ba24dfa, github.com/apache/spark/pull/5936 + + [SPARK-6799] [SPARKR] Remove SparkR RDD examples, add dataframe examples + Shivaram Venkataraman + 2015-05-06 17:28:11 -0700 + Commit: 4b91e18, github.com/apache/spark/pull/5949 + + [HOT FIX] [SPARK-7418] Ignore flaky SparkSubmitUtilsSuite test + Andrew Or + 2015-05-06 17:08:39 -0700 + Commit: c0ec20a + + [SPARK-5995] [ML] Make Prediction dev API public + Joseph K. 
Bradley + 2015-05-06 16:15:51 -0700 + Commit: b681b93, github.com/apache/spark/pull/5913 + + [HOT-FIX] Move HiveWindowFunctionQuerySuite.scala to hive compatibility dir. + Yin Huai + 2015-05-06 14:48:25 -0700 + Commit: 14bcb84, github.com/apache/spark/pull/5951 + + Add `Private` annotation. + Josh Rosen + 2015-05-06 11:03:17 -0700 + Commit: 2163367 + + [SPARK-7311] Introduce internal Serializer API for determining if serializers support object relocation + Josh Rosen + 2015-05-06 10:52:55 -0700 + Commit: d651e28, github.com/apache/spark/pull/5924 + + [SPARK-1442] [SQL] Window Function Support for Spark SQL + Yin Huai + 2015-05-06 10:43:00 -0700 + Commit: b521a3b, github.com/apache/spark/pull/5604 + + [SPARK-6201] [SQL] promote string and do widen types for IN + Daoyuan Wang + 2015-05-06 10:30:42 -0700 + Commit: 7212897, github.com/apache/spark/pull/4945 + + [SPARK-5456] [SQL] fix decimal compare for jdbc rdd + Daoyuan Wang + 2015-05-06 10:05:10 -0700 + Commit: f1a5caf, github.com/apache/spark/pull/5803 + + [SQL] JavaDoc update for various DataFrame functions. + Reynold Xin + 2015-05-06 08:50:56 -0700 + Commit: 389b755, github.com/apache/spark/pull/5935 + + [SPARK-6940] [MLLIB] Add CrossValidator to Python ML pipeline API + Xiangrui Meng + 2015-05-06 01:28:43 -0700 + Commit: 3e27a54, github.com/apache/spark/pull/5926 + + [SPARK-7384][Core][Tests] Fix flaky tests for distributed mode in BroadcastSuite + zsxwing + 2015-05-05 23:25:28 -0700 + Commit: 20f9237, github.com/apache/spark/pull/5925 + + [SPARK-6267] [MLLIB] Python API for IsotonicRegression + Yanbo Liang , Xiangrui Meng + 2015-05-05 22:57:13 -0700 + Commit: 384ac3c, github.com/apache/spark/pull/5890 + + [SPARK-7358][SQL] Move DataFrame mathfunctions into functions + Burak Yavuz + 2015-05-05 22:56:01 -0700 + Commit: 8aa6681, github.com/apache/spark/pull/5923 + + [SPARK-6841] [SPARKR] add support for mean, median, stdev etc. + qhuang + 2015-05-05 20:39:56 -0700 + Commit: b5cd7dc, github.com/apache/spark/pull/5446 + + Revert "[SPARK-3454] separate json endpoints for data in the UI" + Reynold Xin + 2015-05-05 19:28:35 -0700 + Commit: 765f6e1 + + [SPARK-6231][SQL/DF] Automatically resolve join condition ambiguity for self-joins. + Reynold Xin + 2015-05-05 18:59:46 -0700 + Commit: e61083c, github.com/apache/spark/pull/5919 + + Some minor cleanup after SPARK-4550. + Sandy Ryza + 2015-05-05 18:32:16 -0700 + Commit: 762ff2e, github.com/apache/spark/pull/5916 + + [SPARK-7230] [SPARKR] Make RDD private in SparkR. 
+ Shivaram Venkataraman + 2015-05-05 14:40:33 -0700 + Commit: 4afb578, github.com/apache/spark/pull/5895 + + [SQL][Minor] make StringComparison extends ExpectsInputTypes + wangfei + 2015-05-05 14:24:37 -0700 + Commit: b6566a2, github.com/apache/spark/pull/5905 + + [SPARK-7351] [STREAMING] [DOCS] Add spark.streaming.ui.retainedBatches to docs + zsxwing + 2015-05-05 13:42:23 -0700 + Commit: 4c95fe5, github.com/apache/spark/pull/5899 + + [SPARK-7294][SQL] ADD BETWEEN + 云峤 , kaka1992 + 2015-05-05 13:23:53 -0700 + Commit: c68d0e2, github.com/apache/spark/pull/5839 + + [SPARK-6939] [STREAMING] [WEBUI] Add timeline and histogram graphs for streaming statistics + zsxwing + 2015-05-05 12:52:16 -0700 + Commit: 8109c9e, github.com/apache/spark/pull/5533 + + [SPARK-5888] [MLLIB] Add OneHotEncoder as a Transformer + Sandy Ryza + 2015-05-05 12:34:02 -0700 + Commit: 94ac9eb, github.com/apache/spark/pull/5500 + + [SPARK-7333] [MLLIB] Add BinaryClassificationEvaluator to PySpark + Xiangrui Meng + 2015-05-05 11:45:37 -0700 + Commit: dfb6bfc, github.com/apache/spark/pull/5885 + + [SPARK-7243][SQL] Reduce size for Contingency Tables in DataFrames + Burak Yavuz + 2015-05-05 11:01:25 -0700 + Commit: 598902b, github.com/apache/spark/pull/5900 + + [SPARK-7007] [CORE] Add a metric source for ExecutorAllocationManager + jerryshao + 2015-05-05 09:43:49 -0700 + Commit: 29350ee, github.com/apache/spark/pull/5589 + + [SPARK-7318] [STREAMING] DStream cleans objects that are not closures + Andrew Or + 2015-05-05 09:37:49 -0700 + Commit: acc877a, github.com/apache/spark/pull/5860 + + [SPARK-7237] Many user provided closures are not actually cleaned + Andrew Or + 2015-05-05 09:37:04 -0700 + Commit: 01d4022, github.com/apache/spark/pull/5787 + + [SPARK-6612] [MLLIB] [PYSPARK] Python KMeans parity + Hrishikesh Subramonian + 2015-05-05 07:57:39 -0700 + Commit: 8b63103, github.com/apache/spark/pull/5647 + + [SPARK-7202] [MLLIB] [PYSPARK] Add SparseMatrixPickler to SerDe + MechCoder + 2015-05-05 07:53:11 -0700 + Commit: cd55e9a, github.com/apache/spark/pull/5775 + + [SPARK-7350] [STREAMING] [WEBUI] Attach the Streaming tab when calling ssc.start() + zsxwing + 2015-05-05 15:09:58 +0100 + Commit: 49923f7, github.com/apache/spark/pull/5898 + + [SPARK-5074] [CORE] [TESTS] Fix the flakey test 'run shuffle with map stage failure' in DAGSchedulerSuite + zsxwing + 2015-05-05 15:04:14 +0100 + Commit: 6f35dac, github.com/apache/spark/pull/5903 + + [MINOR] Minor update for document + Liang-Chi Hsieh + 2015-05-05 14:44:02 +0100 + Commit: d288322, github.com/apache/spark/pull/5906 + + [SPARK-3454] separate json endpoints for data in the UI + Imran Rashid + 2015-05-05 07:25:40 -0500 + Commit: ff8b449, github.com/apache/spark/pull/4435 + + [SPARK-5112] Expose SizeEstimator as a developer api + Sandy Ryza + 2015-05-05 12:38:46 +0100 + Commit: 0327ca2, github.com/apache/spark/pull/3913 + + [SPARK-6653] [YARN] New config to specify port for sparkYarnAM actor system + shekhar.bansal + 2015-05-05 11:09:51 +0100 + Commit: 93af96a, github.com/apache/spark/pull/5719 + + [SPARK-7341] [STREAMING] [TESTS] Fix the flaky test: org.apache.spark.stre... 
+ zsxwing + 2015-05-05 02:15:39 -0700 + Commit: 0634510, github.com/apache/spark/pull/5891 + + [SPARK-7113] [STREAMING] Support input information reporting for Direct Kafka stream + jerryshao + 2015-05-05 02:01:06 -0700 + Commit: becdb81, github.com/apache/spark/pull/5879 + + [HOTFIX] [TEST] Ignoring flaky tests + Tathagata Das + 2015-05-05 01:58:51 -0700 + Commit: e8f847a, github.com/apache/spark/pull/5901 + + [SPARK-7139] [STREAMING] Allow received block metadata to be saved to WAL and recovered on driver failure + Tathagata Das + 2015-05-05 01:45:19 -0700 + Commit: ae27c0e, github.com/apache/spark/pull/5732 + + [MINOR] [BUILD] Declare ivy dependency in root pom. + Marcelo Vanzin + 2015-05-05 08:56:16 +0100 + Commit: 5160437, github.com/apache/spark/pull/5893 + + [SPARK-7314] [SPARK-3524] [PYSPARK] upgrade Pyrolite to 4.4 + Xiangrui Meng + 2015-05-04 23:52:42 -0700 + Commit: 21ed108, github.com/apache/spark/pull/5850 + + [SPARK-7236] [CORE] Fix to prevent AkkaUtils askWithReply from sleeping on final attempt + Bryan Cutler + 2015-05-04 18:29:22 -0700 + Commit: 48655d1, github.com/apache/spark/pull/5896 + + [SPARK-7266] Add ExpectsInputTypes to expressions when possible. + Reynold Xin + 2015-05-04 18:03:07 -0700 + Commit: 1388a46, github.com/apache/spark/pull/5796 + + [SPARK-7243][SQL] Contingency Tables for DataFrames + Burak Yavuz + 2015-05-04 17:02:49 -0700 + Commit: ecf0d8a, github.com/apache/spark/pull/5842 + + [SPARK-6943] [SPARK-6944] DAG visualization on SparkUI + Andrew Or + 2015-05-04 16:21:36 -0700 + Commit: 863ec0c, github.com/apache/spark/pull/5729 + + [SPARK-7319][SQL] Improve the output from DataFrame.show() + 云峤 + 2015-05-04 12:08:38 -0700 + Commit: 34edaa8, github.com/apache/spark/pull/5865 + + [SPARK-5956] [MLLIB] Pipeline components should be copyable. + Xiangrui Meng + 2015-05-04 11:28:59 -0700 + Commit: 893b310, github.com/apache/spark/pull/5820 + + [SPARK-5100] [SQL] add webui for thriftserver + tianyi + 2015-05-04 16:59:34 +0800 + Commit: 343d3bf, github.com/apache/spark/pull/5730 + + [SPARK-5563] [MLLIB] LDA with online variational inference + Yuhao Yang , Joseph K. 
Bradley + 2015-05-04 00:06:25 -0700 + Commit: 3539cb7, github.com/apache/spark/pull/4419 + + [SPARK-7241] Pearson correlation for DataFrames + Burak Yavuz + 2015-05-03 21:44:39 -0700 + Commit: 9646018, github.com/apache/spark/pull/5858 + + [SPARK-7329] [MLLIB] simplify ParamGridBuilder impl + Xiangrui Meng + 2015-05-03 18:06:48 -0700 + Commit: 1ffa8cb, github.com/apache/spark/pull/5873 + + [SPARK-7302] [DOCS] SPARK building documentation still mentions building for yarn 0.23 + Sean Owen + 2015-05-03 21:22:31 +0100 + Commit: 9e25b09, github.com/apache/spark/pull/5863 + + [SPARK-6907] [SQL] Isolated client for HiveMetastore + Michael Armbrust + 2015-05-03 13:12:50 -0700 + Commit: daa70bf, github.com/apache/spark/pull/5851 + + [SPARK-7022] [PYSPARK] [ML] Add ML.Tuning.ParamGridBuilder to PySpark + Omede Firouz , Omede + 2015-05-03 11:42:02 -0700 + Commit: f4af925, github.com/apache/spark/pull/5601 + + [SPARK-7031] [THRIFTSERVER] let thrift server take SPARK_DAEMON_MEMORY and SPARK_DAEMON_JAVA_OPTS + WangTaoTheTonic + 2015-05-03 00:47:47 +0100 + Commit: 49549d5, github.com/apache/spark/pull/5609 + + [SPARK-7255] [STREAMING] [DOCUMENTATION] Added documentation for spark.streaming.kafka.maxRetries + BenFradet + 2015-05-02 23:41:14 +0100 + Commit: ea841ef, github.com/apache/spark/pull/5808 + + [SPARK-5213] [SQL] Pluggable SQL Parser Support + Cheng Hao , scwf + 2015-05-02 15:20:07 -0700 + Commit: 5d6b90d, github.com/apache/spark/pull/5827 + + [MINOR] [HIVE] Fix QueryPartitionSuite. + Marcelo Vanzin + 2015-05-02 23:10:35 +0100 + Commit: 82c8c37, github.com/apache/spark/pull/5854 + + [SPARK-6030] [CORE] Using simulated field layout method to compute class shellSize + Ye Xianjin + 2015-05-02 23:08:09 +0100 + Commit: bfcd528, github.com/apache/spark/pull/4783 + + [SPARK-7323] [SPARK CORE] Use insertAll instead of insert while merging combiners in reducer + Mridul Muralidharan + 2015-05-02 23:05:51 +0100 + Commit: da30352, github.com/apache/spark/pull/5862 + + [SPARK-3444] Fix typo in Dataframes.py introduced in [] + Dean Chen + 2015-05-02 23:04:13 +0100 + Commit: 856a571, github.com/apache/spark/pull/5866 + + [SPARK-7315] [STREAMING] [TEST] Fix flaky WALBackedBlockRDDSuite + Tathagata Das + 2015-05-02 01:53:14 -0700 + Commit: ecc6eb5, github.com/apache/spark/pull/5853 + + [SPARK-7120] [SPARK-7121] Closure cleaner nesting + documentation + tests + Andrew Or + 2015-05-01 23:57:58 -0700 + Commit: 7394e7a, github.com/apache/spark/pull/5685 + + [SPARK-7242] added python api for freqItems in DataFrames + Burak Yavuz + 2015-05-01 23:43:24 -0700 + Commit: 2e0f357, github.com/apache/spark/pull/5859 + + [SPARK-7317] [Shuffle] Expose shuffle handle + Mridul Muralidharan + 2015-05-01 21:23:42 -0700 + Commit: b79aeb9, github.com/apache/spark/pull/5857 + + [SPARK-6229] Add SASL encryption to network library. + Marcelo Vanzin + 2015-05-01 19:01:46 -0700 + Commit: 38d4e9e, github.com/apache/spark/pull/5377 + + [SPARK-2691] [MESOS] Support for Mesos DockerInfo + Chris Heller + 2015-05-01 18:41:22 -0700 + Commit: 8f50a07, github.com/apache/spark/pull/3074 + + [SPARK-6443] [SPARK SUBMIT] Could not submit app in standalone cluster mode when HA is enabled + WangTaoTheTonic + 2015-05-01 18:38:20 -0700 + Commit: b4b43df, github.com/apache/spark/pull/5116 + + [SPARK-7216] [MESOS] Add driver details page to Mesos cluster UI. 
+ Timothy Chen + 2015-05-01 18:36:42 -0700 + Commit: 2022193, github.com/apache/spark/pull/5763 + + [SPARK-6954] [YARN] ExecutorAllocationManager can end up requesting a negative n... + Sandy Ryza + 2015-05-01 18:32:46 -0700 + Commit: 099327d, github.com/apache/spark/pull/5704 + + [SPARK-3444] Provide an easy way to change log level + Holden Karau + 2015-05-01 18:02:10 -0700 + Commit: ae98eec, github.com/apache/spark/pull/5791 + + [SPARK-2808][Streaming][Kafka] update kafka to 0.8.2 + cody koeninger , Helena Edelson + 2015-05-01 17:54:56 -0700 + Commit: 4786484, github.com/apache/spark/pull/4537 + + [SPARK-7112][Streaming][WIP] Add a InputInfoTracker to track all the input streams + jerryshao , Saisai Shao + 2015-05-01 17:46:06 -0700 + Commit: b88c275, github.com/apache/spark/pull/5680 + + [SPARK-7309] [CORE] [STREAMING] Shutdown the thread pools in ReceivedBlockHandler and DAGScheduler + zsxwing + 2015-05-01 17:41:55 -0700 + Commit: ebc25a4, github.com/apache/spark/pull/5845 + + [SPARK-6999] [SQL] Remove the infinite recursive method (useless) + Cheng Hao + 2015-05-01 19:39:30 -0500 + Commit: 98e7045, github.com/apache/spark/pull/5804 + + [SPARK-7304] [BUILD] Include $@ in call to mvn consistently in make-distribution.sh + Rajendra Gokhale (rvgcentos) + 2015-05-01 17:01:36 -0700 + Commit: e6fb377, github.com/apache/spark/pull/5846 + + [SPARK-7312][SQL] SPARK-6913 broke jdk6 build + Yin Huai + 2015-05-01 16:47:00 -0700 + Commit: 41c6a44, github.com/apache/spark/pull/5847 + + Ignore flakey test in SparkSubmitUtilsSuite + Patrick Wendell + 2015-05-01 14:42:58 -0700 + Commit: 5c1faba + + [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS + Hari Shreedharan + 2015-05-01 15:32:09 -0500 + Commit: b1f4ca8, github.com/apache/spark/pull/5823 + + [SPARK-7240][SQL] Single pass covariance calculation for dataframes + Burak Yavuz + 2015-05-01 13:29:17 -0700 + Commit: 4dc8d74, github.com/apache/spark/pull/5825 + + [SPARK-7281] [YARN] Add option to set AM's lib path in client mode. + Marcelo Vanzin + 2015-05-01 21:20:46 +0100 + Commit: 7b5dd3e, github.com/apache/spark/pull/5813 + + [SPARK-7213] [YARN] Check for read permissions before copying a Hadoop config file + Nishkam Ravi , nishkamravi2 , nravi + 2015-05-01 21:14:16 +0100 + Commit: f53a488, github.com/apache/spark/pull/5760 + + Revert "[SPARK-7224] added mock repository generator for --packages tests" + Patrick Wendell + 2015-05-01 13:01:43 -0700 + Commit: c6d9a42 + + Revert "[SPARK-7287] enabled fixed test" + Patrick Wendell + 2015-05-01 13:01:14 -0700 + Commit: 58d6584 + + [SPARK-7274] [SQL] Create Column expression for array/struct creation. 
+ Reynold Xin + 2015-05-01 12:49:02 -0700 + Commit: 3753776, github.com/apache/spark/pull/5802 + + [SPARK-7183] [NETWORK] Fix memory leak of TransportRequestHandler.streamIds + Liang-Chi Hsieh + 2015-05-01 11:59:12 -0700 + Commit: 1686032, github.com/apache/spark/pull/5743 + + [SPARK-6846] [WEBUI] [HOTFIX] return to GET for kill link in UI since YARN AM won't proxy POST + Sean Owen + 2015-05-01 19:57:37 +0100 + Commit: 1262e31, github.com/apache/spark/pull/5837 + + [SPARK-5854] personalized page rank + Dan McClary , dwmclary + 2015-05-01 11:55:43 -0700 + Commit: 7d42722, github.com/apache/spark/pull/4774 + + changing persistence engine trait to an abstract class + niranda + 2015-05-01 11:27:45 -0700 + Commit: 27de6fe, github.com/apache/spark/pull/5832 + + Limit help option regex + Chris Biow + 2015-05-01 19:26:55 +0100 + Commit: c8c481d, github.com/apache/spark/pull/5816 + + [SPARK-5891] [ML] Add Binarizer ML Transformer + Liang-Chi Hsieh + 2015-05-01 08:31:01 -0700 + Commit: 7630213, github.com/apache/spark/pull/5699 + + [SPARK-3066] [MLLIB] Support recommendAll in matrix factorization model + Debasish Das , Xiangrui Meng + 2015-05-01 08:27:46 -0700 + Commit: 3b514af, github.com/apache/spark/pull/3098 + + [SPARK-4705] Handle multiple app attempts event logs, history server. + Marcelo Vanzin , twinkle sachdeva , twinkle.sachdeva , twinkle sachdeva + 2015-05-01 09:50:55 -0500 + Commit: 3052f49, github.com/apache/spark/pull/5432 + + [SPARK-3468] [WEBUI] Timeline-View feature + Kousuke Saruta + 2015-05-01 01:39:56 -0700 + Commit: 7fe0f3f, github.com/apache/spark/pull/2342 + + [SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation + MechCoder + 2015-04-30 23:51:00 -0700 + Commit: c24aeb6, github.com/apache/spark/pull/5807 + + [SPARK-7291] [CORE] Fix a flaky test in AkkaRpcEnvSuite + zsxwing + 2015-04-30 23:44:33 -0700 + Commit: 14b3288, github.com/apache/spark/pull/5822 + + [SPARK-7287] enabled fixed test + Burak Yavuz + 2015-04-30 23:39:58 -0700 + Commit: 7cf1eb7, github.com/apache/spark/pull/5826 + + [SPARK-4550] In sort-based shuffle, store map outputs in serialized form + Sandy Ryza + 2015-04-30 23:14:14 -0700 + Commit: 0a2b15c, github.com/apache/spark/pull/4450 + + HOTFIX: Disable buggy dependency checker + Patrick Wendell + 2015-04-30 22:39:58 -0700 + Commit: a9fc505 + + [SPARK-6479] [BLOCK MANAGER] Create off-heap block storage API + Zhan Zhang + 2015-04-30 22:24:31 -0700 + Commit: 36a7a68, github.com/apache/spark/pull/5430 + + [SPARK-7248] implemented random number generators for DataFrames + Burak Yavuz + 2015-04-30 21:56:03 -0700 + Commit: b5347a4, github.com/apache/spark/pull/5819 + + [SPARK-7282] [STREAMING] Fix the race conditions in StreamingListenerSuite + zsxwing + 2015-04-30 21:32:11 -0700 + Commit: 69a739c, github.com/apache/spark/pull/5812 + + Revert "[SPARK-5213] [SQL] Pluggable SQL Parser Support" + Patrick Wendell + 2015-04-30 20:33:36 -0700 + Commit: beeafcf + + [SPARK-7123] [SQL] support table.star in sqlcontext + scwf + 2015-04-30 18:50:14 -0700 + Commit: 473552f, github.com/apache/spark/pull/5690 + + [SPARK-5213] [SQL] Pluggable SQL Parser Support + Cheng Hao + 2015-04-30 18:49:06 -0700 + Commit: 3ba5aaa, github.com/apache/spark/pull/4015 + + [SPARK-6913][SQL] Fixed "java.sql.SQLException: No suitable driver found" + Vyacheslav Baranov + 2015-04-30 18:45:14 -0700 + Commit: e991255, 
github.com/apache/spark/pull/5782 + + [SPARK-7109] [SQL] Push down left side filter for left semi join + wangfei , scwf + 2015-04-30 18:18:54 -0700 + Commit: a0d8a61, github.com/apache/spark/pull/5677 + + [SPARK-7093] [SQL] Using newPredicate in NestedLoopJoin to enable code generation + scwf + 2015-04-30 18:15:56 -0700 + Commit: 0797338, github.com/apache/spark/pull/5665 + + [SPARK-7280][SQL] Add "drop" column/s on a data frame + rakeshchalasani + 2015-04-30 17:42:50 -0700 + Commit: ee04413, github.com/apache/spark/pull/5818 + + [SPARK-7242][SQL][MLLIB] Frequent items for DataFrames + Burak Yavuz + 2015-04-30 16:40:32 -0700 + Commit: 149b3ee, github.com/apache/spark/pull/5799 + + [SPARK-7279] Removed diffSum which is theoretical zero in LinearRegression and coding formating + DB Tsai + 2015-04-30 16:26:51 -0700 + Commit: 1c3e402, github.com/apache/spark/pull/5809 + + [Build] Enable MiMa checks for SQL + Josh Rosen + 2015-04-30 16:23:01 -0700 + Commit: fa01bec, github.com/apache/spark/pull/5727 + + [SPARK-7267][SQL]Push down Project when it's child is Limit + Zhongshuai Pei <799203320@qq.com>, DoingDone9 <799203320@qq.com> + 2015-04-30 15:22:13 -0700 + Commit: 77cc25f, github.com/apache/spark/pull/5797 + + [SPARK-7288] Suppress compiler warnings due to use of sun.misc.Unsafe; add facade in front of Unsafe; remove use of Unsafe.setMemory + Josh Rosen + 2015-04-30 15:21:00 -0700 + Commit: 07a8620, github.com/apache/spark/pull/5814 + + [SPARK-7196][SQL] Support precision and scale of decimal type for JDBC + Liang-Chi Hsieh + 2015-04-30 15:13:43 -0700 + Commit: 6702324, github.com/apache/spark/pull/5777 + + Revert "[SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS" + Patrick Wendell + 2015-04-30 14:59:20 -0700 + Commit: e0628f2 + + [SPARK-7207] [ML] [BUILD] Added ml.recommendation, ml.regression to SparkBuild + Joseph K. Bradley + 2015-04-30 14:39:27 -0700 + Commit: adbdb19, github.com/apache/spark/pull/5758 + + [SPARK-5342] [YARN] Allow long running Spark apps to run on secure YARN/HDFS + Hari Shreedharan + 2015-04-30 13:03:23 -0500 + Commit: 6c65da6, github.com/apache/spark/pull/4688 + + [SPARK-7224] added mock repository generator for --packages tests + Burak Yavuz + 2015-04-30 10:19:08 -0700 + Commit: 7dacc08, github.com/apache/spark/pull/5790 + + [HOTFIX] Disabling flaky test (fix in progress as part of SPARK-7224) + Patrick Wendell + 2015-04-30 01:02:33 -0700 + Commit: 47bf406 + + [SPARK-1406] Mllib pmml model export + Vincenzo Selvaggio , Xiangrui Meng , selvinsource + 2015-04-29 23:21:21 -0700 + Commit: 254e050, github.com/apache/spark/pull/3062 + + [SPARK-7225][SQL] CombineLimits optimizer does not work + Zhongshuai Pei <799203320@qq.com>, DoingDone9 <799203320@qq.com> + 2015-04-29 22:44:14 -0700 + Commit: 4459514, github.com/apache/spark/pull/5770 + + Some code clean up. + DB Tsai + 2015-04-29 21:44:41 -0700 + Commit: ba49eb1, github.com/apache/spark/pull/5794 + + [SPARK-7156][SQL] Addressed follow up comments for randomSplit + Burak Yavuz + 2015-04-29 19:13:47 -0700 + Commit: 5553198, github.com/apache/spark/pull/5795 + + [SPARK-7234][SQL] Fix DateType mismatch when codegen on. 
+ 云峤 + 2015-04-29 18:23:42 -0700 + Commit: 7143f6e, github.com/apache/spark/pull/5778 + + [SPARK-6862] [STREAMING] [WEBUI] Add BatchPage to display details of a batch + zsxwing + 2015-04-29 18:22:14 -0700 + Commit: 1b7106b, github.com/apache/spark/pull/5473 + + [SPARK-7176] [ML] Add validation functionality to Param + Joseph K. Bradley + 2015-04-29 17:26:46 -0700 + Commit: 114bad6, github.com/apache/spark/pull/5740 + + [SQL] [Minor] Print detail query execution info when spark answer is not right + wangfei + 2015-04-29 17:00:24 -0700 + Commit: 1fdfdb4, github.com/apache/spark/pull/5774 + + [SPARK-7259] [ML] VectorIndexer: do not copy non-ML metadata to output column + Joseph K. Bradley + 2015-04-29 16:35:17 -0700 + Commit: b1ef6a6, github.com/apache/spark/pull/5789 + + [SPARK-7229] [SQL] SpecificMutableRow should take integer type as internal representation for Date + Cheng Hao + 2015-04-29 16:23:34 -0700 + Commit: f8cbb0a, github.com/apache/spark/pull/5772 + + [SPARK-7155] [CORE] Allow newAPIHadoopFile to support comma-separated list of files as input + yongtang + 2015-04-29 23:55:51 +0100 + Commit: 3fc6cfd, github.com/apache/spark/pull/5708 + + [SPARK-7181] [CORE] fix inifite loop in Externalsorter's mergeWithAggregation + Qiping Li + 2015-04-29 23:52:16 +0100 + Commit: 7f4b583, github.com/apache/spark/pull/5737 + + [SPARK-7156][SQL] support RandomSplit in DataFrames + Burak Yavuz + 2015-04-29 15:34:05 -0700 + Commit: d7dbce8, github.com/apache/spark/pull/5761 + + [SPARK-6529] [ML] Add Word2Vec transformer + Xusen Yin + 2015-04-29 14:55:32 -0700 + Commit: c9d530e, github.com/apache/spark/pull/5596 + + [SPARK-7222] [ML] Added mathematical derivation in comment and compressed the model, removed the correction terms in LinearRegression with ElasticNet + DB Tsai + 2015-04-29 14:53:37 -0700 + Commit: 15995c8, github.com/apache/spark/pull/5767 + + [SPARK-6629] cancelJobGroup() may not work for jobs whose job groups are inherited from parent threads + Josh Rosen + 2015-04-29 13:31:52 -0700 + Commit: 3a180c1, github.com/apache/spark/pull/5288 + + [SPARK-6752] [STREAMING] [REOPENED] Allow StreamingContext to be recreated from checkpoint and existing SparkContext + Tathagata Das + 2015-04-29 13:10:31 -0700 + Commit: a9c4e29, github.com/apache/spark/pull/5773 + + [SPARK-7056] [STREAMING] Make the Write Ahead Log pluggable + Tathagata Das + 2015-04-29 13:06:11 -0700 + Commit: 1868bd4, github.com/apache/spark/pull/5645 + + Fix a typo of "threshold" + Xusen Yin + 2015-04-29 10:13:48 -0700 + Commit: c0c0ba6, github.com/apache/spark/pull/5769 + + [SQL][Minor] fix java doc for DataFrame.agg + Wenchen Fan + 2015-04-29 09:49:24 -0700 + Commit: 81ea42b, github.com/apache/spark/pull/5712 + + Better error message on access to non-existing attribute + ksonj + 2015-04-29 09:48:47 -0700 + Commit: 3df9c5d, github.com/apache/spark/pull/5771 + + [SPARK-7223] Rename RPC askWithReply -> askWithReply, sendWithReply -> ask. + Reynold Xin + 2015-04-29 09:46:37 -0700 + Commit: 687273d, github.com/apache/spark/pull/5768 + + [SPARK-6918] [YARN] Secure HBase support. 
+ Dean Chen + 2015-04-29 08:58:33 -0500 + Commit: baed3f2, github.com/apache/spark/pull/5586 + + [SPARK-7076][SPARK-7077][SPARK-7080][SQL] Use managed memory for aggregations + Josh Rosen + 2015-04-29 01:07:26 -0700 + Commit: f49284b, github.com/apache/spark/pull/5725 + + [SPARK-7204] [SQL] Fix callSite for Dataframe and SQL operations + Patrick Wendell + 2015-04-29 00:35:08 -0700 + Commit: 1fd6ed9, github.com/apache/spark/pull/5757 + + [SPARK-7188] added python support for math DataFrame functions + Burak Yavuz + 2015-04-29 00:09:24 -0700 + Commit: fe917f5, github.com/apache/spark/pull/5750 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-04-28 23:38:59 -0700 + Commit: 8dee274, github.com/apache/spark/pull/3205 + + [SPARK-7205] Support `.ivy2/local` and `.m2/repositories/` in --packages + Burak Yavuz + 2015-04-28 23:05:02 -0700 + Commit: f98773a, github.com/apache/spark/pull/5755 + + [SPARK-7215] made coalesce and repartition a part of the query plan + Burak Yavuz + 2015-04-28 22:48:04 -0700 + Commit: 271c4c6, github.com/apache/spark/pull/5762 + + [SPARK-6756] [MLLIB] add toSparse, toDense, numActives, numNonzeros, and compressed to Vector + Xiangrui Meng + 2015-04-28 21:49:53 -0700 + Commit: 5ef006f, github.com/apache/spark/pull/5756 + + [SPARK-7208] [ML] [PYTHON] Added Matrix, SparseMatrix to __all__ list in linalg.py + Joseph K. Bradley + 2015-04-28 21:15:47 -0700 + Commit: a8aeadb, github.com/apache/spark/pull/5759 + + [SPARK-7138] [STREAMING] Add method to BlockGenerator to add multiple records to BlockGenerator with single callback + Tathagata Das + 2015-04-28 19:31:57 -0700 + Commit: 5c8f4bd, github.com/apache/spark/pull/5695 + + [SPARK-6965] [MLLIB] StringIndexer handles numeric input. + Xiangrui Meng + 2015-04-28 17:41:09 -0700 + Commit: d36e673, github.com/apache/spark/pull/5753 + + Closes #4807 Closes #5055 Closes #3583 + Xiangrui Meng + 2015-04-28 14:21:25 -0700 + Commit: 555213e + + [SPARK-7201] [MLLIB] move Identifiable to ml.util + Xiangrui Meng + 2015-04-28 14:07:26 -0700 + Commit: f0a1f90, github.com/apache/spark/pull/5749 + + [MINOR] [CORE] Warn users who try to cache RDDs with dynamic allocation on. + Marcelo Vanzin + 2015-04-28 13:49:29 -0700 + Commit: 28b1af7, github.com/apache/spark/pull/5751 + + [SPARK-5338] [MESOS] Add cluster mode support for Mesos + Timothy Chen , Luc Bourlier + 2015-04-28 13:31:08 -0700 + Commit: 53befac, github.com/apache/spark/pull/5144 + + [SPARK-6314] [CORE] handle JsonParseException for history server + Zhang, Liye + 2015-04-28 12:33:48 -0700 + Commit: 8009810, github.com/apache/spark/pull/5736 + + [SPARK-5932] [CORE] Use consistent naming for size properties + Ilya Ganelin + 2015-04-28 12:18:55 -0700 + Commit: 2d222fb, github.com/apache/spark/pull/5574 + + [SPARK-4286] Add an external shuffle service that can be run as a daemon. 
+ Iulian Dragos + 2015-04-28 12:08:18 -0700 + Commit: 8aab94d, github.com/apache/spark/pull/4990 + + [Core][test][minor] replace try finally block with tryWithSafeFinally + Zhang, Liye + 2015-04-28 10:24:00 -0700 + Commit: 52ccf1d, github.com/apache/spark/pull/5739 + + [SPARK-7140] [MLLIB] only scan the first 16 entries in Vector.hashCode + Xiangrui Meng + 2015-04-28 09:59:36 -0700 + Commit: b14cd23, github.com/apache/spark/pull/5697 + + [SPARK-5253] [ML] LinearRegression with L1/L2 (ElasticNet) using OWLQN + DB Tsai , DB Tsai + 2015-04-28 09:46:08 -0700 + Commit: 6a827d5, github.com/apache/spark/pull/4259 + + [SPARK-6435] spark-shell --jars option does not add all jars to classpath + Masayoshi TSUZUKI + 2015-04-28 07:55:21 -0400 + Commit: 268c419, github.com/apache/spark/pull/5227 + + [SPARK-7100] [MLLIB] Fix persisted RDD leak in GradientBoostTrees + Jim Carroll + 2015-04-28 07:51:02 -0400 + Commit: 75905c5, github.com/apache/spark/pull/5669 + + [SPARK-7168] [BUILD] Update plugin versions in Maven build and centralize versions + Sean Owen + 2015-04-28 07:48:34 -0400 + Commit: 7f3b3b7, github.com/apache/spark/pull/5720 + + [SPARK-6352] [SQL] Custom parquet output committer + Pei-Lun Lee + 2015-04-28 16:50:18 +0800 + Commit: e13cd86, github.com/apache/spark/pull/5525 + + [SPARK-7135][SQL] DataFrame expression for monotonically increasing IDs. + Reynold Xin + 2015-04-28 00:39:08 -0700 + Commit: d94cd1a, github.com/apache/spark/pull/5709 + + [SPARK-7187] SerializationDebugger should not crash user code + Andrew Or + 2015-04-28 00:38:14 -0700 + Commit: bf35edd, github.com/apache/spark/pull/5734 + + [SPARK-5946] [STREAMING] Add Python API for direct Kafka stream + jerryshao , Saisai Shao + 2015-04-27 23:48:02 -0700 + Commit: 9e4e82b, github.com/apache/spark/pull/4723 + + [SPARK-6829] Added math functions for DataFrames + Burak Yavuz + 2015-04-27 23:10:14 -0700 + Commit: 29576e7, github.com/apache/spark/pull/5616 + + [SPARK-7174][Core] Move calling `TaskScheduler.executorHeartbeatReceived` to another thread + zsxwing + 2015-04-27 21:45:40 -0700 + Commit: 874a2ca, github.com/apache/spark/pull/5723 + + [SPARK-7090] [MLLIB] Introduce LDAOptimizer to LDA to further improve extensibility + Yuhao Yang + 2015-04-27 19:02:51 -0700 + Commit: 4d9e560, github.com/apache/spark/pull/5661 + + [SPARK-7162] [YARN] Launcher error in yarn-client + GuoQiang Li + 2015-04-27 19:52:41 -0400 + Commit: 62888a4, github.com/apache/spark/pull/5716 + + [SPARK-7145] [CORE] commons-lang (2.x) classes used instead of commons-lang3 (3.x); commons-io used without dependency + Sean Owen + 2015-04-27 19:50:55 -0400 + Commit: ab5adb7, github.com/apache/spark/pull/5703 + + [SPARK-3090] [CORE] Stop SparkContext if user forgets to. + Marcelo Vanzin + 2015-04-27 19:46:17 -0400 + Commit: 5d45e1f, github.com/apache/spark/pull/5696 + + [SPARK-6738] [CORE] Improve estimate the size of a large array + Hong Shen + 2015-04-27 18:57:31 -0400 + Commit: 8e1c00d, github.com/apache/spark/pull/5608 + + [SPARK-7103] Fix crash with SparkContext.union when RDD has no partitioner + Steven She + 2015-04-27 18:55:02 -0400 + Commit: b9de9e0, github.com/apache/spark/pull/5679 + + [SPARK-6991] [SPARKR] Adds support for zipPartitions. 
+ hlin09 + 2015-04-27 15:04:37 -0700 + Commit: ca9f4eb, github.com/apache/spark/pull/5568 + + SPARK-7107 Add parameter for zookeeper.znode.parent to hbase_inputformat... + tedyu + 2015-04-27 14:42:40 -0700 + Commit: ef82bdd, github.com/apache/spark/pull/5673 + + [SPARK-6856] [R] Make RDD information more useful in SparkR + Jeff Harrison + 2015-04-27 13:38:25 -0700 + Commit: 7078f60, github.com/apache/spark/pull/5667 + + [SPARK-4925] Publish Spark SQL hive-thriftserver maven artifact + Misha Chernetsov + 2015-04-27 11:27:56 -0700 + Commit: 998aac2, github.com/apache/spark/pull/5429 + + [SPARK-6505] [SQL] Remove the reflection call in HiveFunctionWrapper + baishuo + 2015-04-27 14:08:05 +0800 + Commit: 82bb7fd, github.com/apache/spark/pull/5660 + + [SQL][Minor] rename DataTypeParser.apply to DataTypeParser.parse + wangfei + 2015-04-26 21:08:47 -0700 + Commit: d188b8b, github.com/apache/spark/pull/5710 + + [SPARK-7152][SQL] Add a Column expression for partition ID. + Reynold Xin + 2015-04-26 11:46:58 -0700 + Commit: ca55dc9, github.com/apache/spark/pull/5705 + + [MINOR] [MLLIB] Refactor toString method in MLLIB + Alain + 2015-04-26 07:14:24 -0400 + Commit: 9a5bbe0, github.com/apache/spark/pull/5687 + + [SPARK-6014] [CORE] [HOTFIX] Add try-catch block around ShutDownHook + Nishkam Ravi , nishkamravi2 , nravi + 2015-04-25 20:02:23 -0400 + Commit: f5473c2, github.com/apache/spark/pull/5672 + + [SPARK-7092] Update spark scala version to 2.11.6 + Prashant Sharma + 2015-04-25 18:07:34 -0400 + Commit: a11c868, github.com/apache/spark/pull/5662 + + [SQL] Update SQL readme to include instructions on generating golden answer files based on Hive 0.13.1. + Yin Huai + 2015-04-25 13:43:39 -0700 + Commit: aa6966f, github.com/apache/spark/pull/5702 + + [SPARK-6113] [ML] Tree ensembles for Pipelines API + Joseph K. Bradley + 2015-04-25 12:27:19 -0700 + Commit: a7160c4, github.com/apache/spark/pull/5626 + + Revert "[SPARK-6752][Streaming] Allow StreamingContext to be recreated from checkpoint and existing SparkContext" + Patrick Wendell + 2015-04-25 10:37:34 -0700 + Commit: a61d65f + + update the deprecated CountMinSketchMonoid function to TopPctCMS function + KeheCAI + 2015-04-25 08:42:38 -0400 + Commit: cca9905, github.com/apache/spark/pull/5629 + + [SPARK-7136][Docs] Spark SQL and DataFrame Guide fix example file and paths + Deborah Siegel , DEBORAH SIEGEL , DEBORAH SIEGEL , DEBORAH SIEGEL + 2015-04-24 20:25:07 -0700 + Commit: 59b7cfc, github.com/apache/spark/pull/5693 + + [PySpark][Minor] Update sql example, so that can read file correctly + linweizhong + 2015-04-24 20:23:19 -0700 + Commit: d874f8b, github.com/apache/spark/pull/5684 + + [SPARK-6122] [CORE] Upgrade tachyon-client version to 0.6.3 + Calvin Jia + 2015-04-24 17:57:41 -0400 + Commit: 438859e, github.com/apache/spark/pull/5354 + + [SPARK-6852] [SPARKR] Accept numeric as numPartitions in SparkR. + Sun Rui + 2015-04-24 12:52:07 -0700 + Commit: caf0136, github.com/apache/spark/pull/5613 + + [SPARK-7033] [SPARKR] Clean usage of split. Use partition instead where applicable. 
+ Sun Rui + 2015-04-24 11:00:19 -0700 + Commit: ebb77b2, github.com/apache/spark/pull/5628 + + [SPARK-6528] [ML] Add IDF transformer + Xusen Yin + 2015-04-24 08:29:49 -0700 + Commit: 6e57d57, github.com/apache/spark/pull/5266 + + [SPARK-7115] [MLLIB] skip the very first 1 in poly expansion + Xiangrui Meng + 2015-04-24 08:27:48 -0700 + Commit: 78b39c7, github.com/apache/spark/pull/5681 + + [SPARK-5894] [ML] Add polynomial mapper + Xusen Yin , Xiangrui Meng + 2015-04-24 00:39:29 -0700 + Commit: 8509519, github.com/apache/spark/pull/5245 + + Fixed a typo from the previous commit. + Reynold Xin + 2015-04-23 22:39:00 -0700 + Commit: 4c722d7 + + [SQL] Fixed expression data type matching. + Reynold Xin + 2015-04-23 21:21:03 -0700 + Commit: d3a302d, github.com/apache/spark/pull/5675 + + Update sql-programming-guide.md + Ken Geis + 2015-04-23 20:45:33 -0700 + Commit: 67bccbd, github.com/apache/spark/pull/5674 + + [SPARK-7060][SQL] Add alias function to python dataframe + Yin Huai + 2015-04-23 18:52:55 -0700 + Commit: 2d010f7, github.com/apache/spark/pull/5634 + + [SPARK-7037] [CORE] Inconsistent behavior for non-spark config properties in spark-shell and spark-submit + Cheolsoo Park + 2015-04-23 20:10:55 -0400 + Commit: 336f7f5, github.com/apache/spark/pull/5617 + + [SPARK-6818] [SPARKR] Support column deletion in SparkR DataFrame API. + Sun Rui + 2015-04-23 16:08:14 -0700 + Commit: 73db132, github.com/apache/spark/pull/5655 + + [SQL] Break dataTypes.scala into multiple files. + Reynold Xin + 2015-04-23 14:48:19 -0700 + Commit: 6220d93, github.com/apache/spark/pull/5670 + + [SPARK-7070] [MLLIB] LDA.setBeta should call setTopicConcentration. + Xiangrui Meng + 2015-04-23 14:46:54 -0700 + Commit: 1ed46a6, github.com/apache/spark/pull/5649 + + [SPARK-7087] [BUILD] Fix path issue change version script + Tijo Thomas + 2015-04-23 17:23:15 -0400 + Commit: 6d0749c, github.com/apache/spark/pull/5656 + + [SPARK-6879] [HISTORYSERVER] check if app is completed before clean it up + WangTaoTheTonic + 2015-04-23 17:20:17 -0400 + Commit: baa83a9, github.com/apache/spark/pull/5491 + + [SPARK-7085][MLlib] Fix miniBatchFraction parameter in train method called with 4 arguments + wizz + 2015-04-23 14:00:07 -0700 + Commit: 3e91cc2, github.com/apache/spark/pull/5658 + + [SPARK-7058] Include RDD deserialization time in "task deserialization time" metric + Josh Rosen + 2015-04-23 13:19:03 -0700 + Commit: 6afde2c, github.com/apache/spark/pull/5635 + + [SPARK-7055][SQL]Use correct ClassLoader for JDBC Driver in JDBCRDD.getConnector + Vinod K C + 2015-04-23 12:00:23 -0700 + Commit: c1213e6, github.com/apache/spark/pull/5633 + + [SPARK-6752][Streaming] Allow StreamingContext to be recreated from checkpoint and existing SparkContext + Tathagata Das + 2015-04-23 11:29:34 -0700 + Commit: 534f2a4, github.com/apache/spark/pull/5428 + + [SPARK-7044] [SQL] Fix the deadlock in script transformation + Cheng Hao + 2015-04-23 10:35:22 -0700 + Commit: cc48e63, github.com/apache/spark/pull/5625 + + [minor][streaming]fixed scala string interpolation error + Prabeesh K + 2015-04-23 10:33:13 -0700 + Commit: 975f53e, github.com/apache/spark/pull/5653 + + [HOTFIX] [SQL] Fix compilation for scala 2.11. + Prashant Sharma + 2015-04-23 16:45:26 +0530 + Commit: a7d65d3, github.com/apache/spark/pull/5652 + + [SPARK-7069][SQL] Rename NativeType -> AtomicType. 
+ Reynold Xin + 2015-04-23 01:43:40 -0700 + Commit: f60bece, github.com/apache/spark/pull/5651 + + [SPARK-7068][SQL] Remove PrimitiveType + Reynold Xin + 2015-04-22 23:55:20 -0700 + Commit: 29163c5, github.com/apache/spark/pull/5646 + + [MLlib] Add support for BooleanType to VectorAssembler. + Reynold Xin + 2015-04-22 23:54:48 -0700 + Commit: 2d33323, github.com/apache/spark/pull/5648 + + [HOTFIX][SQL] Fix broken cached test + Liang-Chi Hsieh + 2015-04-22 22:18:56 -0700 + Commit: d9e70f3, github.com/apache/spark/pull/5640 + + [SPARK-7046] Remove InputMetrics from BlockResult + Kay Ousterhout + 2015-04-22 21:42:09 -0700 + Commit: 03e85b4, github.com/apache/spark/pull/5627 + + [SPARK-7066][MLlib] VectorAssembler should use NumericType not NativeType. + Reynold Xin + 2015-04-22 21:35:42 -0700 + Commit: d206860, github.com/apache/spark/pull/5642 + + [MLlib] UnaryTransformer nullability should not depend on PrimitiveType. + Reynold Xin + 2015-04-22 21:35:12 -0700 + Commit: 1b85e08, github.com/apache/spark/pull/5644 + + Disable flaky test: ReceiverSuite "block generator throttling". + Reynold Xin + 2015-04-22 21:24:22 -0700 + Commit: b69c4f9 + + [SPARK-6967] [SQL] fix date type convertion in jdbcrdd + Daoyuan Wang + 2015-04-22 19:14:28 -0700 + Commit: 04525c0, github.com/apache/spark/pull/5590 + + [SPARK-6827] [MLLIB] Wrap FPGrowthModel.freqItemsets and make it consistent with Java API + Yanbo Liang + 2015-04-22 17:22:26 -0700 + Commit: f4f3998, github.com/apache/spark/pull/5614 + + [SPARK-7059][SQL] Create a DataFrame join API to facilitate equijoin. + Reynold Xin + 2015-04-22 15:26:58 -0700 + Commit: baf865d, github.com/apache/spark/pull/5638 + + [SPARK-7039][SQL]JDBCRDD: Add support on type NVARCHAR + szheng79 + 2015-04-22 13:02:55 -0700 + Commit: fbe7106, github.com/apache/spark/pull/5618 + + [SQL] Rename some apply functions. + Reynold Xin + 2015-04-22 11:18:01 -0700 + Commit: cdf0328, github.com/apache/spark/pull/5624 + + [SPARK-7052][Core] Add ThreadUtils and move thread methods from Utils to ThreadUtils + zsxwing + 2015-04-22 11:08:59 -0700 + Commit: 33b8562, github.com/apache/spark/pull/5631 + + [SPARK-6889] [DOCS] CONTRIBUTING.md updates to accompany contribution doc updates + Sean Owen + 2015-04-21 22:34:31 -0700 + Commit: bdc5c16, github.com/apache/spark/pull/5623 + + [SPARK-6113] [ML] Small cleanups after original tree API PR + Joseph K. Bradley + 2015-04-21 21:44:44 -0700 + Commit: 607eff0, github.com/apache/spark/pull/5567 + + [MINOR] Comment improvements in ExternalSorter. + Patrick Wendell + 2015-04-21 21:04:04 -0700 + Commit: 70f9f8f, github.com/apache/spark/pull/5620 + + [SPARK-6490][Docs] Add docs for rpc configurations + zsxwing + 2015-04-21 18:37:53 -0700 + Commit: 3a3f710, github.com/apache/spark/pull/5607 + + [SPARK-1684] [PROJECT INFRA] Merge script should standardize SPARK-XXX prefix + texasmichelle + 2015-04-21 18:08:29 -0700 + Commit: a0761ec, github.com/apache/spark/pull/5149 + + Closes #5427 + Reynold Xin + 2015-04-21 17:52:52 -0700 + Commit: 41ef78a + + [SPARK-6953] [PySpark] speed up python tests + Reynold Xin , Xiangrui Meng + 2015-04-21 17:49:55 -0700 + Commit: 3134c3f, github.com/apache/spark/pull/5605 + + [SPARK-6014] [core] Revamp Spark shutdown hooks, fix shutdown races. 
+ Marcelo Vanzin + 2015-04-21 20:33:57 -0400 + Commit: e72c16e, github.com/apache/spark/pull/5560 + + Avoid warning message about invalid refuse_seconds value in Mesos >=0.21... + mweindel + 2015-04-21 20:19:33 -0400 + Commit: b063a61, github.com/apache/spark/pull/5597 + + [Minor][MLLIB] Fix a minor formatting bug in toString method in Node.scala + Alain + 2015-04-21 16:46:17 -0700 + Commit: ae036d0, github.com/apache/spark/pull/5621 + + [SPARK-7036][MLLIB] ALS.train should support DataFrames in PySpark + Xiangrui Meng + 2015-04-21 16:44:52 -0700 + Commit: 686dd74, github.com/apache/spark/pull/5619 + + [SPARK-6065] [MLlib] Optimize word2vec.findSynonyms using blas calls + MechCoder + 2015-04-21 16:42:45 -0700 + Commit: 7fe6142, github.com/apache/spark/pull/5467 + + [minor] [build] Set java options when generating mima ignores. + Marcelo Vanzin + 2015-04-21 16:35:37 -0700 + Commit: a70e849, github.com/apache/spark/pull/5615 + + [SPARK-3386] Share and reuse SerializerInstances in shuffle paths + Josh Rosen + 2015-04-21 16:24:15 -0700 + Commit: f83c0f1, github.com/apache/spark/pull/5606 + + [SPARK-5817] [SQL] Fix bug of udtf with column names + Cheng Hao + 2015-04-21 15:11:15 -0700 + Commit: 7662ec2, github.com/apache/spark/pull/4602 + + [SPARK-6996][SQL] Support map types in java beans + Punya Biswal + 2015-04-21 14:50:02 -0700 + Commit: 2a24bf9, github.com/apache/spark/pull/5578 + + [SPARK-6969][SQL] Refresh the cached table when REFRESH TABLE is used + Yin Huai + 2015-04-21 14:48:42 -0700 + Commit: 6265cba, github.com/apache/spark/pull/5583 + + [SQL][minor] make it more clear that we only need to re-throw GetField exception for UnresolvedAttribute + Wenchen Fan + 2015-04-21 14:48:02 -0700 + Commit: 03fd921, github.com/apache/spark/pull/5588 + + [SPARK-6994] Allow to fetch field values by name in sql.Row + vidmantas zemleris + 2015-04-21 14:47:09 -0700 + Commit: 2e8c6ca, github.com/apache/spark/pull/5573 + + [SPARK-7011] Build(compilation) fails with scala 2.11 option, because a protected[sql] type is accessed in ml package. 
+ Prashant Sharma + 2015-04-21 14:43:46 -0700 + Commit: 04bf34e, github.com/apache/spark/pull/5593 + + [SPARK-6845] [MLlib] [PySpark] Add isTranposed flag to DenseMatrix + MechCoder + 2015-04-21 14:36:50 -0700 + Commit: 45c47fa, github.com/apache/spark/pull/5455 + + SPARK-3276 Added a new configuration spark.streaming.minRememberDuration + emres + 2015-04-21 16:39:56 -0400 + Commit: c25ca7c, github.com/apache/spark/pull/5438 + + [SPARK-5360] [SPARK-6606] Eliminate duplicate objects in serialized CoGroupedRDD + Kay Ousterhout + 2015-04-21 11:01:18 -0700 + Commit: c035c0f, github.com/apache/spark/pull/4145 + + [SPARK-6985][streaming] Receiver maxRate over 1000 causes a StackOverflowError + David McGuire + 2015-04-21 07:21:10 -0400 + Commit: 5fea3e5, github.com/apache/spark/pull/5559 + + [SPARK-5990] [MLLIB] Model import/export for IsotonicRegression + Yanbo Liang + 2015-04-21 00:14:16 -0700 + Commit: 1f2f723, github.com/apache/spark/pull/5270 + + [SPARK-6949] [SQL] [PySpark] Support Date/Timestamp in Column expression + Davies Liu + 2015-04-21 00:08:18 -0700 + Commit: ab9128f, github.com/apache/spark/pull/5570 + + [SPARK-6490][Core] Add spark.rpc.* and deprecate spark.akka.* + zsxwing + 2015-04-20 23:18:42 -0700 + Commit: 8136810, github.com/apache/spark/pull/5595 + + [SPARK-6635][SQL] DataFrame.withColumn should replace columns with identical column names + Liang-Chi Hsieh + 2015-04-20 18:54:01 -0700 + Commit: c736220, github.com/apache/spark/pull/5541 + + [SPARK-6368][SQL] Build a specialized serializer for Exchange operator. + Yin Huai + 2015-04-20 18:42:50 -0700 + Commit: ce7ddab, github.com/apache/spark/pull/5497 + + [doc][streaming] Fixed broken link in mllib section + BenFradet + 2015-04-20 13:46:55 -0700 + Commit: 517bdf3, github.com/apache/spark/pull/5600 + + fixed doc + Eric Chiang + 2015-04-20 13:11:21 -0700 + Commit: 97fda73, github.com/apache/spark/pull/5599 + + [Minor][MLlib] Incorrect path to test data is used in DecisionTreeExample + Liang-Chi Hsieh + 2015-04-20 10:47:37 -0700 + Commit: 1ebceaa, github.com/apache/spark/pull/5594 + + [SPARK-6661] Python type errors should print type, not object + Elisey Zanko + 2015-04-20 10:44:09 -0700 + Commit: 7717661, github.com/apache/spark/pull/5361 + + [SPARK-7003] Improve reliability of connection failure detection between Netty block transfer service endpoints + Aaron Davidson + 2015-04-20 09:54:21 -0700 + Commit: 968ad97, github.com/apache/spark/pull/5584 + + [SPARK-5924] Add the ability to specify withMean or withStd parameters with StandarScaler + jrabary + 2015-04-20 09:47:56 -0700 + Commit: 1be2070, github.com/apache/spark/pull/4704 + + [doc][mllib] Fix typo of the page title in Isotonic regression documents + dobashim + 2015-04-20 00:03:23 -0400 + Commit: 6fe690d, github.com/apache/spark/pull/5581 + + [SPARK-6979][Streaming] Replace JobScheduler.eventActor and JobGenerator.eventActor with EventLoop + zsxwing + 2015-04-19 20:48:36 -0700 + Commit: c776ee8, github.com/apache/spark/pull/5554 + + [SPARK-6983][Streaming] Update ReceiverTrackerActor to use the new Rpc interface + zsxwing + 2015-04-19 20:35:43 -0700 + Commit: d8e1b7b, github.com/apache/spark/pull/5557 + + [SPARK-6998][MLlib] Make StreamingKMeans 'Serializable' + zsxwing + 2015-04-19 20:33:51 -0700 + Commit: fa73da0, github.com/apache/spark/pull/5582 + + [SPARK-6963][CORE]Flaky test: 
o.a.s.ContextCleanerSuite automatically cleanup checkpoint + GuoQiang Li + 2015-04-19 09:37:09 +0100 + Commit: 0424da6, github.com/apache/spark/pull/5548 + + SPARK-6993 : Add default min, max methods for JavaDoubleRDD + Olivier Girardot + 2015-04-18 18:21:44 -0700 + Commit: 8fbd45c, github.com/apache/spark/pull/5571 + + Fixed doc + Gaurav Nanda + 2015-04-18 17:20:46 -0700 + Commit: 729885e, github.com/apache/spark/pull/5576 + + [SPARK-6219] Reuse pep8.py + Nicholas Chammas + 2015-04-18 16:46:28 -0700 + Commit: 28683b4, github.com/apache/spark/pull/5561 + + [core] [minor] Make sure ConnectionManager stops. + Marcelo Vanzin + 2015-04-18 10:14:56 +0100 + Commit: 327ebf0, github.com/apache/spark/pull/5566 + + SPARK-6992 : Fix documentation example for Spark SQL on StructType + Olivier Girardot + 2015-04-18 00:31:01 -0700 + Commit: 5f095d5, github.com/apache/spark/pull/5569 + + [SPARK-6975][Yarn] Fix argument validation error + jerryshao + 2015-04-17 19:17:06 -0700 + Commit: d850b4b, github.com/apache/spark/pull/5551 + + [SPARK-5933] [core] Move config deprecation warnings to SparkConf. + Marcelo Vanzin + 2015-04-17 19:02:07 -0700 + Commit: 1991337, github.com/apache/spark/pull/5562 + + [SPARK-6350][Mesos] Make mesosExecutorCores configurable in mesos "fine-grained" mode + Jongyoul Lee + 2015-04-17 18:30:55 -0700 + Commit: 6fbeb82, github.com/apache/spark/pull/5063 + + [SPARK-6703][Core] Provide a way to discover existing SparkContext's + Ilya Ganelin + 2015-04-17 18:28:42 -0700 + Commit: c5ed510, github.com/apache/spark/pull/5501 + + Minor fix to SPARK-6958: Improve Python docstring for DataFrame.sort. + Reynold Xin + 2015-04-17 16:30:13 -0500 + Commit: a452c59, github.com/apache/spark/pull/5558 + + SPARK-6988 : Fix documentation regarding DataFrames using the Java API + Olivier Girardot + 2015-04-17 16:23:10 -0500 + Commit: d305e68, github.com/apache/spark/pull/5564 + + [SPARK-6807] [SparkR] Merge recent SparkR-pkg changes + cafreeman , Davies Liu , Zongheng Yang , Shivaram Venkataraman , Shivaram Venkataraman , Sun Rui + 2015-04-17 13:42:19 -0700 + Commit: 59e206d, github.com/apache/spark/pull/5436 + + [SPARK-6113] [ml] Stabilize DecisionTree API + Joseph K. Bradley + 2015-04-17 13:15:36 -0700 + Commit: a83571a, github.com/apache/spark/pull/5530 + + [SPARK-2669] [yarn] Distribute client configuration to AM. + Marcelo Vanzin + 2015-04-17 14:21:51 -0500 + Commit: 50ab8a6, github.com/apache/spark/pull/4142 + + [SPARK-6957] [SPARK-6958] [SQL] improve API compatibility to pandas + Davies Liu + 2015-04-17 11:29:27 -0500 + Commit: c84d916, github.com/apache/spark/pull/5544 + + [SPARK-6604][PySpark]Specify ip of python server scoket + linweizhong + 2015-04-17 12:04:02 +0100 + Commit: dc48ba9, github.com/apache/spark/pull/5256 + + [SPARK-6952] Handle long args when detecting PID reuse + Punya Biswal + 2015-04-17 11:08:37 +0100 + Commit: f6a9a57, github.com/apache/spark/pull/5535 + + [SPARK-6046] [core] Reorganize deprecated config support in SparkConf. 
+ Marcelo Vanzin + 2015-04-17 11:06:01 +0100 + Commit: 4527761, github.com/apache/spark/pull/5514 + + SPARK-6846 [WEBUI] Stage kill URL easy to accidentally trigger and possibility for security issue + Sean Owen + 2015-04-17 11:02:31 +0100 + Commit: f7a2564, github.com/apache/spark/pull/5528 + + [SPARK-6972][SQL] Add Coalesce to DataFrame + Michael Armbrust + 2015-04-16 21:49:26 -0500 + Commit: 8220d52, github.com/apache/spark/pull/5545 + + [SPARK-6966][SQL] Use correct ClassLoader for JDBC Driver + Michael Armbrust + 2015-04-16 17:59:49 -0700 + Commit: e5949c2, github.com/apache/spark/pull/5543 + + [SPARK-6899][SQL] Fix type mismatch when using codegen with Average on DecimalType + Liang-Chi Hsieh + 2015-04-16 17:50:20 -0700 + Commit: 1e43851, github.com/apache/spark/pull/5517 + + [SQL][Minor] Fix foreachUp of treenode + scwf , Fei Wang + 2015-04-16 17:35:51 -0700 + Commit: d966086, github.com/apache/spark/pull/5518 + + [SPARK-6911] [SQL] improve accessor for nested types + Davies Liu + 2015-04-16 17:33:57 -0700 + Commit: 6183b5e, github.com/apache/spark/pull/5513 + + SPARK-6927 [SQL] Sorting Error when codegen on + 云峤 + 2015-04-16 17:32:42 -0700 + Commit: 5fe4343, github.com/apache/spark/pull/5524 + + [SPARK-4897] [PySpark] Python 3 support + Davies Liu , twneale , Josh Rosen + 2015-04-16 16:20:57 -0700 + Commit: 04e44b3, github.com/apache/spark/pull/5173 + + [SPARK-6855] [SPARKR] Set R includes to get the right collate order. + Shivaram Venkataraman + 2015-04-16 13:06:34 -0700 + Commit: 55f553a, github.com/apache/spark/pull/5462 + + [SPARK-6934][Core] Use 'spark.akka.askTimeout' for the ask timeout + zsxwing + 2015-04-16 13:45:55 -0500 + Commit: ef3fb80, github.com/apache/spark/pull/5529 + + [SPARK-6694][SQL]SparkSQL CLI must be able to specify an option --database on the command line. + Jin Adachi , adachij + 2015-04-16 23:41:04 +0800 + Commit: 3ae37b9, github.com/apache/spark/pull/5345 + + [SPARK-4194] [core] Make SparkContext initialization exception-safe. + Marcelo Vanzin + 2015-04-16 10:48:31 +0100 + Commit: de4fa6b, github.com/apache/spark/pull/5335 + + SPARK-4783 [CORE] System.exit() calls in SparkContext disrupt applications embedding Spark + Sean Owen + 2015-04-16 10:45:32 +0100 + Commit: 6179a94, github.com/apache/spark/pull/5492 + + [Streaming][minor] Remove additional quote and unneeded imports + jerryshao + 2015-04-16 10:39:02 +0100 + Commit: 8370550, github.com/apache/spark/pull/5540 + + [SPARK-6893][ML] default pipeline parameter handling in python + Xiangrui Meng + 2015-04-15 23:49:42 -0700 + Commit: 57cd1e8, github.com/apache/spark/pull/5534 + + SPARK-6938: All require statements now have an informative error message. 
+ Juliet Hougland + 2015-04-15 21:52:25 -0700 + Commit: 52c3439, github.com/apache/spark/pull/5532 + + [SPARK-5277][SQL] - SparkSqlSerializer doesn't always register user specified KryoRegistrators + Max Seiden + 2015-04-15 16:15:11 -0700 + Commit: 8a53de1, github.com/apache/spark/pull/5237 + + [SPARK-2312] Logging Unhandled messages + Isaias Barroso + 2015-04-15 22:40:52 +0100 + Commit: d5f1b96, github.com/apache/spark/pull/2055 + + [SPARK-2213] [SQL] sort merge join for spark sql + Daoyuan Wang , Michael Armbrust + 2015-04-15 14:06:10 -0700 + Commit: 585638e, github.com/apache/spark/pull/5208 + + [SPARK-6898][SQL] completely support special chars in column names + Wenchen Fan + 2015-04-15 13:39:12 -0700 + Commit: 4754e16, github.com/apache/spark/pull/5511 + + [SPARK-6937][MLLIB] Fixed bug in PICExample in which the radius were not being accepted on c... + sboeschhuawei + 2015-04-15 13:28:10 -0700 + Commit: 557a797, github.com/apache/spark/pull/5531 + + [SPARK-6844][SQL] Clean up accumulators used in InMemoryRelation when it is uncached + Liang-Chi Hsieh + 2015-04-15 13:15:58 -0700 + Commit: cf38fe0, github.com/apache/spark/pull/5475 + + [SPARK-6638] [SQL] Improve performance of StringType in SQL + Davies Liu + 2015-04-15 13:06:38 -0700 + Commit: 8584276, github.com/apache/spark/pull/5350 + + [SPARK-6887][SQL] ColumnBuilder misses FloatType + Yin Huai + 2015-04-15 13:04:03 -0700 + Commit: 785f955, github.com/apache/spark/pull/5499 + + [SPARK-6800][SQL] Update doc for JDBCRelation's columnPartition + Liang-Chi Hsieh + 2015-04-15 13:01:29 -0700 + Commit: e3e4e9a, github.com/apache/spark/pull/5488 + + [SPARK-6730][SQL] Allow using keyword as identifier in OPTIONS + Liang-Chi Hsieh + 2015-04-15 13:00:19 -0700 + Commit: b75b307, github.com/apache/spark/pull/5520 + + [SPARK-6886] [PySpark] fix big closure with shuffle + Davies Liu + 2015-04-15 12:58:02 -0700 + Commit: f11288d, github.com/apache/spark/pull/5496 + + SPARK-6861 [BUILD] Scalastyle config prevents building Maven child modules alone + Sean Owen + 2015-04-15 15:17:58 +0100 + Commit: 6c5ed8a, github.com/apache/spark/pull/5471 + + [HOTFIX] [SPARK-6896] [SQL] fix compile error in hive-thriftserver + Daoyuan Wang + 2015-04-15 10:23:53 +0100 + Commit: 29aabdd, github.com/apache/spark/pull/5507 + + [SPARK-6871][SQL] WITH clause in CTE can not following another WITH clause + Liang-Chi Hsieh + 2015-04-14 23:47:16 -0700 + Commit: 6be9189, github.com/apache/spark/pull/5480 + + [SPARK-5634] [core] Show correct message in HS when no incomplete apps f... + Marcelo Vanzin + 2015-04-14 18:52:48 -0700 + Commit: 30a6e0d, github.com/apache/spark/pull/5515 + + [SPARK-6890] [core] Fix launcher lib work with SPARK_PREPEND_CLASSES. + Marcelo Vanzin + 2015-04-14 18:51:39 -0700 + Commit: 9717389, github.com/apache/spark/pull/5504 + + [SPARK-6796][Streaming][WebUI] Add "Active Batches" and "Completed Batches" lists to StreamingPage + zsxwing + 2015-04-14 16:51:36 -0700 + Commit: 6de282e, github.com/apache/spark/pull/5434 + + Revert "[SPARK-6352] [SQL] Add DirectParquetOutputCommitter" + Josh Rosen + 2015-04-14 14:07:25 -0700 + Commit: a76b921 + + [SPARK-6769][YARN][TEST] Usage of the ListenerBus in YarnClusterSuite is wrong + Kousuke Saruta + 2015-04-14 14:00:49 -0700 + Commit: 4d4b249, github.com/apache/spark/pull/5417 + + [SPARK-5808] [build] Package pyspark files in sbt assembly. 
+ Marcelo Vanzin + 2015-04-14 13:41:38 -0700 + Commit: 6577437, github.com/apache/spark/pull/5461 + + [SPARK-6905] Upgrade to snappy-java 1.1.1.7 + Josh Rosen + 2015-04-14 13:40:07 -0700 + Commit: 6adb8bc, github.com/apache/spark/pull/5512 + + [SPARK-6700] [yarn] Re-enable flaky test. + Marcelo Vanzin + 2015-04-14 13:34:44 -0700 + Commit: b075e4b, github.com/apache/spark/pull/5459 + + SPARK-1706: Allow multiple executors per worker in Standalone mode + CodingCat + 2015-04-14 13:32:06 -0700 + Commit: 8f8dc45, github.com/apache/spark/pull/731 + + [SPARK-2033] Automatically cleanup checkpoint + GuoQiang Li + 2015-04-14 12:56:47 -0700 + Commit: 25998e4, github.com/apache/spark/pull/855 + + [CORE] SPARK-6880: Fixed null check when all the dependent stages are cancelled due to previous stage failure + pankaj arora + 2015-04-14 12:06:46 -0700 + Commit: dcf8a9f, github.com/apache/spark/pull/5494 + + [SPARK-6894]spark.executor.extraLibraryOptions => spark.executor.extraLibraryPath + WangTaoTheTonic + 2015-04-14 12:02:11 -0700 + Commit: f63b44a, github.com/apache/spark/pull/5506 + + [SPARK-6081] Support fetching http/https uris in driver runner. + Timothy Chen + 2015-04-14 11:48:12 -0700 + Commit: 320bca4, github.com/apache/spark/pull/4832 + + SPARK-6878 [CORE] Fix for sum on empty RDD fails with exception + Erik van Oosten + 2015-04-14 12:39:56 +0100 + Commit: 51b306b, github.com/apache/spark/pull/5489 + + [SPARK-6731] Bump version of apache commons-math3 + Punyashloka Biswal + 2015-04-14 11:43:06 +0100 + Commit: 628a72f, github.com/apache/spark/pull/5380 + + [WIP][HOTFIX][SPARK-4123]: Fix bug in PR dependency (all deps. removed issue) + Brennon York + 2015-04-13 22:31:44 -0700 + Commit: 77eeb10, github.com/apache/spark/pull/5443 + + [SPARK-5957][ML] better handling of parameters + Xiangrui Meng + 2015-04-13 21:18:05 -0700 + Commit: 971b95b, github.com/apache/spark/pull/5431 + + [Minor][SparkR] Minor refactor and removes redundancy related to cleanClosure. + hlin09 + 2015-04-13 20:43:24 -0700 + Commit: 0ba3fdd, github.com/apache/spark/pull/5495 + + [SPARK-5794] [SQL] fix add jar + Daoyuan Wang + 2015-04-13 18:26:00 -0700 + Commit: b45059d, github.com/apache/spark/pull/4586 + + [SQL] [Minor] Fix for SqlApp.scala + Fei Wang + 2015-04-13 18:23:35 -0700 + Commit: 3782e1f, github.com/apache/spark/pull/5485 + + [Spark-4848] Allow different Worker configurations in standalone cluster + Nathan Kronenfeld + 2015-04-13 18:21:16 -0700 + Commit: 435b877, github.com/apache/spark/pull/5140 + + [SPARK-6877][SQL] Add code generation support for Min + Liang-Chi Hsieh + 2015-04-13 18:16:33 -0700 + Commit: 4898dfa, github.com/apache/spark/pull/5487 + + [SPARK-6303][SQL] Remove unnecessary Average in GeneratedAggregate + Liang-Chi Hsieh + 2015-04-13 18:15:29 -0700 + Commit: 5b8b324, github.com/apache/spark/pull/4996 + + [SPARK-6881][SparkR] Changes the checkpoint directory name. 
+ hlin09 + 2015-04-13 16:53:50 -0700 + Commit: d7f2c19, github.com/apache/spark/pull/5493 + + [SPARK-5931][CORE] Use consistent naming for time properties + Ilya Ganelin , Ilya Ganelin + 2015-04-13 16:28:07 -0700 + Commit: c4ab255, github.com/apache/spark/pull/5236 + + [SPARK-5941] [SQL] Unit Test loads the table `src` twice for leftsemijoin.q + Cheng Hao + 2015-04-13 16:02:18 -0700 + Commit: c5602bd, github.com/apache/spark/pull/4506 + + [SPARK-6872] [SQL] add copy in external sort + Daoyuan Wang + 2015-04-13 16:00:58 -0700 + Commit: e63a86a, github.com/apache/spark/pull/5481 + + [SPARK-5972] [MLlib] Cache residuals and gradient in GBT during training and validation + MechCoder + 2015-04-13 15:36:33 -0700 + Commit: 2a55cb4, github.com/apache/spark/pull/5330 + + [SQL][SPARK-6742]: Don't push down predicates which reference partition column(s) + Yash Datta + 2015-04-13 14:43:07 -0700 + Commit: 3a205bb, github.com/apache/spark/pull/5390 + + [SPARK-6130] [SQL] support if not exists for insert overwrite into partition in hiveQl + Daoyuan Wang + 2015-04-13 14:29:07 -0700 + Commit: 85ee0ca, github.com/apache/spark/pull/4865 + + [SPARK-5988][MLlib] add save/load for PowerIterationClusteringModel + Xusen Yin + 2015-04-13 11:53:17 -0700 + Commit: 1e340c3, github.com/apache/spark/pull/5450 + + [SPARK-6662][YARN] Allow variable substitution in spark.yarn.historyServer.address + Cheolsoo Park + 2015-04-13 13:45:10 -0500 + Commit: 6cc5b3e, github.com/apache/spark/pull/5321 + + [SPARK-6765] Enable scalastyle on test code. + Reynold Xin + 2015-04-13 09:29:04 -0700 + Commit: c5b0b29, github.com/apache/spark/pull/5486 + + [SPARK-6207] [YARN] [SQL] Adds delegation tokens for metastore to conf. + Doug Balog , Doug Balog + 2015-04-13 09:49:58 -0500 + Commit: 77620be, github.com/apache/spark/pull/5031 + + [SPARK-6352] [SQL] Add DirectParquetOutputCommitter + Pei-Lun Lee + 2015-04-13 21:52:00 +0800 + Commit: b29663e, github.com/apache/spark/pull/5042 + + [SPARK-6870][Yarn] Catch InterruptedException when yarn application state monitor thread been interrupted + linweizhong + 2015-04-13 13:06:54 +0100 + Commit: 202ebf0, github.com/apache/spark/pull/5479 + + [SPARK-6671] Add status command for spark daemons + Pradeep Chanumolu + 2015-04-13 13:02:55 +0100 + Commit: 240ea03, github.com/apache/spark/pull/5327 + + [SPARK-6440][CORE]Handle IPv6 addresses properly when constructing URI + nyaapa + 2015-04-13 12:55:25 +0100 + Commit: 9d117ce, github.com/apache/spark/pull/5424 + + [SPARK-6860][Streaming][WebUI] Fix the possible inconsistency of StreamingPage + zsxwing + 2015-04-13 12:21:29 +0100 + Commit: 14ce3ea, github.com/apache/spark/pull/5470 + + [SPARK-6762]Fix potential resource leaks in CheckPoint CheckpointWriter and CheckpointReader + lisurprise + 2015-04-13 12:18:05 +0100 + Commit: cadd7d7, github.com/apache/spark/pull/5407 + + [SPARK-6868][YARN] Fix broken container log link on executor page when HTTPS_ONLY. 
+ Dean Chen + 2015-04-13 12:08:55 +0100 + Commit: 950645d, github.com/apache/spark/pull/5477 + + [SPARK-6562][SQL] DataFrame.replace + Reynold Xin + 2015-04-12 22:56:12 -0700 + Commit: 68d1faa, github.com/apache/spark/pull/5282 + + [SPARK-5885][MLLIB] Add VectorAssembler as a feature transformer + Xiangrui Meng + 2015-04-12 22:42:01 -0700 + Commit: 9294044, github.com/apache/spark/pull/5196 + + [SPARK-5886][ML] Add StringIndexer as a feature transformer + Xiangrui Meng + 2015-04-12 22:41:05 -0700 + Commit: 685ddcf, github.com/apache/spark/pull/4735 + + [SPARK-4081] [mllib] VectorIndexer + Joseph K. Bradley + 2015-04-12 22:38:27 -0700 + Commit: d3792f5, github.com/apache/spark/pull/3000 + + [SPARK-6643][MLLIB] Implement StandardScalerModel missing methods + lewuathe + 2015-04-12 22:17:16 -0700 + Commit: fc17661, github.com/apache/spark/pull/5310 + + [SPARK-6765] Fix test code style for core. + Reynold Xin + 2015-04-12 20:50:49 -0700 + Commit: a1fe59d, github.com/apache/spark/pull/5484 + + [MINOR] a typo: coalesce + Daoyuan Wang + 2015-04-12 18:58:53 +0100 + Commit: 04bcd67, github.com/apache/spark/pull/5482 + + [SPARK-6431][Streaming][Kafka] Error message for partition metadata requ... + cody koeninger + 2015-04-12 17:37:30 +0100 + Commit: 6ac8eea, github.com/apache/spark/pull/5454 + + [SPARK-6843][core]Add volatile for the "state" + lisurprise + 2015-04-12 13:41:44 +0100 + Commit: ddc1743, github.com/apache/spark/pull/5448 + + [SPARK-6866][Build] Remove duplicated dependency in launcher/pom.xml + Guancheng (G.C.) Chen + 2015-04-12 11:36:41 +0100 + Commit: e9445b1, github.com/apache/spark/pull/5476 + + [SPARK-6677] [SQL] [PySpark] fix cached classes + Davies Liu + 2015-04-11 22:33:23 -0700 + Commit: 5d8f7b9, github.com/apache/spark/pull/5445 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-04-11 22:12:56 -0700 + Commit: 0cc8fcb, github.com/apache/spark/pull/4994 + + SPARK-6710 GraphX Fixed Wrong initial bias in GraphX SVDPlusPlus + Michael Malak + 2015-04-11 21:01:23 -0700 + Commit: 1205f7e, github.com/apache/spark/pull/5464 + + [HOTFIX] Add explicit return types to fix lint errors + Josh Rosen + 2015-04-11 20:12:40 -0700 + Commit: dea5dac + + [SQL][minor] move `resolveGetField` into a object + Wenchen Fan + 2015-04-11 19:35:56 -0700 + Commit: 5c2844c, github.com/apache/spark/pull/5435 + + [SPARK-6367][SQL] Use the proper data type for those expressions that are hijacking existing data types. + Yin Huai + 2015-04-11 19:26:15 -0700 + Commit: 6d4e854, github.com/apache/spark/pull/5094 + + [SQL] Handle special characters in the authority of a Path's URI. 
+ Yin Huai + 2015-04-11 18:44:54 -0700 + Commit: d2383fb, github.com/apache/spark/pull/5381 + + [SPARK-6379][SQL] Support a functon to call user-defined functions registered in SQLContext + Takeshi YAMAMURO + 2015-04-11 18:41:12 -0700 + Commit: 352a5da, github.com/apache/spark/pull/5061 + + [SPARK-6179][SQL] Add token for "SHOW PRINCIPALS role_name" and "SHOW TRANSACTIONS" and "SHOW COMPACTIONS" + DoingDone9 <799203320@qq.com>, Zhongshuai Pei <799203320@qq.com>, Xu Tingjun + 2015-04-11 18:34:17 -0700 + Commit: 48cc840, github.com/apache/spark/pull/4902 + + [Spark-5068][SQL]Fix bug query data when path doesn't exist for HiveContext + lazymam500 , lazyman + 2015-04-11 18:33:14 -0700 + Commit: 1f39a61, github.com/apache/spark/pull/5059 + + [SPARK-6199] [SQL] Support CTE in HiveContext and SQLContext + haiyang + 2015-04-11 18:30:17 -0700 + Commit: 2f53588, github.com/apache/spark/pull/4929 + + [Minor][SQL] Fix typo in sql + Guancheng (G.C.) Chen + 2015-04-11 15:43:12 -0700 + Commit: 7dbd371, github.com/apache/spark/pull/5474 + + [SPARK-6863] Fix formatting on SQL programming guide. + Santiago M. Mola + 2015-04-11 15:42:03 -0700 + Commit: 6437e7c, github.com/apache/spark/pull/5472 + + [SPARK-6611][SQL] Add support for INTEGER as synonym of INT. + Santiago M. Mola + 2015-04-11 14:52:49 -0700 + Commit: 5f7b7cd, github.com/apache/spark/pull/5271 + + [SPARK-6858][SQL] Register Java HashMap for SparkSqlSerializer + Liang-Chi Hsieh + 2015-04-11 14:50:50 -0700 + Commit: 198cf2a, github.com/apache/spark/pull/5465 + + [SPARK-6835] [SQL] Fix bug of Hive UDTF in Lateral View (ClassNotFound) + Cheng Hao + 2015-04-11 22:11:03 +0800 + Commit: 3ceb810, github.com/apache/spark/pull/5444 + + [hotfix] [build] Make sure JAVA_HOME is set for tests. + Marcelo Vanzin + 2015-04-11 13:10:01 +0100 + Commit: 694aef0, github.com/apache/spark/pull/5441 + + [Minor][Core] Fix typo + Liang-Chi Hsieh + 2015-04-11 13:07:41 +0100 + Commit: 95a0759, github.com/apache/spark/pull/5466 + + [SQL] [SPARK-6620] Speed up toDF() and rdd() functions by constructing converters in ScalaReflection + Volodymyr Lyubinets + 2015-04-10 16:27:56 -0700 + Commit: 67d0688, github.com/apache/spark/pull/5279 + + [SPARK-6851][SQL] Create new instance for each converted parquet relation + Michael Armbrust + 2015-04-10 16:05:14 -0700 + Commit: 23d5f88, github.com/apache/spark/pull/5458 + + [SPARK-6850] [SparkR] use one partition when we need to compare the whole result + Davies Liu + 2015-04-10 15:35:45 -0700 + Commit: 68ecdb7, github.com/apache/spark/pull/5460 + + [SPARK-6216] [PySpark] check the python version in worker + Davies Liu + 2015-04-10 14:04:53 -0700 + Commit: 4740d6a, github.com/apache/spark/pull/5404 + + [SPARK-5969][PySpark] Fix descending pyspark.rdd.sortByKey. 
+ Milan Straka + 2015-04-10 13:50:32 -0700 + Commit: 0375134, github.com/apache/spark/pull/4761 + + [SQL] [SPARK-6794] Use kryo-based SparkSqlSerializer for GeneralHashedRelation + Volodymyr Lyubinets + 2015-04-10 12:09:54 -0700 + Commit: b9baa4c, github.com/apache/spark/pull/5433 + + [SPARK-6773][Tests]Fix RAT checks still passed issue when download rat jar failed + June.He + 2015-04-10 20:02:35 +0100 + Commit: 9f5ed99, github.com/apache/spark/pull/5421 + + [SPARK-6766][Streaming] Fix issue about StreamingListenerBatchSubmitted and StreamingListenerBatchStarted + zsxwing + 2015-04-10 01:51:42 -0700 + Commit: 18ca089, github.com/apache/spark/pull/5414 + + [SPARK-6211][Streaming] Add Python Kafka API unit test + jerryshao , Saisai Shao + 2015-04-09 23:14:24 -0700 + Commit: 3290d2d, github.com/apache/spark/pull/4961 + + [SPARK-6577] [MLlib] [PySpark] SparseMatrix should be supported in PySpark + MechCoder + 2015-04-09 23:10:13 -0700 + Commit: e236081, github.com/apache/spark/pull/5355 + + [SPARK-3074] [PySpark] support groupByKey() with single huge key + Davies Liu , Davies Liu + 2015-04-09 17:07:23 -0700 + Commit: b5c51c8, github.com/apache/spark/pull/1977 + + [Spark-6693][MLlib]add tostring with max lines and width for matrix + Yuhao Yang + 2015-04-09 15:37:45 -0700 + Commit: 9c67049, github.com/apache/spark/pull/5344 + + [SPARK-6264] [MLLIB] Support FPGrowth algorithm in Python API + Yanbo Liang + 2015-04-09 15:10:10 -0700 + Commit: a0411ae, github.com/apache/spark/pull/5213 + + [SPARK-6758]block the right jetty package in log + WangTaoTheTonic + 2015-04-09 17:44:08 -0400 + Commit: 7d92db3, github.com/apache/spark/pull/5406 + + [minor] [examples] Avoid packaging duplicate classes. + Marcelo Vanzin + 2015-04-09 07:07:50 -0400 + Commit: 470d745, github.com/apache/spark/pull/5379 + + SPARK-4924 addendum. 
Minor assembly directory fix in load-spark-env-sh + raschild + 2015-04-09 07:04:18 -0400 + Commit: 53f6bb1, github.com/apache/spark/pull/5261 + + [SPARK-6343] Doc driver-worker network reqs + Peter Parente + 2015-04-09 06:37:20 -0400 + Commit: b9c51c0, github.com/apache/spark/pull/5382 + + [SPARK-5654] Integrate SparkR + Shivaram Venkataraman , Shivaram Venkataraman , Zongheng Yang , cafreeman , Shivaram Venkataraman , Davies Liu , Davies Liu , hlin09 , Sun Rui , lythesia , oscaroboto , Antonio Piccolboni , root , edwardt , hqzizania , dputler , Todd Gao , Chris Freeman , Felix Cheung , Hossein , Evert Lammerts , Felix Cheung , felixcheung , Ryan Hafen , Ashutosh Raina , Oscar Olmedo , Josh Rosen , Yi Lu , Harihar Nahak + 2015-04-08 22:45:40 -0700 + Commit: 2fe0a1a, github.com/apache/spark/pull/5096 + + [SPARK-6765] Fix test code style for SQL + Reynold Xin + 2015-04-08 20:35:29 -0700 + Commit: 1b2aab8, github.com/apache/spark/pull/5412 + + [SPARK-6696] [SQL] Adds HiveContext.refreshTable to PySpark + Cheng Lian + 2015-04-08 18:47:39 -0700 + Commit: 891ada5, github.com/apache/spark/pull/5349 + + [SPARK-6451][SQL] supported code generation for CombineSum + Venkata Ramana Gollamudi + 2015-04-08 18:42:34 -0700 + Commit: 7d7384c, github.com/apache/spark/pull/5138 + + [SQL][minor] remove duplicated resolveGetField and update comment + Wenchen Fan + 2015-04-08 13:57:01 -0700 + Commit: 9418280, github.com/apache/spark/pull/5304 + + [SPARK-4346][SPARK-3596][YARN] Commonize the monitor logic + unknown , Sephiroth-Lin + 2015-04-08 13:56:42 -0700 + Commit: 55a92ef, github.com/apache/spark/pull/5305 + + [SPARK-5242]: Add --private-ips flag to EC2 script + Michelangelo D'Agostino + 2015-04-08 16:48:45 -0400 + Commit: 86403f5, github.com/apache/spark/pull/5244 + + [SPARK-6767][SQL] Fixed Query DSL error in spark sql Readme + Tijo Thomas + 2015-04-08 13:42:29 -0700 + Commit: 2f482d7, github.com/apache/spark/pull/5415 + + [SPARK-6781] [SQL] use sqlContext in python shell + Davies Liu + 2015-04-08 13:31:45 -0700 + Commit: 6ada4f6, github.com/apache/spark/pull/5425 + + [SPARK-6765] Fix test code style for mllib. + Reynold Xin + 2015-04-08 11:32:44 -0700 + Commit: 66159c3, github.com/apache/spark/pull/5411 + + [SPARK-6765] Fix test code style for graphx. + Reynold Xin + 2015-04-08 11:31:48 -0700 + Commit: 8d812f9, github.com/apache/spark/pull/5410 + + [SPARK-6753] Clone SparkConf in ShuffleSuite tests + Kay Ousterhout + 2015-04-08 10:26:45 -0700 + Commit: 9d44ddc, github.com/apache/spark/pull/5401 + + [SPARK-6506] [pyspark] Do not try to retrieve SPARK_HOME when not needed... + Marcelo Vanzin + 2015-04-08 10:14:52 -0700 + Commit: f7e21dd, github.com/apache/spark/pull/5405 + + [SPARK-6765] Fix test code style for streaming. 
+ Reynold Xin + 2015-04-08 00:24:59 -0700 + Commit: 15e0d2b, github.com/apache/spark/pull/5409 + + [SPARK-6754] Remove unnecessary TaskContextHelper + Kay Ousterhout + 2015-04-07 22:40:42 -0700 + Commit: 8d2a36c, github.com/apache/spark/pull/5402 + + [SPARK-6705][MLLIB] Add fit intercept api to ml logisticregression + Omede Firouz + 2015-04-07 23:36:31 -0400 + Commit: d138aa8, github.com/apache/spark/pull/5301 + + [SPARK-6737] Fix memory leak in OutputCommitCoordinator + Josh Rosen + 2015-04-07 16:18:55 -0700 + Commit: c83e039, github.com/apache/spark/pull/5397 + + [SPARK-6748] [SQL] Makes QueryPlan.schema a lazy val + Cheng Lian + 2015-04-08 07:00:56 +0800 + Commit: 77bcceb, github.com/apache/spark/pull/5398 + + [SPARK-6720][MLLIB] PySpark MultivariateStatisticalSummary unit test for normL1... + lewuathe + 2015-04-07 14:36:57 -0700 + Commit: fc957dc, github.com/apache/spark/pull/5374 + + Revert "[SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path" + Xiangrui Meng + 2015-04-07 14:34:15 -0700 + Commit: e6f08fb + + [SPARK-6568] spark-shell.cmd --jars option does not accept the jar that has space in its path + Masayoshi TSUZUKI + 2015-04-07 14:29:53 -0700 + Commit: 596ba77, github.com/apache/spark/pull/5347 + + [SPARK-6750] Upgrade ScalaStyle to 0.7. + Reynold Xin + 2015-04-07 12:37:33 -0700 + Commit: 1232215, github.com/apache/spark/pull/5399 + + Replace use of .size with .length for Arrays + sksamuel + 2015-04-07 10:43:22 -0700 + Commit: 2c32bef, github.com/apache/spark/pull/5376 + + [SPARK-6733][ Scheduler]Added scala.language.existentials + Vinod K C + 2015-04-07 10:42:08 -0700 + Commit: 7162ecf, github.com/apache/spark/pull/5384 + + [SPARK-3591][YARN]fire and forget for YARN cluster mode + WangTaoTheTonic + 2015-04-07 08:36:25 -0500 + Commit: b65bad6, github.com/apache/spark/pull/5297 + + [SPARK-6736][GraphX][Doc]Example of Graph#aggregateMessages has error + Sasaki Toru + 2015-04-07 01:55:32 -0700 + Commit: ae980eb, github.com/apache/spark/pull/5388 + + [SPARK-6636] Use public DNS hostname everywhere in spark_ec2.py + Matt Aasted + 2015-04-06 23:50:48 -0700 + Commit: 6f0d55d, github.com/apache/spark/pull/5302 + + [SPARK-6716] Change SparkContext.DRIVER_IDENTIFIER from <driver> to driver + Josh Rosen + 2015-04-06 23:33:16 -0700 + Commit: a0846c4, github.com/apache/spark/pull/5372 + + [Minor] [SQL] [SPARK-6729] Minor fix for DriverQuirks get + Volodymyr Lyubinets + 2015-04-06 18:00:51 -0700 + Commit: e40ea87, github.com/apache/spark/pull/5378 + + [MLlib] [SPARK-6713] Iterators in columnSimilarities for mapPartitionsWithIndex + Reza Zadeh + 2015-04-06 13:15:01 -0700 + Commit: 30363ed, github.com/apache/spark/pull/5364 + + SPARK-6569 [STREAMING] Down-grade same-offset message in Kafka streaming to INFO + Sean Owen + 2015-04-06 10:18:56 +0100 + Commit: 9fe4125, github.com/apache/spark/pull/5366 + + [SPARK-6673] spark-shell.cmd can't start in Windows even when spark was built + Masayoshi TSUZUKI + 2015-04-06 10:11:20 +0100 + Commit: 49f3882, github.com/apache/spark/pull/5328 + + [SPARK-6602][Core] Update MapOutputTrackerMasterActor to MapOutputTrackerMasterEndpoint + zsxwing + 2015-04-05 21:57:15 -0700 + Commit: 0b5d028, github.com/apache/spark/pull/5371 + + [SPARK-6262][MLLIB]Implement missing methods for MultivariateStatisticalSummary + lewuathe + 2015-04-05 16:13:31 -0700 + Commit:
acffc43, github.com/apache/spark/pull/5359 + + [SPARK-6602][Core] Replace direct use of Akka with Spark RPC interface - part 1 + zsxwing + 2015-04-04 11:52:05 -0700 + Commit: f15806a, github.com/apache/spark/pull/5268 + + [SPARK-6607][SQL] Check invalid characters for Parquet schema and show error messages + Liang-Chi Hsieh + 2015-04-05 00:20:43 +0800 + Commit: 7bca62f, github.com/apache/spark/pull/5263 + + [SQL] Use path.makeQualified in newParquet. + Yin Huai + 2015-04-04 23:26:10 +0800 + Commit: da25c86, github.com/apache/spark/pull/5353 + + [SPARK-6700] disable flaky test + Davies Liu + 2015-04-03 15:22:21 -0700 + Commit: 9b40c17, github.com/apache/spark/pull/5356 + + [SPARK-6647][SQL] Make trait StringComparison as BinaryPredicate and fix unit tests of string data source Filter + Liang-Chi Hsieh + 2015-04-03 12:35:00 -0700 + Commit: 26b415e, github.com/apache/spark/pull/5309 + + [SPARK-6688] [core] Always use resolved URIs in EventLoggingListener. + Marcelo Vanzin + 2015-04-03 11:54:31 -0700 + Commit: 14632b7, github.com/apache/spark/pull/5340 + + Closes #3158 + Reynold Xin + 2015-04-03 11:53:07 -0700 + Commit: ffe8cc9 + + [SPARK-6640][Core] Fix the race condition of creating HeartbeatReceiver and retrieving HeartbeatReceiver + zsxwing + 2015-04-03 11:44:27 -0700 + Commit: 88504b7, github.com/apache/spark/pull/5306 + + [SPARK-6492][CORE] SparkContext.stop() can deadlock when DAGSchedulerEventProcessLoop dies + Ilya Ganelin + 2015-04-03 19:23:11 +0100 + Commit: 2c43ea3, github.com/apache/spark/pull/5277 + + [SPARK-5203][SQL] fix union with different decimal type + guowei2 + 2015-04-04 02:02:30 +0800 + Commit: c23ba81, github.com/apache/spark/pull/4004 + + [Minor][SQL] Fix typo + Liang-Chi Hsieh + 2015-04-03 18:31:48 +0100 + Commit: dc6dff2, github.com/apache/spark/pull/5352 + + [SPARK-6615][MLLIB] Python API for Word2Vec + lewuathe + 2015-04-03 09:49:50 -0700 + Commit: 512a2f1, github.com/apache/spark/pull/5296 + + [MLLIB] Remove println in LogisticRegression.scala + Omede Firouz + 2015-04-03 10:26:43 +0100 + Commit: b52c7f9, github.com/apache/spark/pull/5338 + + [SPARK-6560][CORE] Do not suppress exceptions from writer.write. + Stephen Haberman + 2015-04-03 09:48:37 +0100 + Commit: b0d884f, github.com/apache/spark/pull/5223 + + [SPARK-6428] Turn on explicit type checking for public methods. 
+ Reynold Xin + 2015-04-03 01:25:02 -0700 + Commit: 82701ee, github.com/apache/spark/pull/5342 + + [SPARK-6575][SQL] Converted Parquet Metastore tables no longer cache metadata + Yin Huai + 2015-04-03 14:40:36 +0800 + Commit: c42c3fc, github.com/apache/spark/pull/5339 + + [SPARK-6621][Core] Fix the bug that calling EventLoop.stop in EventLoop.onReceive/onError/onStart doesn't call onStop + zsxwing + 2015-04-02 22:54:30 -0700 + Commit: 440ea31, github.com/apache/spark/pull/5280 + + [SPARK-6345][STREAMING][MLLIB] Fix for training with prediction + freeman + 2015-04-02 21:37:44 -0700 + Commit: 6e1c1ec, github.com/apache/spark/pull/5037 + + [CORE] The descriptionof jobHistory config should be spark.history.fs.logDirectory + KaiXinXiaoLei + 2015-04-02 20:24:31 -0700 + Commit: 8a0aa81, github.com/apache/spark/pull/5332 + + [SPARK-6575][SQL] Converted Parquet Metastore tables no longer cache metadata + Yin Huai + 2015-04-02 20:23:08 -0700 + Commit: 4b82bd7, github.com/apache/spark/pull/5339 + + [SPARK-6650] [core] Stop ExecutorAllocationManager when context stops. + Marcelo Vanzin + 2015-04-02 19:48:55 -0700 + Commit: 45134ec, github.com/apache/spark/pull/5311 + + [SPARK-6686][SQL] Use resolved output instead of names for toDF rename + Michael Armbrust + 2015-04-02 18:30:55 -0700 + Commit: 052dee0, github.com/apache/spark/pull/5337 + + [SPARK-6243][SQL] The Operation of match did not conside the scenarios that order.dataType does not match NativeType + DoingDone9 <799203320@qq.com> + 2015-04-02 17:23:51 -0700 + Commit: 947802c, github.com/apache/spark/pull/4959 + + [SQL][Minor] Use analyzed logical instead of unresolved in HiveComparisonTest + Cheng Hao + 2015-04-02 17:20:31 -0700 + Commit: dfd2982, github.com/apache/spark/pull/4946 + + [SPARK-6618][SPARK-6669][SQL] Lock Hive metastore client correctly. + Yin Huai , Michael Armbrust + 2015-04-02 16:46:50 -0700 + Commit: 5db8912, github.com/apache/spark/pull/5333 + + [Minor] [SQL] Follow-up of PR #5210 + Cheng Lian + 2015-04-02 16:15:34 -0700 + Commit: d3944b6, github.com/apache/spark/pull/5219 + + [SPARK-6655][SQL] We need to read the schema of a data source table stored in spark.sql.sources.schema property + Yin Huai + 2015-04-02 16:02:31 -0700 + Commit: 251698f, github.com/apache/spark/pull/5313 + + [SQL] Throw UnsupportedOperationException instead of NotImplementedError + Michael Armbrust + 2015-04-02 16:01:03 -0700 + Commit: 4214e50, github.com/apache/spark/pull/5315 + + SPARK-6414: Spark driver failed with NPE on job cancelation + Hung Lin + 2015-04-02 14:01:43 -0700 + Commit: e3202aa, github.com/apache/spark/pull/5124 + + [SPARK-6667] [PySpark] remove setReuseAddress + Davies Liu + 2015-04-02 12:18:33 -0700 + Commit: 0cce545, github.com/apache/spark/pull/5324 + + [SPARK-6672][SQL] convert row to catalyst in createDataFrame(RDD[Row], ...) + Xiangrui Meng + 2015-04-02 17:57:01 +0800 + Commit: 424e987, github.com/apache/spark/pull/5329 + + [SPARK-6627] Some clean-up in shuffle code. 
+ Patrick Wendell + 2015-04-01 23:42:09 -0700 + Commit: 6562787, github.com/apache/spark/pull/5286 + + [SPARK-6663] [SQL] use Literal.create instread of constructor + Davies Liu + 2015-04-01 23:11:38 -0700 + Commit: 40df5d4, github.com/apache/spark/pull/5320 + + Revert "[SPARK-6618][SQL] HiveMetastoreCatalog.lookupRelation should use fine-grained lock" + Cheng Lian + 2015-04-02 12:56:34 +0800 + Commit: 2bc7fe7 + + [SPARK-6658][SQL] Update DataFrame documentation to fix type references. + Chet Mancini + 2015-04-01 21:39:46 -0700 + Commit: 191524e, github.com/apache/spark/pull/5316 + + [SPARK-6578] Small rewrite to make the logic more clear in MessageWithHeader.transferTo. + Reynold Xin + 2015-04-01 18:36:06 -0700 + Commit: 899ebcb, github.com/apache/spark/pull/5319 + + [SPARK-6660][MLLIB] pythonToJava doesn't recognize object arrays + Xiangrui Meng + 2015-04-01 18:17:07 -0700 + Commit: 4815bc2, github.com/apache/spark/pull/5318 + + [SPARK-6553] [pyspark] Support functools.partial as UDF + ksonj + 2015-04-01 17:23:57 -0700 + Commit: 757b2e9, github.com/apache/spark/pull/5206 + + [SPARK-6580] [MLLIB] Optimize LogisticRegressionModel.predictPoint + Yanbo Liang + 2015-04-01 17:19:36 -0700 + Commit: 86b4399, github.com/apache/spark/pull/5249 + + [SPARK-6576] [MLlib] [PySpark] DenseMatrix in PySpark should support indexing + MechCoder + 2015-04-01 17:03:39 -0700 + Commit: 2fa3b47, github.com/apache/spark/pull/5232 + + [SPARK-6642][MLLIB] use 1.2 lambda scaling and remove addImplicit from NormalEquation + Xiangrui Meng + 2015-04-01 16:47:18 -0700 + Commit: ccafd75, github.com/apache/spark/pull/5314 + + [SPARK-6578] [core] Fix thread-safety issue in outbound path of network library. + Marcelo Vanzin + 2015-04-01 16:06:11 -0700 + Commit: f084c5d, github.com/apache/spark/pull/5234 + + [SPARK-6657] [Python] [Docs] fixed python doc build warnings + Joseph K. 
Bradley + 2015-04-01 15:15:47 -0700 + Commit: fb25e8c, github.com/apache/spark/pull/5317 + + [SPARK-6651][MLLIB] delegate dense vector arithmetics to the underlying numpy array + Xiangrui Meng + 2015-04-01 13:29:04 -0700 + Commit: 2275acc, github.com/apache/spark/pull/5312 + + SPARK-6433 hive tests to import spark-sql test JAR for QueryTest access + Steve Loughran + 2015-04-01 16:26:54 +0100 + Commit: ee11be2, github.com/apache/spark/pull/5119 + + [SPARK-6608] [SQL] Makes DataFrame.rdd a lazy val + Cheng Lian + 2015-04-01 21:34:45 +0800 + Commit: d36c5fc, github.com/apache/spark/pull/5265 + + SPARK-6626 [DOCS]: Corrected Scala:TwitterUtils parameters + jayson + 2015-04-01 11:12:55 +0100 + Commit: 0358b08, github.com/apache/spark/pull/5295 + + [SPARK-6597][Minor] Replace `input:checkbox` with `input[type="checkbox"]` in additional-metrics.js + Kousuke Saruta + 2015-04-01 11:11:56 +0100 + Commit: d824c11, github.com/apache/spark/pull/5254 + + [EC2] [SPARK-6600] Open ports in ec2/spark_ec2.py to allow HDFS NFS gateway + Florian Verhein + 2015-04-01 11:10:43 +0100 + Commit: 4122623, github.com/apache/spark/pull/5257 + + [SPARK-4655][Core] Split Stage into ShuffleMapStage and ResultStage subclasses + Ilya Ganelin , Ilya Ganelin + 2015-04-01 11:09:00 +0100 + Commit: ff1915e, github.com/apache/spark/pull/4708 + + [Doc] Improve Python DataFrame documentation + Reynold Xin + 2015-03-31 18:31:36 -0700 + Commit: 305abe1, github.com/apache/spark/pull/5287 + + [SPARK-6614] OutputCommitCoordinator should clear authorized committer only after authorized committer fails, not after any failure + Josh Rosen + 2015-03-31 16:18:39 -0700 + Commit: 3732607, github.com/apache/spark/pull/5276 + + [SPARK-5692] [MLlib] Word2Vec save/load + MechCoder + 2015-03-31 16:01:08 -0700 + Commit: 0e00f12, github.com/apache/spark/pull/5291 + + [SPARK-6633][SQL] Should be "Contains" instead of "EndsWith" when constructing sources.StringContains + Liang-Chi Hsieh + 2015-03-31 13:18:07 -0700 + Commit: 2036bc5, github.com/apache/spark/pull/5299 + + [SPARK-5371][SQL] Propagate types after function conversion, before futher resolution + Michael Armbrust + 2015-03-31 11:34:29 -0700 + Commit: beebb7f, github.com/apache/spark/pull/5278 + + [SPARK-6255] [MLLIB] Support multiclass classification in Python API + Yanbo Liang + 2015-03-31 11:32:14 -0700 + Commit: b5bd75d, github.com/apache/spark/pull/5137 + + [SPARK-6598][MLLIB] Python API for IDFModel + lewuathe + 2015-03-31 11:25:21 -0700 + Commit: 46de6c0, github.com/apache/spark/pull/5264 + + [SPARK-6145][SQL] fix ORDER BY on nested fields + Michael Armbrust + 2015-03-31 11:23:18 -0700 + Commit: cd48ca5, github.com/apache/spark/pull/5189 + + [SPARK-6575] [SQL] Adds configuration to disable schema merging while converting metastore Parquet tables + Cheng Lian + 2015-03-31 11:21:15 -0700 + Commit: 8102014, github.com/apache/spark/pull/5231 + + [SPARK-6555] [SQL] Overrides equals() and hashCode() for MetastoreRelation + Cheng Lian + 2015-03-31 11:18:25 -0700 + Commit: a7992ff, github.com/apache/spark/pull/5289 + + [SPARK-4894][mllib] Added Bernoulli option to NaiveBayes model in mllib + leahmcguire , Joseph K. 
Bradley , Leah McGuire + 2015-03-31 11:16:55 -0700 + Commit: d01a6d8, github.com/apache/spark/pull/4087 + + [SPARK-6542][SQL] add CreateStruct + Xiangrui Meng + 2015-03-31 17:05:23 +0800 + Commit: a05835b, github.com/apache/spark/pull/5195 + + [SPARK-6618][SQL] HiveMetastoreCatalog.lookupRelation should use fine-grained lock + Yin Huai + 2015-03-31 16:28:40 +0800 + Commit: 314afd0, github.com/apache/spark/pull/5281 + + [SPARK-6623][SQL] Alias DataFrame.na.drop and DataFrame.na.fill in Python. + Reynold Xin + 2015-03-31 00:25:23 -0700 + Commit: b80a030, github.com/apache/spark/pull/5284 + + [SPARK-6625][SQL] Add common string filters to data sources. + Reynold Xin + 2015-03-31 00:19:51 -0700 + Commit: f07e714, github.com/apache/spark/pull/5285 + + [SPARK-5124][Core] Move StopCoordinator to the receive method since it does not require a reply + zsxwing + 2015-03-30 22:10:49 -0700 + Commit: 5677557, github.com/apache/spark/pull/5283 + + [SPARK-6119][SQL] DataFrame support for missing data handling + Reynold Xin + 2015-03-30 20:47:10 -0700 + Commit: b8ff2bc, github.com/apache/spark/pull/5274 + + [SPARK-6369] [SQL] Uses commit coordinator to help committing Hive and Parquet tables + Cheng Lian + 2015-03-31 07:48:37 +0800 + Commit: fde6945, github.com/apache/spark/pull/5139 + + [SPARK-6603] [PySpark] [SQL] add SQLContext.udf and deprecate inferSchema() and applySchema + Davies Liu + 2015-03-30 15:47:00 -0700 + Commit: f76d2e5, github.com/apache/spark/pull/5273 + + [HOTFIX][SPARK-4123]: Updated to fix bug where multiple dependencies added breaks Github output + Brennon York + 2015-03-30 12:48:26 -0700 + Commit: df35500, github.com/apache/spark/pull/5269 + + [SPARK-6592][SQL] fix filter for scaladoc to generate API doc for Row class under catalyst dir + CodingCat + 2015-03-30 11:54:44 -0700 + Commit: 32259c6, github.com/apache/spark/pull/5252 + + [SPARK-6595][SQL] MetastoreRelation should be a MultiInstanceRelation + Michael Armbrust + 2015-03-30 22:24:12 +0800 + Commit: fe81f6c, github.com/apache/spark/pull/5251 + + [HOTFIX] Update start-slave.sh + Jose Manuel Gomez + 2015-03-30 14:59:08 +0100 + Commit: 19d4c39, github.com/apache/spark/pull/5262 + + [SPARK-5750][SPARK-3441][SPARK-5836][CORE] Added documentation explaining shuffle + Ilya Ganelin , Ilya Ganelin + 2015-03-30 11:52:02 +0100 + Commit: 4bdfb7b, github.com/apache/spark/pull/5074 + + [SPARK-6596] fix the instruction on building scaladoc + CodingCat + 2015-03-30 11:41:43 +0100 + Commit: de67330, github.com/apache/spark/pull/5253 + + [spark-sql] a better exception message than "scala.MatchError" for unsupported types in Schema creation + Eran Medan + 2015-03-30 00:02:52 -0700 + Commit: 17b13c5, github.com/apache/spark/pull/5235 + + Fix string interpolator error in HeartbeatReceiver + Li Zhihui + 2015-03-29 21:30:37 -0700 + Commit: 01dc9f5, github.com/apache/spark/pull/5255 + + [SPARK-5124][Core] A standard RPC interface and an Akka implementation + zsxwing + 2015-03-29 21:25:09 -0700 + Commit: a8d53af, github.com/apache/spark/pull/4588 + + [SPARK-6585][Tests]Fix FileServerSuite testcase in some Env. 
+ June.He + 2015-03-29 12:47:22 +0100 + Commit: 0e2753f, github.com/apache/spark/pull/5239 + + [SPARK-6558] Utils.getCurrentUserName returns the full principal name instead of login name + Thomas Graves + 2015-03-29 12:43:30 +0100 + Commit: 52ece26, github.com/apache/spark/pull/5229 + + [SPARK-6406] Launch Spark using assembly jar instead of a separate launcher jar + Nishkam Ravi , nishkamravi2 , nravi + 2015-03-29 12:40:37 +0100 + Commit: e3eb393, github.com/apache/spark/pull/5085 + + [SPARK-4123][Project Infra]: Show new dependencies added in pull requests + Brennon York + 2015-03-29 12:37:53 +0100 + Commit: 55153f5, github.com/apache/spark/pull/5093 + + [DOC] Improvements to Python docs. + Reynold Xin + 2015-03-28 23:59:27 -0700 + Commit: 5eef00d, github.com/apache/spark/pull/5238 + + [SPARK-6571][MLLIB] use wrapper in MatrixFactorizationModel.load + Xiangrui Meng + 2015-03-28 15:08:05 -0700 + Commit: f75f633, github.com/apache/spark/pull/5243 + + [SPARK-6552][Deploy][Doc]expose start-slave.sh to user and update outdated doc + WangTaoTheTonic + 2015-03-28 12:32:35 +0000 + Commit: 9963143, github.com/apache/spark/pull/5205 + + [SPARK-6538][SQL] Add missing nullable Metastore fields when merging a Parquet schema + Adam Budde + 2015-03-28 09:14:09 +0800 + Commit: 5909f09, github.com/apache/spark/pull/5214 + + [SPARK-6564][SQL] SQLContext.emptyDataFrame should contain 0 row, not 1 row + Reynold Xin + 2015-03-27 14:56:57 -0700 + Commit: 3af7334, github.com/apache/spark/pull/5226 + + [SPARK-6526][ML] Add Normalizer transformer in ML package + Xusen Yin + 2015-03-27 13:29:10 -0700 + Commit: d5497ab, github.com/apache/spark/pull/5181 + + [SPARK-6574] [PySpark] fix sql example + Davies Liu + 2015-03-27 11:42:26 -0700 + Commit: 887e1b7, github.com/apache/spark/pull/5230 + + [SPARK-6550][SQL] Use analyzed plan in DataFrame + Michael Armbrust + 2015-03-27 11:40:00 -0700 + Commit: 5d9c37c, github.com/apache/spark/pull/5217 + + [SPARK-6544][build] Increment Avro version from 1.7.6 to 1.7.7 + Dean Chen + 2015-03-27 14:32:51 +0000 + Commit: aa2b991, github.com/apache/spark/pull/5193 + + [SPARK-6556][Core] Fix wrong parsing logic of executorTimeoutMs and checkTimeoutIntervalMs in HeartbeatReceiver + zsxwing + 2015-03-27 12:31:06 +0000 + Commit: da546b7, github.com/apache/spark/pull/5209 + + [SPARK-6341][mllib] Upgrade breeze from 0.11.1 to 0.11.2 + Yu ISHIKAWA + 2015-03-27 00:15:02 -0700 + Commit: f43a610, github.com/apache/spark/pull/5222 + + [SPARK-6405] Limiting the maximum Kryo buffer size to be 2GB. 
+ mcheah + 2015-03-26 22:48:42 -0700 + Commit: 49d2ec6, github.com/apache/spark/pull/5218 + + [SPARK-6510][GraphX]: Add Graph#minus method to act as Set#difference + Brennon York + 2015-03-26 19:08:09 -0700 + Commit: 39fb579, github.com/apache/spark/pull/5175 + + [DOCS][SQL] Fix JDBC example + Michael Armbrust + 2015-03-26 14:51:46 -0700 + Commit: aad0032, github.com/apache/spark/pull/5192 + + [SPARK-6554] [SQL] Don't push down predicates which reference partition column(s) + Cheng Lian + 2015-03-26 13:11:37 -0700 + Commit: 71a0d40, github.com/apache/spark/pull/5210 + + [SPARK-6117] [SQL] Improvements to DataFrame.describe() + Reynold Xin + 2015-03-26 12:26:13 -0700 + Commit: 784fcd5, github.com/apache/spark/pull/5201 + + SPARK-6532 [BUILD] LDAModel.scala fails scalastyle on Windows + Sean Owen + 2015-03-26 10:52:31 -0700 + Commit: c3a52a0, github.com/apache/spark/pull/5211 + + SPARK-6480 [CORE] histogram() bucket function is wrong in some simple edge cases + Sean Owen + 2015-03-26 15:00:23 +0000 + Commit: fe15ea9, github.com/apache/spark/pull/5148 + + [MLlib]remove unused import + Yuhao Yang + 2015-03-26 13:27:05 +0000 + Commit: 3ddb975, github.com/apache/spark/pull/5207 + + [SQL][SPARK-6471]: Metastore schema should only be a subset of parquet schema to support dropping of columns using replace columns + Yash Datta + 2015-03-26 21:13:38 +0800 + Commit: 1c05027, github.com/apache/spark/pull/5141 + + [SPARK-6468][Block Manager] Fix the race condition of subDirs in DiskBlockManager + zsxwing + 2015-03-26 12:54:48 +0000 + Commit: 0c88ce5, github.com/apache/spark/pull/5136 + + [SPARK-6465][SQL] Fix serialization of GenericRowWithSchema using kryo + Michael Armbrust + 2015-03-26 18:46:57 +0800 + Commit: f88f51b, github.com/apache/spark/pull/5191 + + [SPARK-6546][Build] Using the wrong code that will make spark compile failed!! + DoingDone9 <799203320@qq.com> + 2015-03-26 17:04:19 +0800 + Commit: 855cba8, github.com/apache/spark/pull/5198 + + [SPARK-6117] [SQL] add describe function to DataFrame for summary statis... + azagrebin + 2015-03-26 00:25:04 -0700 + Commit: 5bbcd13, github.com/apache/spark/pull/5073 + + [SPARK-6536] [PySpark] Column.inSet() in Python + Davies Liu + 2015-03-26 00:01:24 -0700 + Commit: f535802, github.com/apache/spark/pull/5190 + + [SPARK-6463][SQL] AttributeSet.equal should compare size + sisihj , Michael Armbrust + 2015-03-25 19:21:54 -0700 + Commit: 276ef1c, github.com/apache/spark/pull/5194 + + The UT test of spark is failed. 
Because there is a test in SQLQuerySuite about creating table “test” + KaiXinXiaoLei + 2015-03-25 19:15:30 -0700 + Commit: e87bf37, github.com/apache/spark/pull/5150 + + [SPARK-6202] [SQL] enable variable substitution on test framework + Daoyuan Wang + 2015-03-25 18:43:26 -0700 + Commit: 5ab6e9f, github.com/apache/spark/pull/4930 + + [SPARK-6271][SQL] Sort these tokens in alphabetic order to avoid further duplicate in HiveQl + DoingDone9 <799203320@qq.com> + 2015-03-25 18:41:59 -0700 + Commit: 328daf6, github.com/apache/spark/pull/4973 + + [SPARK-6326][SQL] Improve castStruct to be faster + Liang-Chi Hsieh + 2015-03-25 17:52:23 -0700 + Commit: 73d5775, github.com/apache/spark/pull/5017 + + [SPARK-5498][SQL]fix query exception when partition schema does not match table schema + jeanlyn + 2015-03-25 17:47:45 -0700 + Commit: e6d1406, github.com/apache/spark/pull/4289 + + [SPARK-6450] [SQL] Fixes metastore Parquet table conversion + Cheng Lian + 2015-03-25 17:40:19 -0700 + Commit: 8c3b005, github.com/apache/spark/pull/5183 + + [SPARK-6079] Use index to speed up StatusTracker.getJobIdsForGroup() + Josh Rosen + 2015-03-25 17:40:00 -0700 + Commit: d44a336, github.com/apache/spark/pull/4830 + + [SPARK-5987] [MLlib] Save/load for GaussianMixtureModels + MechCoder + 2015-03-25 14:45:23 -0700 + Commit: 4fc4d03, github.com/apache/spark/pull/4986 + + [SPARK-6256] [MLlib] MLlib Python API parity check for regression + Yanbo Liang + 2015-03-25 13:38:33 -0700 + Commit: 4353373, github.com/apache/spark/pull/4997 + + [SPARK-5771] Master UI inconsistently displays application cores + Andrew Or + 2015-03-25 13:28:32 -0700 + Commit: c1b74df, github.com/apache/spark/pull/5177 + + [SPARK-6537] UIWorkloadGenerator: The main thread should not stop SparkContext until all jobs finish + Kousuke Saruta + 2015-03-25 13:27:15 -0700 + Commit: acef51d, github.com/apache/spark/pull/5187 + + [SPARK-6076][Block Manager] Fix a potential OOM issue when StorageLevel is MEMORY_AND_DISK_SER + zsxwing + 2015-03-25 12:09:30 -0700 + Commit: 883b7e9, github.com/apache/spark/pull/4827 + + [SPARK-6409][SQL] It is not necessary that avoid old inteface of hive, because this will make some UDAF can not work. + DoingDone9 <799203320@qq.com> + 2015-03-25 11:11:52 -0700 + Commit: 968408b, github.com/apache/spark/pull/5131 + + [ML][FEATURE] SPARK-5566: RegEx Tokenizer + Augustin Borsu , Augustin Borsu , Augustin Borsu , Xiangrui Meng + 2015-03-25 10:16:39 -0700 + Commit: 982952f, github.com/apache/spark/pull/4504 + + [SPARK-6496] [MLLIB] GeneralizedLinearAlgorithm.run(input, initialWeights) should initialize numFeatures + Yanbo Liang + 2015-03-25 17:05:56 +0000 + Commit: 10c7860, github.com/apache/spark/pull/5167 + + [SPARK-6483][SQL]Improve ScalaUdf called performance. + zzcclp + 2015-03-25 19:11:04 +0800 + Commit: 64262ed, github.com/apache/spark/pull/5154 + + [DOCUMENTATION]Fixed Missing Type Import in Documentation + Bill Chambers , anabranch + 2015-03-24 22:24:35 -0700 + Commit: c5cc414, github.com/apache/spark/pull/5179 + + [SPARK-6515] update OpenHashSet impl + Xiangrui Meng + 2015-03-24 18:58:27 -0700 + Commit: c14ddd9, github.com/apache/spark/pull/5176 + + [SPARK-6428][Streaming] Added explicit types for all public methods. 
+ Reynold Xin + 2015-03-24 17:08:25 -0700 + Commit: 9459865, github.com/apache/spark/pull/5110 + + [SPARK-6512] add contains to OpenHashMap + Xiangrui Meng + 2015-03-24 17:06:22 -0700 + Commit: 6930e96, github.com/apache/spark/pull/5171 + + [SPARK-6469] Improving documentation on YARN local directories usage + Christophe Préaud + 2015-03-24 17:05:49 -0700 + Commit: 05c2214, github.com/apache/spark/pull/5165 + + Revert "[SPARK-5771] Number of Cores in Completed Applications of Standalone Master Web Page always be 0 if sc.stop() is called" + Andrew Or + 2015-03-24 16:49:27 -0700 + Commit: dd907d1 + + Revert "[SPARK-5771][UI][hotfix] Change Requested Cores into * if default cores is not set" + Andrew Or + 2015-03-24 16:41:31 -0700 + Commit: f7c3668 + + [SPARK-3570] Include time to open files in shuffle write time. + Kay Ousterhout + 2015-03-24 16:29:40 -0700 + Commit: d8ccf65, github.com/apache/spark/pull/4550 + + [SPARK-6088] Correct how tasks that get remote results are shown in UI. + Kay Ousterhout + 2015-03-24 16:26:43 -0700 + Commit: 6948ab6, github.com/apache/spark/pull/4839 + + [SPARK-6428][SQL] Added explicit types for all public methods in catalyst + Reynold Xin + 2015-03-24 16:03:55 -0700 + Commit: 7334801, github.com/apache/spark/pull/5162 + + [SPARK-6209] Clean up connections in ExecutorClassLoader after failing to load classes (master branch PR) + Josh Rosen + 2015-03-24 14:38:20 -0700 + Commit: 7215aa74, github.com/apache/spark/pull/4944 + + [SPARK-6458][SQL] Better error messages for invalid data sources + Michael Armbrust + 2015-03-24 14:10:56 -0700 + Commit: a8f51b8, github.com/apache/spark/pull/5158 + + [SPARK-6376][SQL] Avoid eliminating subqueries until optimization + Michael Armbrust + 2015-03-24 14:08:20 -0700 + Commit: cbeaf9e, github.com/apache/spark/pull/5160 + + [SPARK-6375][SQL] Fix formatting of error messages. + Michael Armbrust + 2015-03-24 13:22:46 -0700 + Commit: 046c1e2, github.com/apache/spark/pull/5155 + + [SPARK-6054][SQL] Fix transformations of TreeNodes that hold StructTypes + Michael Armbrust + 2015-03-24 12:28:01 -0700 + Commit: 3fa3d12, github.com/apache/spark/pull/5157 + + [SPARK-6437][SQL] Use completion iterator to close external sorter + Michael Armbrust + 2015-03-24 12:10:30 -0700 + Commit: 26c6ce3, github.com/apache/spark/pull/5161 + + [SPARK-6459][SQL] Warn when constructing trivially true equals predicate + Michael Armbrust + 2015-03-24 12:09:02 -0700 + Commit: 32efadd, github.com/apache/spark/pull/5163 + + [SPARK-6361][SQL] support adding a column with metadata in DF + Xiangrui Meng + 2015-03-24 12:08:19 -0700 + Commit: 6bdddb6, github.com/apache/spark/pull/5151 + + [SPARK-6475][SQL] recognize array types when infer data types from JavaBeans + Xiangrui Meng + 2015-03-24 10:11:27 -0700 + Commit: a1d1529, github.com/apache/spark/pull/5146 + + [ML][docs][minor] Define LabeledDocument/Document classes in CV example + Peter Rudenko + 2015-03-24 16:33:38 +0000 + Commit: 08d4528, github.com/apache/spark/pull/5135 + + [SPARK-5559] [Streaming] [Test] Remove oppotunity we met flakiness when running FlumeStreamSuite + Kousuke Saruta + 2015-03-24 16:13:25 +0000 + Commit: 85cf063, github.com/apache/spark/pull/4337 + + [SPARK-6473] [core] Do not try to figure out Scala version if not needed... 
+ Marcelo Vanzin + 2015-03-24 13:48:33 +0000 + Commit: b293afc, github.com/apache/spark/pull/5143 + + Update the command to use IPython notebook + Cong Yue + 2015-03-24 12:56:13 +0000 + Commit: c12312f, github.com/apache/spark/pull/5111 + + [SPARK-6477][Build]: Run MIMA tests before the Spark test suite + Brennon York + 2015-03-24 10:33:04 +0000 + Commit: 37fac1d, github.com/apache/spark/pull/5145 + + [SPARK-6452] [SQL] Checks for missing attributes and unresolved operator for all types of operator + Cheng Lian + 2015-03-24 01:12:11 -0700 + Commit: 1afcf77, github.com/apache/spark/pull/5129 + + [SPARK-6428] Added explicit types for all public methods in core. + Reynold Xin + 2015-03-23 23:41:06 -0700 + Commit: 4ce2782, github.com/apache/spark/pull/5125 + + [SPARK-6124] Support jdbc connection properties in OPTIONS part of the query + Volodymyr Lyubinets + 2015-03-23 17:00:27 -0700 + Commit: bfd3ee9, github.com/apache/spark/pull/4859 + + Revert "[SPARK-6122][Core] Upgrade Tachyon client version to 0.6.1." + Patrick Wendell + 2015-03-23 15:08:39 -0700 + Commit: 6cd7058 + + [SPARK-6308] [MLlib] [Sql] Override TypeName in VectorUDT and MatrixUDT + MechCoder + 2015-03-23 13:30:21 -0700 + Commit: 474d132, github.com/apache/spark/pull/5118 + + [SPARK-6397][SQL] Check the missingInput simply + Yadong Qi + 2015-03-23 18:16:49 +0800 + Commit: 9f3273b, github.com/apache/spark/pull/5132 + + Revert "[SPARK-6397][SQL] Check the missingInput simply" + Cheng Lian + 2015-03-23 12:15:19 +0800 + Commit: bf044de + + [SPARK-6397][SQL] Check the missingInput simply + q00251598 + 2015-03-23 12:06:13 +0800 + Commit: e566fe5, github.com/apache/spark/pull/5082 + + [SPARK-4985] [SQL] parquet support for date type + Daoyuan Wang + 2015-03-23 11:46:16 +0800 + Commit: 4659468, github.com/apache/spark/pull/3822 + + [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes + vinodkc + 2015-03-22 20:00:08 +0000 + Commit: 2bf40c5, github.com/apache/spark/pull/5112 + + [HOTFIX] Build break due to https://github.com/apache/spark/pull/5128 + Reynold Xin + 2015-03-22 12:08:15 -0700 + Commit: 7a0da47 + + [SPARK-6122][Core] Upgrade Tachyon client version to 0.6.1. + Calvin Jia + 2015-03-22 11:11:29 -0700 + Commit: a41b9c6, github.com/apache/spark/pull/4867 + + SPARK-6454 [DOCS] Fix links to pyspark api + Kamil Smuga , stderr + 2015-03-22 15:56:25 +0000 + Commit: 6ef4863, github.com/apache/spark/pull/5120 + + [SPARK-6453][Mesos] Some Mesos*Suite have a different package with their classes + Jongyoul Lee + 2015-03-22 15:53:18 +0000 + Commit: adb2ff7, github.com/apache/spark/pull/5126 + + [SPARK-6455] [docs] Correct some mistakes and typos + Hangchen Yu + 2015-03-22 15:51:10 +0000 + Commit: ab4f516, github.com/apache/spark/pull/5128 + + [SPARK-6448] Make history server log parse exceptions + Ryan Williams + 2015-03-22 11:54:23 +0000 + Commit: b9fe504, github.com/apache/spark/pull/5122 + + [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals + ypcat , Pei-Lun Lee + 2015-03-22 15:49:13 +0800 + Commit: 9b1e1f2, github.com/apache/spark/pull/5087 + + [SPARK-6428][SQL] Added explicit type for all public methods for Hive module + Reynold Xin + 2015-03-21 14:30:04 -0700 + Commit: b6090f9, github.com/apache/spark/pull/5108 + + [SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser. 
+ Yin Huai + 2015-03-21 13:27:53 -0700 + Commit: 94a102a, github.com/apache/spark/pull/5078 + + [SPARK-5680][SQL] Sum function on all null values, should return zero + Venkata Ramana G , Venkata Ramana Gollamudi + 2015-03-21 13:24:24 -0700 + Commit: ee569a0, github.com/apache/spark/pull/4466 + + [SPARK-5320][SQL]Add statistics method at NoRelation (override super). + x1- + 2015-03-21 13:22:34 -0700 + Commit: 52dd4b2, github.com/apache/spark/pull/5105 + + [SPARK-5821] [SQL] JSON CTAS command should throw error message when delete path failure + Yanbo Liang , Yanbo Liang + 2015-03-21 11:23:28 +0800 + Commit: e5d2c37, github.com/apache/spark/pull/4610 + + [SPARK-6315] [SQL] Also tries the case class string parser while reading Parquet schema + Cheng Lian + 2015-03-21 11:18:45 +0800 + Commit: 937c1e5, github.com/apache/spark/pull/5034 + + [SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful + Yanbo Liang + 2015-03-21 10:53:04 +0800 + Commit: bc37c97, github.com/apache/spark/pull/5107 + + [SPARK-6025] [MLlib] Add helper method evaluateEachIteration to extract learning curve + MechCoder + 2015-03-20 17:14:09 -0700 + Commit: 25e271d, github.com/apache/spark/pull/4906 + + [SPARK-6428][SQL] Added explicit type for all public methods in sql/core + Reynold Xin + 2015-03-20 15:47:07 -0700 + Commit: a95043b, github.com/apache/spark/pull/5104 + + [SPARK-6421][MLLIB] _regression_train_wrapper does not test initialWeights correctly + lewuathe + 2015-03-20 17:18:18 -0400 + Commit: 257cde7, github.com/apache/spark/pull/5101 + + [SPARK-6309] [SQL] [MLlib] Implement MatrixUDT + MechCoder + 2015-03-20 17:13:18 -0400 + Commit: 11e0259, github.com/apache/spark/pull/5048 + + [SPARK-6423][Mesos] MemoryUtils should use memoryOverhead if it's set + Jongyoul Lee + 2015-03-20 19:14:35 +0000 + Commit: 49a01c7, github.com/apache/spark/pull/5099 + + [SPARK-5955][MLLIB] add checkpointInterval to ALS + Xiangrui Meng + 2015-03-20 15:02:57 -0400 + Commit: 6b36470, github.com/apache/spark/pull/5076 + + [Spark 6096][MLlib] Add Naive Bayes load save methods in Python + Xusen Yin + 2015-03-20 14:53:59 -0400 + Commit: 25636d9, github.com/apache/spark/pull/5090 + + [MLlib] SPARK-5954: Top by key + Shuo Xiang + 2015-03-20 14:45:44 -0400 + Commit: 5e6ad24, github.com/apache/spark/pull/5075 + + [SPARK-6095] [MLLIB] Support model save/load in Python's linear models + Yanbo Liang + 2015-03-20 14:44:21 -0400 + Commit: 48866f7, github.com/apache/spark/pull/5016 + + [SPARK-6371] [build] Update version to 1.4.0-SNAPSHOT. + Marcelo Vanzin + 2015-03-20 18:43:57 +0000 + Commit: a745645, github.com/apache/spark/pull/5056 + + [SPARK-6426][Doc]User could also point the yarn cluster config directory via YARN_CONF_DI... + WangTaoTheTonic + 2015-03-20 18:42:18 +0000 + Commit: 385b2ff, github.com/apache/spark/pull/5103 + + [SPARK-6370][core] Documentation: Improve all 3 docs for RDD.sample + mbonaci + 2015-03-20 18:30:45 +0000 + Commit: 28bcb9e, github.com/apache/spark/pull/5097 + + [SPARK-6428][MLlib] Added explicit type for public methods and implemented hashCode when equals is defined. 
+ Reynold Xin + 2015-03-20 14:13:02 -0400 + Commit: db4d317, github.com/apache/spark/pull/5102 + + SPARK-6338 [CORE] Use standard temp dir mechanisms in tests to avoid orphaned temp files + Sean Owen + 2015-03-20 14:16:21 +0000 + Commit: 6f80c3e, github.com/apache/spark/pull/5029 + + SPARK-5134 [BUILD] Bump default Hadoop version to 2+ + Sean Owen + 2015-03-20 14:14:53 +0000 + Commit: d08e3eb, github.com/apache/spark/pull/5027 + + [SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR + Jongyoul Lee + 2015-03-20 12:24:34 +0000 + Commit: 116c553, github.com/apache/spark/pull/5088 + + Tighten up field/method visibility in Executor and made some code more clear to read. + Reynold Xin + 2015-03-19 22:12:01 -0400 + Commit: 0745a30, github.com/apache/spark/pull/4850 + + [SPARK-6219] [Build] Check that Python code compiles + Nicholas Chammas + 2015-03-19 12:46:10 -0700 + Commit: f17d43b, github.com/apache/spark/pull/4941 + + [Core][minor] remove unused `visitedStages` in `DAGScheduler.stageDependsOn` + Wenchen Fan + 2015-03-19 15:25:32 -0400 + Commit: 3b5aaa6, github.com/apache/spark/pull/5086 + + [SPARK-5313][Project Infra]: Create simple framework for highlighting changes introduced in a PR + Brennon York + 2015-03-19 11:18:24 -0400 + Commit: 8cb23a1, github.com/apache/spark/pull/5072 + + [SPARK-6291] [MLLIB] GLM toString & toDebugString + Yanbo Liang + 2015-03-19 11:10:20 -0400 + Commit: dda4ded, github.com/apache/spark/pull/5038 + + [SPARK-5843] [API] Allowing map-side combine to be specified in Java. + mcheah + 2015-03-19 08:51:49 -0400 + Commit: 3c4e486, github.com/apache/spark/pull/4634 + + [SPARK-6402][DOC] - Remove some refererences to shark in docs and ec2 + Pierre Borckmans + 2015-03-19 08:02:06 -0400 + Commit: 797f8a0, github.com/apache/spark/pull/5083 + + [SPARK-4012] stop SparkContext when the exception is thrown from an infinite loop + CodingCat + 2015-03-18 23:48:45 -0700 + Commit: 2c3f83c, github.com/apache/spark/pull/5004 + + [SPARK-6222][Streaming] Dont delete checkpoint data when doing pre-batch-start checkpoint + Tathagata Das + 2015-03-19 02:15:50 -0400 + Commit: 645cf3f, github.com/apache/spark/pull/5008 + + [SPARK-6394][Core] cleanup BlockManager companion object and improve the getCacheLocs method in DAGScheduler + Wenchen Fan + 2015-03-18 19:43:04 -0700 + Commit: 540b2a4, github.com/apache/spark/pull/5043 + + SPARK-6085 Part. 2 Increase default value for memory overhead + Jongyoul Lee + 2015-03-18 20:54:22 -0400 + Commit: 3db1387, github.com/apache/spark/pull/5065 + + [SPARK-6374] [MLlib] add get for GeneralizedLinearAlgo + Yuhao Yang + 2015-03-18 13:44:37 -0400 + Commit: a95ee24, github.com/apache/spark/pull/5058 + + [SPARK-6325] [core,yarn] Do not change target executor count when killing executors. + Marcelo Vanzin + 2015-03-18 09:18:28 -0400 + Commit: 981fbaf, github.com/apache/spark/pull/5018 + + [SPARK-6286][minor] Handle missing Mesos case TASK_ERROR. + Iulian Dragos + 2015-03-18 09:15:33 -0400 + Commit: 9d112a9, github.com/apache/spark/pull/5000 + + SPARK-6389 YARN app diagnostics report doesn't report NPEs + Steve Loughran + 2015-03-18 09:09:32 -0400 + Commit: e09c852, github.com/apache/spark/pull/5070 + + [SPARK-6372] [core] Propagate --conf to child processes. 
+ Marcelo Vanzin + 2015-03-18 09:06:57 -0400 + Commit: 6205a25, github.com/apache/spark/pull/5057 + + [SPARK-6247][SQL] Fix resolution of ambiguous joins caused by new aliases + Michael Armbrust + 2015-03-17 19:47:51 -0700 + Commit: 3579003, github.com/apache/spark/pull/5062 + + [SPARK-5651][SQL] Add input64 in blacklist and add test suit for create table within backticks + watermen , q00251598 + 2015-03-17 19:35:18 -0700 + Commit: a6ee2f7, github.com/apache/spark/pull/4427 + + [SPARK-5404] [SQL] Update the default statistic number + Cheng Hao + 2015-03-17 19:32:38 -0700 + Commit: 78cb08a, github.com/apache/spark/pull/4914 + + [SPARK-5908][SQL] Resolve UdtfsAlias when only single Alias is used + Liang-Chi Hsieh + 2015-03-17 18:58:52 -0700 + Commit: 5c80643, github.com/apache/spark/pull/4692 + + [SPARK-6383][SQL]Fixed compiler and errors in Dataframe examples + Tijo Thomas + 2015-03-17 18:50:19 -0700 + Commit: a012e08, github.com/apache/spark/pull/5068 + + [SPARK-6366][SQL] In Python API, the default save mode for save and saveAsTable should be "error" instead of "append". + Yin Huai + 2015-03-18 09:41:06 +0800 + Commit: dc9c919, github.com/apache/spark/pull/5053 + + [SPARK-6330] [SQL] Add a test case for SPARK-6330 + Pei-Lun Lee + 2015-03-18 08:34:46 +0800 + Commit: 4633a87, github.com/apache/spark/pull/5039 + + [SPARK-6226][MLLIB] add save/load in PySpark's KMeansModel + Xiangrui Meng + 2015-03-17 12:14:40 -0700 + Commit: c94d062, github.com/apache/spark/pull/5049 + + [SPARK-6336] LBFGS should document what convergenceTol means + lewuathe + 2015-03-17 12:11:57 -0700 + Commit: d9f3e01, github.com/apache/spark/pull/5033 + + [SPARK-6313] Add config option to disable file locks/fetchFile cache to ... 
+ nemccarthy + 2015-03-17 09:33:11 -0700 + Commit: 4cca391, github.com/apache/spark/pull/5036 + + [SPARK-3266] Use intermediate abstract classes to fix type erasure issues in Java APIs + Josh Rosen + 2015-03-17 09:18:57 -0700 + Commit: 0f673c2, github.com/apache/spark/pull/5050 + + [SPARK-6365] jetty-security also needed for SPARK_PREPEND_CLASSES to work + Imran Rashid + 2015-03-17 09:41:06 -0500 + Commit: e9f22c6, github.com/apache/spark/pull/5052 + + [SPARK-6331] Load new master URL if present when recovering streaming context from checkpoint + Tathagata Das + 2015-03-17 05:31:27 -0700 + Commit: c928796, github.com/apache/spark/pull/5024 + + [docs] [SPARK-4820] Spark build encounters "File name too long" on some encrypted filesystems + Theodore Vasiloudis + 2015-03-17 11:25:01 +0000 + Commit: e26db9be, github.com/apache/spark/pull/5041 + + [SPARK-6269] [CORE] Use ScalaRunTime's array methods instead of java.lang.reflect.Array in size estimation + mcheah , Justin Uang + 2015-03-17 11:20:20 +0000 + Commit: 005d1c5, github.com/apache/spark/pull/4972 + + [SPARK-4011] tighten the visibility of the members in Master/Worker class + CodingCat + 2015-03-17 11:18:27 +0000 + Commit: 25f3580, github.com/apache/spark/pull/4844 + + SPARK-6044 [CORE] RDD.aggregate() should not use the closure serializer on the zero value + Sean Owen + 2015-03-16 23:58:52 -0700 + Commit: b2d8c02, github.com/apache/spark/pull/5028 + + [SPARK-6357][GraphX] Add unapply in EdgeContext + Takeshi YAMAMURO + 2015-03-16 23:54:54 -0700 + Commit: b3e6eca, github.com/apache/spark/pull/5047 + + [SQL][docs][minor] Fixed sample code in SQLContext scaladoc + Lomig Mégard + 2015-03-16 23:52:42 -0700 + Commit: 6870722, github.com/apache/spark/pull/5051 + + [SPARK-6299][CORE] ClassNotFoundException in standalone mode when running groupByKey with class defined in REPL + Kevin (Sangwoo) Kim + 2015-03-16 23:49:23 -0700 + Commit: f0edeae, github.com/apache/spark/pull/5046 + + [SPARK-5712] [SQL] fix comment with semicolon at end + Daoyuan Wang + 2015-03-17 12:29:15 +0800 + Commit: 9667b9f, github.com/apache/spark/pull/4500 + + [SPARK-6327] [PySpark] fix launch spark-submit from python + Davies Liu + 2015-03-16 16:26:55 -0700 + Commit: e3f315a, github.com/apache/spark/pull/5019 + + [SPARK-6077] Remove streaming tab while stopping StreamingContext + lisurprise + 2015-03-16 13:10:32 -0700 + Commit: f149b8b, github.com/apache/spark/pull/4828 + + [SPARK-6330] Fix filesystem bug in newParquet relation + Volodymyr Lyubinets + 2015-03-16 12:13:18 -0700 + Commit: d19efed, github.com/apache/spark/pull/5020 + + [SPARK-2087] [SQL] Multiple thriftserver sessions with single HiveContext instance + Cheng Hao + 2015-03-17 01:09:27 +0800 + Commit: 12a345a, github.com/apache/spark/pull/4885 + + [SPARK-6300][Spark Core] sc.addFile(path) does not support the relative path. + DoingDone9 <799203320@qq.com> + 2015-03-16 12:27:15 +0000 + Commit: 00e730b, github.com/apache/spark/pull/4993 + + [SPARK-5922][GraphX]: Add diff(other: RDD[VertexId, VD]) in VertexRDD + Brennon York + 2015-03-16 01:06:26 -0700 + Commit: 45f4c66, github.com/apache/spark/pull/4733 + + [SPARK-3619] Part 2. 
Upgrade to Mesos 0.21 to work around MESOS-1688 + Jongyoul Lee + 2015-03-15 15:46:55 +0000 + Commit: aa6536f, github.com/apache/spark/pull/4361 + + [SPARK-6285][SQL]Remove ParquetTestData in SparkBuild.scala and in README.md + OopsOutOfMemory + 2015-03-15 20:44:45 +0800 + Commit: 62ede53, github.com/apache/spark/pull/5032 + + [SPARK-5790][GraphX]: VertexRDD's won't zip properly for `diff` capability (added tests) + Brennon York + 2015-03-14 17:38:12 +0000 + Commit: c49d156, github.com/apache/spark/pull/5023 + + [SPARK-6329][Docs]: Minor doc changes for Mesos and TOC + Brennon York + 2015-03-14 17:28:13 +0000 + Commit: 127268b, github.com/apache/spark/pull/5022 + + [SPARK-6195] [SQL] Adds in-memory column type for fixed-precision decimals + Cheng Lian + 2015-03-14 19:53:54 +0800 + Commit: 5be6b0e, github.com/apache/spark/pull/4938 + + [SQL]Delete some dupliate code in HiveThriftServer2 + ArcherShao , ArcherShao + 2015-03-14 08:27:18 +0000 + Commit: ee15404, github.com/apache/spark/pull/5007 + + [SPARK-6210] [SQL] use prettyString as column name in agg() + Davies Liu + 2015-03-14 00:43:33 -0700 + Commit: b38e073, github.com/apache/spark/pull/5006 + + [SPARK-6317][SQL]Fixed HIVE console startup issue + vinodkc , Vinod K C + 2015-03-14 07:17:54 +0800 + Commit: e360d5e, github.com/apache/spark/pull/5011 + + [SPARK-6285] [SQL] Removes unused ParquetTestData and duplicated TestGroupWriteSupport + Cheng Lian + 2015-03-14 07:09:53 +0800 + Commit: cdc34ed, github.com/apache/spark/pull/5010 + + [SPARK-4600][GraphX]: org.apache.spark.graphx.VertexRDD.diff does not work + Brennon York + 2015-03-13 18:48:31 +0000 + Commit: b943f5d, github.com/apache/spark/pull/5015 + + [SPARK-6278][MLLIB] Mention the change of objective in linear regression + Xiangrui Meng + 2015-03-13 10:27:28 -0700 + Commit: 7f13434, github.com/apache/spark/pull/4978 + + [SPARK-6252] [mllib] Added getLambda to Scala NaiveBayes + Joseph K. Bradley , Joseph K. Bradley + 2015-03-13 10:26:09 -0700 + Commit: dc4abd4, github.com/apache/spark/pull/4969 + + [CORE][minor] remove unnecessary ClassTag in `DAGScheduler` + Wenchen Fan + 2015-03-13 14:08:56 +0000 + Commit: ea3d2ee, github.com/apache/spark/pull/4992 + + [SPARK-6197][CORE] handle json exception when hisotry file not finished writing + Zhang, Liye + 2015-03-13 13:59:54 +0000 + Commit: 9048e81, github.com/apache/spark/pull/4927 + + [SPARK-5310] [SQL] [DOC] Parquet section for the SQL programming guide + Cheng Lian + 2015-03-13 21:34:50 +0800 + Commit: 69ff8e8, github.com/apache/spark/pull/5001 + + [SPARK-5845][Shuffle] Time to cleanup spilled shuffle files not included in shuffle write time + Ilya Ganelin + 2015-03-13 13:21:04 +0000 + Commit: 0af9ea7, github.com/apache/spark/pull/4965 + + HOTFIX: Changes to release script. + Patrick Wendell + 2015-03-12 18:36:17 -0700 + Commit: 3980ebd + + [mllib] [python] Add LassoModel to __all__ in regression.py + Joseph K. Bradley + 2015-03-12 16:46:29 -0700 + Commit: 17c309c, github.com/apache/spark/pull/4970 + + [SPARK-4588] ML Attributes + Xiangrui Meng , Sean Owen + 2015-03-12 16:34:56 -0700 + Commit: a4b2716, github.com/apache/spark/pull/4925 + + [SPARK-6268][MLlib] KMeans parameter getter methods + Yuhao Yang + 2015-03-12 15:17:46 -0700 + Commit: fb4787c, github.com/apache/spark/pull/4974 + + [build] [hotfix] Fix make-distribution.sh for Scala 2.11. 
+ Marcelo Vanzin + 2015-03-12 19:16:58 +0000 + Commit: 8f1bc79, github.com/apache/spark/pull/5002 + + [SPARK-6275][Documentation]Miss toDF() function in docs/sql-programming-guide.md + zzcclp + 2015-03-12 15:07:15 +0000 + Commit: 304366c, github.com/apache/spark/pull/4977 + + [docs] [SPARK-6306] Readme points to dead link + Theodore Vasiloudis + 2015-03-12 15:01:33 +0000 + Commit: 4e47d54, github.com/apache/spark/pull/4999 + + [SPARK-5814][MLLIB][GRAPHX] Remove JBLAS from runtime + Xiangrui Meng + 2015-03-12 01:39:04 -0700 + Commit: 0cba802, github.com/apache/spark/pull/4699 + + [SPARK-6294] fix hang when call take() in JVM on PythonRDD + Davies Liu + 2015-03-12 01:34:38 -0700 + Commit: 712679a, github.com/apache/spark/pull/4987 + + [SPARK-6296] [SQL] Added equals to Column + Volodymyr Lyubinets + 2015-03-12 00:55:26 -0700 + Commit: 25b71d8, github.com/apache/spark/pull/4988 + + BUILD: Adding more known contributor names + Patrick Wendell + 2015-03-11 22:24:08 -0700 + Commit: e921a66 + + [SPARK-6128][Streaming][Documentation] Updates to Spark Streaming Programming Guide + Tathagata Das + 2015-03-11 18:48:21 -0700 + Commit: cd3b68d, github.com/apache/spark/pull/4956 + + [SPARK-6274][Streaming][Examples] Added examples streaming + sql examples. + Tathagata Das + 2015-03-11 11:19:51 -0700 + Commit: 51a79a7, github.com/apache/spark/pull/4975 + + SPARK-6245 [SQL] jsonRDD() of empty RDD results in exception + Sean Owen + 2015-03-11 14:09:09 +0000 + Commit: 55c4831, github.com/apache/spark/pull/4971 + + SPARK-3642. Document the nuances of shared variables. + Sandy Ryza + 2015-03-11 13:22:05 +0000 + Commit: 2d87a41, github.com/apache/spark/pull/2490 + + [SPARK-4423] Improve foreach() documentation to avoid confusion between local- and cluster-mode behavior + Ilya Ganelin + 2015-03-11 13:20:15 +0000 + Commit: 548643a, github.com/apache/spark/pull/4696 + + [SPARK-6228] [network] Move SASL classes from network/shuffle to network... + Marcelo Vanzin + 2015-03-11 13:16:22 +0000 + Commit: 5b335bd, github.com/apache/spark/pull/4953 + + SPARK-6225 [CORE] [SQL] [STREAMING] Resolve most build warnings, 1.3.0 edition + Sean Owen + 2015-03-11 13:15:19 +0000 + Commit: 6e94c4e, github.com/apache/spark/pull/4950 + + [SPARK-6279][Streaming]In KafkaRDD.scala, Miss expressions flag "s" at logging string + zzcclp + 2015-03-11 12:22:24 +0000 + Commit: ec30c17, github.com/apache/spark/pull/4979 + + [SQL][Minor] fix typo in comments + Hongbo Liu + 2015-03-11 12:18:24 +0000 + Commit: 40f4979, github.com/apache/spark/pull/4976 + + [MINOR] [DOCS] Fix map -> mapToPair in Streaming Java example + Sean Owen + 2015-03-11 12:16:32 +0000 + Commit: 35b2564, github.com/apache/spark/pull/4967 + + [SPARK-4924] Add a library for launching Spark jobs programmatically. + Marcelo Vanzin + 2015-03-11 01:03:01 -0700 + Commit: 517975d, github.com/apache/spark/pull/3916 + + [SPARK-5986][MLLib] Add save/load for k-means + Xusen Yin + 2015-03-11 00:24:55 -0700 + Commit: 2d4e00e, github.com/apache/spark/pull/4951 + + [SPARK-5183][SQL] Update SQL Docs with JDBC and Migration Guide + Michael Armbrust + 2015-03-10 18:13:09 -0700 + Commit: 2672374, github.com/apache/spark/pull/4958 + + Minor doc: Remove the extra blank line in data types javadoc. 
+ Reynold Xin + 2015-03-10 17:25:04 -0700 + Commit: 74fb433, github.com/apache/spark/pull/4955 + + [SPARK-6186] [EC2] Make Tachyon version configurable in EC2 deployment script + cheng chang + 2015-03-10 11:02:12 +0000 + Commit: 7c7d2d5, github.com/apache/spark/pull/4901 + + [SPARK-6191] [EC2] Generalize ability to download libs + Nicholas Chammas + 2015-03-10 10:58:31 +0000 + Commit: d14df06, github.com/apache/spark/pull/4919 + + [SPARK-6087][CORE] Provide actionable exception if Kryo buffer is not large enough + Lev Khomich + 2015-03-10 10:55:42 +0000 + Commit: c4c4b07, github.com/apache/spark/pull/4947 + + [SPARK-6177][MLlib]Add note in LDA example to remind possible coalesce + Yuhao Yang + 2015-03-10 10:51:44 +0000 + Commit: 9a0272f, github.com/apache/spark/pull/4899 + + [SPARK-6194] [SPARK-677] [PySpark] fix memory leak in collect() + Davies Liu + 2015-03-09 16:24:06 -0700 + Commit: 8767565, github.com/apache/spark/pull/4923 + + [SPARK-5310][Doc] Update SQL Programming Guide to include DataFrames. + Reynold Xin + 2015-03-09 16:16:16 -0700 + Commit: 3cac199, github.com/apache/spark/pull/4954 + + [Docs] Replace references to SchemaRDD with DataFrame + Reynold Xin + 2015-03-09 13:29:19 -0700 + Commit: 70f8814, github.com/apache/spark/pull/4952 + + [EC2] [SPARK-6188] Instance types can be mislabeled when re-starting cluster with default arguments + Theodore Vasiloudis , Theodore Vasiloudis + 2015-03-09 14:16:07 +0000 + Commit: f7c7992, github.com/apache/spark/pull/4916 + + [GraphX] Improve LiveJournalPageRank example + Jacky Li + 2015-03-08 19:47:35 +0000 + Commit: 55b1b32, github.com/apache/spark/pull/4917 + + SPARK-6205 [CORE] UISeleniumSuite fails for Hadoop 2.x test with NoClassDefFoundError + Sean Owen + 2015-03-08 14:09:40 +0000 + Commit: f16b7b0, github.com/apache/spark/pull/4933 + + [SPARK-6193] [EC2] Push group filter up to EC2 + Nicholas Chammas + 2015-03-08 14:01:26 +0000 + Commit: 52ed7da, github.com/apache/spark/pull/4922 + + [SPARK-5641] [EC2] Allow spark_ec2.py to copy arbitrary files to cluster + Florian Verhein + 2015-03-07 12:56:59 +0000 + Commit: 334c5bd, github.com/apache/spark/pull/4583 + + [Minor]fix the wrong description + WangTaoTheTonic + 2015-03-07 12:35:26 +0000 + Commit: 729c05b, github.com/apache/spark/pull/4936 + + [EC2] Reorder print statements on termination + Nicholas Chammas + 2015-03-07 12:33:41 +0000 + Commit: 2646794, github.com/apache/spark/pull/4932 + + Fix python typo (+ Scala, Java typos) + RobertZK , Robert Krzyzanowski + 2015-03-07 00:16:50 +0000 + Commit: 48a723c, github.com/apache/spark/pull/4840 + + [SPARK-6178][Shuffle] Removed unused imports + Vinod K C , Vinod K C + 2015-03-06 14:43:09 +0000 + Commit: dba0b2e, github.com/apache/spark/pull/4900 + + [Minor] Resolve sbt warnings: postfix operator second should be enabled + GuoQiang Li + 2015-03-06 13:20:20 +0000 + Commit: 05cb6b3, github.com/apache/spark/pull/4908 + + [core] [minor] Don't pollute source directory when running UtilsSuite. 
+ Marcelo Vanzin + 2015-03-06 09:43:24 +0000 + Commit: cd7594c, github.com/apache/spark/pull/4921 + + [CORE, DEPLOY][minor] align arguments order with docs of worker + Zhang, Liye + 2015-03-06 09:34:07 +0000 + Commit: d8b3da9, github.com/apache/spark/pull/4924 + + [SQL] Make Strategies a public developer API + Michael Armbrust + 2015-03-05 14:50:25 -0800 + Commit: eb48fd6, github.com/apache/spark/pull/4920 + + [SPARK-6163][SQL] jsonFile should be backed by the data source API + Yin Huai + 2015-03-05 14:49:44 -0800 + Commit: 1b4bb25, github.com/apache/spark/pull/4896 + + [SPARK-6145][SQL] fix ORDER BY on nested fields + Wenchen Fan , Michael Armbrust + 2015-03-05 14:49:01 -0800 + Commit: 5873c71, github.com/apache/spark/pull/4918 + + [SPARK-6175] Fix standalone executor log links when ephemeral ports or SPARK_PUBLIC_DNS are used + Josh Rosen + 2015-03-05 12:04:00 -0800 + Commit: 424a86a, github.com/apache/spark/pull/4903 + + [SPARK-6090][MLLIB] add a basic BinaryClassificationMetrics to PySpark/MLlib + Xiangrui Meng + 2015-03-05 11:50:09 -0800 + Commit: 0bfacd5, github.com/apache/spark/pull/4863 + + SPARK-6182 [BUILD] spark-parent pom needs to be published for both 2.10 and 2.11 + Sean Owen + 2015-03-05 11:31:48 -0800 + Commit: c9cfba0, github.com/apache/spark/pull/4912 + + [SPARK-6153] [SQL] promote guava dep for hive-thriftserver + Daoyuan Wang + 2015-03-05 16:35:17 +0800 + Commit: e06c7df, github.com/apache/spark/pull/4884 + + SPARK-5143 [BUILD] [WIP] spark-network-yarn 2.11 depends on spark-network-shuffle 2.10 + Sean Owen + 2015-03-04 21:00:51 -0800 + Commit: 7ac072f, github.com/apache/spark/pull/4876 + + [SPARK-6149] [SQL] [Build] Excludes Guava 15 referenced by jackson-module-scala_2.10 + Cheng Lian + 2015-03-04 20:52:58 -0800 + Commit: 1aa90e3, github.com/apache/spark/pull/4890 + + [SPARK-6144] [core] Fix addFile when source files are on "hdfs:" + Marcelo Vanzin , trystanleftwich + 2015-03-04 12:58:39 -0800 + Commit: 3a35a0d, github.com/apache/spark/pull/4894 + + [SPARK-6107][CORE] Display inprogress application information for event log history for standalone mode + Zhang, Liye + 2015-03-04 12:28:27 +0000 + Commit: f6773ed, github.com/apache/spark/pull/4848 + + [SPARK-6134][SQL] Fix wrong datatype for casting FloatType and default LongType value in defaultPrimitive + Liang-Chi Hsieh + 2015-03-04 20:23:43 +0800 + Commit: aef8a84, github.com/apache/spark/pull/4870 + + [SPARK-6136] [SQL] Removed JDBC integration tests which depends on docker-client + Cheng Lian + 2015-03-04 19:39:02 +0800 + Commit: 76b472f, github.com/apache/spark/pull/4872 + + [SPARK-3355][Core]: Allow running maven tests in run-tests + Brennon York + 2015-03-04 11:02:33 +0000 + Commit: 418f38d, github.com/apache/spark/pull/4734 + + SPARK-6085 Increase default value for memory overhead + tedyu + 2015-03-04 11:00:52 +0000 + Commit: 8d3e241, github.com/apache/spark/pull/4836 + + [SPARK-6141][MLlib] Upgrade Breeze from 0.10 to 0.11 to fix convergence bug + Xiangrui Meng , DB Tsai , DB Tsai + 2015-03-03 23:52:02 -0800 + Commit: 76e20a0, github.com/apache/spark/pull/4879 + + [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet + Andrew Or + 2015-03-03 20:49:45 -0800 + Commit: d334bfb, github.com/apache/spark/pull/4882 + + [SPARK-5949] HighlyCompressedMapStatus needs more classes registered w/ kryo + Imran Rashid + 2015-03-03 15:33:19 
-0800 + Commit: 1f1fccc, github.com/apache/spark/pull/4877 + + [SPARK-6133] Make sc.stop() idempotent + Andrew Or + 2015-03-03 15:09:57 -0800 + Commit: 6c20f35, github.com/apache/spark/pull/4871 + + [SPARK-6132] ContextCleaner race condition across SparkContexts + Andrew Or + 2015-03-03 13:44:05 -0800 + Commit: fe63e82, github.com/apache/spark/pull/4869 + + SPARK-1911 [DOCS] Warn users if their assembly jars are not built with Java 6 + Sean Owen + 2015-03-03 13:40:11 -0800 + Commit: e750a6b, github.com/apache/spark/pull/4874 + + Revert "[SPARK-5423][Core] Cleanup resources in DiskMapIterator.finalize to ensure deleting the temp file" + Andrew Or + 2015-03-03 13:03:52 -0800 + Commit: 9af0017 + + [SPARK-6138][CORE][minor] enhance the `toArray` method in `SizeTrackingVector` + Wenchen Fan + 2015-03-03 12:12:23 +0000 + Commit: e359794, github.com/apache/spark/pull/4825 + + [SPARK-6118] making package name of deploy.worker.CommandUtils and deploy.CommandUtilsSuite consistent + CodingCat + 2015-03-03 10:32:57 +0000 + Commit: 975643c, github.com/apache/spark/pull/4856 + + BUILD: Minor tweaks to internal build scripts + Patrick Wendell + 2015-03-03 00:38:12 -0800 + Commit: 0c9a8ea + + HOTFIX: Bump HBase version in MapR profiles. + Patrick Wendell + 2015-03-03 01:38:07 -0800 + Commit: 165ff36 + + [SPARK-5537][MLlib][Docs] Add user guide for multinomial logistic regression + DB Tsai + 2015-03-02 22:37:12 -0800 + Commit: b196056, github.com/apache/spark/pull/4866 + + [SPARK-6120] [mllib] Warnings about memory in tree, ensemble model save + Joseph K. Bradley + 2015-03-02 22:33:51 -0800 + Commit: c2fe3a6, github.com/apache/spark/pull/4864 + + [SPARK-6097][MLLIB] Support tree model save/load in PySpark/MLlib + Xiangrui Meng + 2015-03-02 22:27:01 -0800 + Commit: 7e53a79, github.com/apache/spark/pull/4854 + + [SPARK-5310][SQL] Fixes to Docs and Datasources API + Reynold Xin , Michael Armbrust + 2015-03-02 22:14:08 -0800 + Commit: 54d1968, github.com/apache/spark/pull/4868 + + [SPARK-5950][SQL]Insert array into a metastore table saved as parquet should work when using datasource api + Yin Huai + 2015-03-02 19:31:55 -0800 + Commit: 1259994, github.com/apache/spark/pull/4826 + + [SPARK-6127][Streaming][Docs] Add Kafka to Python api docs + Tathagata Das + 2015-03-02 18:40:46 -0800 + Commit: 9eb22ec, github.com/apache/spark/pull/4860 + + [SPARK-5537] Add user guide for multinomial logistic regression + Xiangrui Meng , DB Tsai + 2015-03-02 18:10:50 -0800 + Commit: 9d6c5ae, github.com/apache/spark/pull/4801 + + [SPARK-6121][SQL][MLLIB] simpleString for UDT + Xiangrui Meng + 2015-03-02 17:14:34 -0800 + Commit: 2db6a85, github.com/apache/spark/pull/4858 + + [SPARK-4777][CORE] Some block memory after unrollSafely not count into used memory(memoryStore.entrys or unrollMemory) + hushan[胡珊] + 2015-03-02 16:53:54 -0800 + Commit: e3a88d1, github.com/apache/spark/pull/3629 + + [SPARK-6048] SparkConf should not translate deprecated configs on set + Andrew Or + 2015-03-02 16:36:42 -0800 + Commit: 258d154, github.com/apache/spark/pull/4799 + + [SPARK-6066] Make event log format easier to parse + Andrew Or + 2015-03-02 16:34:32 -0800 + Commit: 6776cb3, github.com/apache/spark/pull/4821 + + [SPARK-6082] [SQL] Provides better error message for malformed rows when caching tables + Cheng Lian + 2015-03-02 16:18:00 -0800 + Commit: 1a49496, 
github.com/apache/spark/pull/4842 + + [SPARK-6114][SQL] Avoid metastore conversions before plan is resolved + Michael Armbrust + 2015-03-02 16:10:54 -0800 + Commit: 8223ce6, github.com/apache/spark/pull/4855 + + [SPARK-5522] Accelerate the Histroty Server start + guliangliang + 2015-03-02 15:33:23 -0800 + Commit: 26c1c56, github.com/apache/spark/pull/4525 + + [SPARK-6050] [yarn] Relax matching of vcore count in received containers. + Marcelo Vanzin + 2015-03-02 16:41:43 -0600 + Commit: 6b348d9, github.com/apache/spark/pull/4818 + + [SPARK-6040][SQL] Fix the percent bug in tablesample + q00251598 + 2015-03-02 13:16:29 -0800 + Commit: 582e5a2, github.com/apache/spark/pull/4789 + + [Minor] Fix doc typo for describing primitiveTerm effectiveness condition + Liang-Chi Hsieh + 2015-03-02 13:11:17 -0800 + Commit: 3f9def8, github.com/apache/spark/pull/4762 + + SPARK-5390 [DOCS] Encourage users to post on Stack Overflow in Community Docs + Sean Owen + 2015-03-02 21:10:08 +0000 + Commit: 0b472f6, github.com/apache/spark/pull/4843 + + [DOCS] Refactored Dataframe join comment to use correct parameter ordering + Paul Power + 2015-03-02 13:08:47 -0800 + Commit: d9a8bae, github.com/apache/spark/pull/4847 + + [SPARK-6080] [PySpark] correct LogisticRegressionWithLBFGS regType parameter for pyspark + Yanbo Liang + 2015-03-02 10:17:24 -0800 + Commit: af2effd, github.com/apache/spark/pull/4831 + + aggregateMessages example in graphX doc + DEBORAH SIEGEL + 2015-03-02 10:15:32 -0800 + Commit: e7d8ae4, github.com/apache/spark/pull/4853 + + [SPARK-5741][SQL] Support the path contains comma in HiveContext + q00251598 + 2015-03-02 10:13:11 -0800 + Commit: 9ce12aa, github.com/apache/spark/pull/4532 + + [SPARK-6111] Fixed usage string in documentation. + Kenneth Myers + 2015-03-02 17:25:24 +0000 + Commit: 95ac68b, github.com/apache/spark/pull/4852 + + [SPARK-6052][SQL]In JSON schema inference, we should always set containsNull of an ArrayType to true + Yin Huai + 2015-03-02 23:18:07 +0800 + Commit: 3efd8bb, github.com/apache/spark/pull/4806 + + [SPARK-6073][SQL] Need to refresh metastore cache after append data in CreateMetastoreDataSourceAsSelect + Yin Huai + 2015-03-02 22:42:18 +0800 + Commit: 39a54b4, github.com/apache/spark/pull/4824 + + [SPARK-6103][Graphx]remove unused class to import in EdgeRDDImpl + Lianhui Wang + 2015-03-02 09:06:56 +0000 + Commit: 49c7a8f, github.com/apache/spark/pull/4846 + + SPARK-3357 [CORE] Internal log messages should be set at DEBUG level instead of INFO + Sean Owen + 2015-03-02 08:51:03 +0000 + Commit: 948c239, github.com/apache/spark/pull/4838 + + [Streaming][Minor]Fix some error docs in streaming examples + Saisai Shao + 2015-03-02 08:49:19 +0000 + Commit: d8fb40e, github.com/apache/spark/pull/4837 + + [SPARK-6083] [MLLib] [DOC] Make Python API example consistent in NaiveBayes + MechCoder + 2015-03-01 16:28:15 -0800 + Commit: 3f00bb3, github.com/apache/spark/pull/4834 + + [SPARK-6053][MLLIB] support save/load in PySpark's ALS + Xiangrui Meng + 2015-03-01 16:26:57 -0800 + Commit: aedbbaa, github.com/apache/spark/pull/4811 + + [SPARK-6074] [sql] Package pyspark sql bindings. 
+ Marcelo Vanzin + 2015-03-01 11:05:10 +0000 + Commit: fd8d283, github.com/apache/spark/pull/4822 + + [SPARK-6075] Fix bug in that caused lost accumulator updates: do not store WeakReferences in localAccums map + Josh Rosen + 2015-02-28 22:51:01 -0800 + Commit: 2df5f1f, github.com/apache/spark/pull/4835 + + SPARK-5984: Fix TimSort bug causes ArrayOutOfBoundsException + Evan Yu + 2015-02-28 18:55:34 -0800 + Commit: 643300a, github.com/apache/spark/pull/4804 + + SPARK-1965 [WEBUI] Spark UI throws NPE on trying to load the app page for non-existent app + Sean Owen + 2015-02-28 15:34:08 +0000 + Commit: 86fcdae, github.com/apache/spark/pull/4777 + + SPARK-5983 [WEBUI] Don't respond to HTTP TRACE in HTTP-based UIs + Sean Owen + 2015-02-28 15:23:59 +0000 + Commit: f91298e, github.com/apache/spark/pull/4765 + + SPARK-6063 MLlib doesn't pass mvn scalastyle check due to UTF chars in LDAModel.scala + Michael Griffiths , Griffiths, Michael (NYC-RPM) + 2015-02-28 14:47:39 +0000 + Commit: b36b1bc, github.com/apache/spark/pull/4815 + + [SPARK-5775] [SQL] BugFix: GenericRow cannot be cast to SpecificMutableRow when nested data and partitioned table + Cheng Lian , Cheng Lian , Yin Huai + 2015-02-28 21:15:43 +0800 + Commit: e6003f0, github.com/apache/spark/pull/4792 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-02-27 23:10:09 -0800 + Commit: 9168259, github.com/apache/spark/pull/1128 + + [SPARK-5979][SPARK-6032] Smaller safer --packages fix + Burak Yavuz + 2015-02-27 22:59:35 -0800 + Commit: 6d8e5fb, github.com/apache/spark/pull/4802 + + [SPARK-6070] [yarn] Remove unneeded classes from shuffle service jar. + Marcelo Vanzin + 2015-02-27 22:44:11 -0800 + Commit: dba08d1, github.com/apache/spark/pull/4820 + + [SPARK-6055] [PySpark] fix incorrect __eq__ of DataType + Davies Liu + 2015-02-27 20:07:17 -0800 + Commit: e0e64ba, github.com/apache/spark/pull/4808 + + [SPARK-5751] [SQL] Sets SPARK_HOME as SPARK_PID_DIR when running Thrift server test suites + Cheng Lian + 2015-02-28 08:41:49 +0800 + Commit: 8c468a6, github.com/apache/spark/pull/4758 + + [Streaming][Minor] Remove useless type signature of Java Kafka direct stream API + Saisai Shao + 2015-02-27 13:01:42 -0800 + Commit: 5f7f3b9, github.com/apache/spark/pull/4817 + + [SPARK-4587] [mllib] [docs] Fixed save,load calls in ML guide examples + Joseph K. Bradley + 2015-02-27 13:00:36 -0800 + Commit: d17cb2b, github.com/apache/spark/pull/4816 + + [SPARK-6059][Yarn] Add volatile to ApplicationMaster's reporterThread and allocator + zsxwing + 2015-02-27 13:33:39 +0000 + Commit: 57566d0, github.com/apache/spark/pull/4814 + + [SPARK-6058][Yarn] Log the user class exception in ApplicationMaster + zsxwing + 2015-02-27 13:31:46 +0000 + Commit: e747e98, github.com/apache/spark/pull/4813 + + [SPARK-6036][CORE] avoid race condition between eventlogListener and akka actor system + Zhang, Liye + 2015-02-26 23:11:43 -0800 + Commit: 8cd1692, github.com/apache/spark/pull/4785 + + fix spark-6033, clarify the spark.worker.cleanup behavior in standalone mode + 许鹏 + 2015-02-26 23:05:56 -0800 + Commit: 0375a41, github.com/apache/spark/pull/4803 + + [SPARK-6046] Privatize SparkConf.translateConfKey + Andrew Or + 2015-02-26 22:39:46 -0800 + Commit: 7c99a01, github.com/apache/spark/pull/4797 + + SPARK-2168 [Spark core] Use relative URIs for the app links in the History Server. 
+ Lukasz Jastrzebski + 2015-02-26 22:38:06 -0800 + Commit: 4a8a0a8, github.com/apache/spark/pull/4778 + + [SPARK-5495][UI] Add app and driver kill function in master web UI + jerryshao + 2015-02-26 22:36:48 -0800 + Commit: 67595eb, github.com/apache/spark/pull/4288 + + [SPARK-5771][UI][hotfix] Change Requested Cores into * if default cores is not set + jerryshao + 2015-02-26 22:35:43 -0800 + Commit: 12135e9, github.com/apache/spark/pull/4800 + + [SPARK-6024][SQL] When a data source table has too many columns, it's schema cannot be stored in metastore. + Yin Huai + 2015-02-26 20:46:05 -0800 + Commit: 5e5ad65, github.com/apache/spark/pull/4795 + + [SPARK-6037][SQL] Avoiding duplicate Parquet schema merging + Liang-Chi Hsieh + 2015-02-27 11:06:47 +0800 + Commit: 4ad5153, github.com/apache/spark/pull/4786 + + [SPARK-5529][CORE]Add expireDeadHosts in HeartbeatReceiver + Hong Shen + 2015-02-26 18:43:23 -0800 + Commit: 18f2098, github.com/apache/spark/pull/4363 + + SPARK-4579 [WEBUI] Scheduling Delay appears negative + Sean Owen + 2015-02-26 17:35:09 -0800 + Commit: fbc4694, github.com/apache/spark/pull/4796 + + SPARK-6045 RecordWriter should be checked against null in PairRDDFunctio... + tedyu + 2015-02-26 23:26:07 +0000 + Commit: e60ad2f, github.com/apache/spark/pull/4794 + + [SPARK-5951][YARN] Remove unreachable driver memory properties in yarn client mode + mohit.goyal + 2015-02-26 14:27:47 -0800 + Commit: b38dec2, github.com/apache/spark/pull/4730 + + Add a note for context termination for History server on Yarn + moussa taifi + 2015-02-26 14:19:43 -0800 + Commit: c871e2d, github.com/apache/spark/pull/4721 + + SPARK-4300 [CORE] Race condition during SparkWorker shutdown + Sean Owen + 2015-02-26 14:08:56 -0800 + Commit: 3fb53c0, github.com/apache/spark/pull/4787 + + [SPARK-6018] [YARN] NoSuchMethodError in Spark app is swallowed by YARN AM + Cheolsoo Park + 2015-02-26 13:53:49 -0800 + Commit: 5f3238b, github.com/apache/spark/pull/4773 + + [SPARK-6027][SPARK-5546] Fixed --jar and --packages not working for KafkaUtils and improved error message + Tathagata Das + 2015-02-26 13:46:07 -0800 + Commit: aa63f63, github.com/apache/spark/pull/4779 + + [SPARK-3562]Periodic cleanup event logs + xukun 00228947 + 2015-02-26 13:24:00 -0800 + Commit: 8942b52, github.com/apache/spark/pull/4214 + + Modify default value description for spark.scheduler.minRegisteredResourcesRatio on docs. + Li Zhihui + 2015-02-26 13:07:07 -0800 + Commit: 10094a5, github.com/apache/spark/pull/4781 + + SPARK-4704 [CORE] SparkSubmitDriverBootstrap doesn't flush output + Sean Owen + 2015-02-26 12:56:54 -0800 + Commit: cd5c8d7, github.com/apache/spark/pull/4788 + + [SPARK-5363] Fix bug in PythonRDD: remove() inside iterator is not safe + Davies Liu + 2015-02-26 11:54:17 -0800 + Commit: 7fa960e, github.com/apache/spark/pull/4776 + + [SPARK-6004][MLlib] Pick the best model when training GradientBoostedTrees with validation + Liang-Chi Hsieh + 2015-02-26 10:51:47 -0800 + Commit: cfff397, github.com/apache/spark/pull/4763 + + [SPARK-6007][SQL] Add numRows param in DataFrame.show() + Jacky Li + 2015-02-26 10:40:58 -0800 + Commit: 2358657, github.com/apache/spark/pull/4767 + + [SPARK-5801] [core] Avoid creating nested directories. 
+ Marcelo Vanzin + 2015-02-26 17:35:03 +0000 + Commit: df3d559, github.com/apache/spark/pull/4747 + + [SPARK-6016][SQL] Cannot read the parquet table after overwriting the existing table when spark.sql.parquet.cacheMetadata=true + Yin Huai + 2015-02-27 01:01:32 +0800 + Commit: 192e42a, github.com/apache/spark/pull/4775 + + [SPARK-6023][SQL] ParquetConversions fails to replace the destination MetastoreRelation of an InsertIntoTable node to ParquetRelation2 + Yin Huai + 2015-02-26 22:39:49 +0800 + Commit: f02394d, github.com/apache/spark/pull/4782 + + [SPARK-5914] to run spark-submit requiring only user perm on windows + Judy Nash + 2015-02-26 11:14:37 +0000 + Commit: 51a6f90, github.com/apache/spark/pull/4742 + + [SPARK-5976][MLLIB] Add partitioner to factors returned by ALS + Xiangrui Meng + 2015-02-25 23:43:29 -0800 + Commit: e43139f, github.com/apache/spark/pull/4748 + + [SPARK-5974] [SPARK-5980] [mllib] [python] [docs] Update ML guide with save/load, Python GBT + Joseph K. Bradley + 2015-02-25 16:13:17 -0800 + Commit: d20559b, github.com/apache/spark/pull/4750 + + [SPARK-1182][Docs] Sort the configuration parameters in configuration.md + Brennon York + 2015-02-25 16:12:56 -0800 + Commit: 46a044a, github.com/apache/spark/pull/3863 + + [SPARK-5926] [SQL] make DataFrame.explain leverage queryExecution.logical + Yanbo Liang + 2015-02-25 15:37:13 -0800 + Commit: 41e2e5a, github.com/apache/spark/pull/4707 + + [SPARK-5999][SQL] Remove duplicate Literal matching block + Liang-Chi Hsieh + 2015-02-25 15:22:33 -0800 + Commit: 12dbf98, github.com/apache/spark/pull/4760 + + [SPARK-6010] [SQL] Merging compatible Parquet schemas before computing splits + Cheng Lian + 2015-02-25 15:15:22 -0800 + Commit: e0fdd46, github.com/apache/spark/pull/4768 + + [SPARK-5944] [PySpark] fix version in Python API docs + Davies Liu + 2015-02-25 15:13:34 -0800 + Commit: f3f4c87, github.com/apache/spark/pull/4731 + + [SPARK-5982] Remove incorrect Local Read Time Metric + Kay Ousterhout + 2015-02-25 14:55:24 -0800 + Commit: 838a480, github.com/apache/spark/pull/4749 + + [SPARK-1955][GraphX]: VertexRDD can incorrectly assume index sharing + Brennon York + 2015-02-25 14:11:12 -0800 + Commit: 9f603fc, github.com/apache/spark/pull/4705 + + [SPARK-5970][core] Register directory created in getOrCreateLocalRootDirs for automatic deletion. 
+ Milan Straka + 2015-02-25 21:33:34 +0000 + Commit: a777c65, github.com/apache/spark/pull/4759 + + SPARK-5930 [DOCS] Documented default of spark.shuffle.io.retryWait is confusing + Sean Owen + 2015-02-25 12:20:44 -0800 + Commit: 7d8e6a2, github.com/apache/spark/pull/4769 + + [SPARK-5996][SQL] Fix specialized outbound conversions + Michael Armbrust + 2015-02-25 10:13:40 -0800 + Commit: f84c799, github.com/apache/spark/pull/4757 + + [SPARK-5771] Number of Cores in Completed Applications of Standalone Master Web Page always be 0 if sc.stop() is called + guliangliang + 2015-02-25 14:48:02 +0000 + Commit: dd077ab, github.com/apache/spark/pull/4567 + + [GraphX] fixing 3 typos in the graphx programming guide + Benedikt Linse + 2015-02-25 14:46:17 +0000 + Commit: 5b8480e, github.com/apache/spark/pull/4766 + + [SPARK-5666][streaming][MQTT streaming] some trivial fixes + prabs , Prabeesh K + 2015-02-25 14:37:35 +0000 + Commit: d51ed26, github.com/apache/spark/pull/4178 + + [SPARK-5994] [SQL] Python DataFrame documentation fixes + Davies Liu + 2015-02-24 20:51:55 -0800 + Commit: d641fbb, github.com/apache/spark/pull/4756 + + [SPARK-5286][SQL] SPARK-5286 followup + Yin Huai + 2015-02-24 19:51:36 -0800 + Commit: 769e092, github.com/apache/spark/pull/4755 + + [SPARK-5993][Streaming][Build] Fix assembly jar location of kafka-assembly + Tathagata Das + 2015-02-24 19:10:37 -0800 + Commit: 922b43b, github.com/apache/spark/pull/4753 + + [SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python. + Reynold Xin + 2015-02-24 18:59:23 -0800 + Commit: fba11c2, github.com/apache/spark/pull/4752 + + [SPARK-5904][SQL] DataFrame Java API test suites. + Reynold Xin + 2015-02-24 18:51:41 -0800 + Commit: 53a1ebf, github.com/apache/spark/pull/4751 + + [SPARK-5751] [SQL] [WIP] Revamped HiveThriftServer2Suite for robustness + Cheng Lian + 2015-02-25 08:34:55 +0800 + Commit: f816e73, github.com/apache/spark/pull/4720 + + [SPARK-5436] [MLlib] Validate GradientBoostedTrees using runWithValidation + MechCoder + 2015-02-24 15:13:22 -0800 + Commit: 2a0fe34, github.com/apache/spark/pull/4677 + + [SPARK-5973] [PySpark] fix zip with two RDDs with AutoBatchedSerializer + Davies Liu + 2015-02-24 14:50:00 -0800 + Commit: da505e5, github.com/apache/spark/pull/4745 + + [SPARK-5952][SQL] Lock when using hive metastore client + Michael Armbrust + 2015-02-24 13:39:29 -0800 + Commit: a2b9137, github.com/apache/spark/pull/4746 + + [Spark-5708] Add Slf4jSink to Spark Metrics + Judy , judynash + 2015-02-24 20:50:16 +0000 + Commit: c5ba975, github.com/apache/spark/pull/4644 + + [MLLIB] Change x_i to y_i in Variance's user guide + Xiangrui Meng + 2015-02-24 11:38:59 -0800 + Commit: 105791e, github.com/apache/spark/pull/4740 + + [SPARK-5965] Standalone Worker UI displays {{USER_JAR}} + Andrew Or + 2015-02-24 11:08:07 -0800 + Commit: 6d2caa5, github.com/apache/spark/pull/4739 + + [Spark-5967] [UI] Correctly clean JobProgressListener.stageIdToActiveJobIds + Tathagata Das + 2015-02-24 11:02:47 -0800 + Commit: 64d2c01, github.com/apache/spark/pull/4741 + + [SPARK-5532][SQL] Repartition should not use external rdd representation + Michael Armbrust + 2015-02-24 10:52:18 -0800 + Commit: 2012366, github.com/apache/spark/pull/4738 + + [SPARK-5910][SQL] Support for as in selectExpr + Michael Armbrust + 2015-02-24 10:49:51 -0800 + Commit: 0a59e45, 
github.com/apache/spark/pull/4736 + + [SPARK-5968] [SQL] Suppresses ParquetOutputCommitter WARN logs + Cheng Lian + 2015-02-24 10:45:38 -0800 + Commit: 8403331, github.com/apache/spark/pull/4744 + + [SPARK-5958][MLLIB][DOC] update block matrix user guide + Xiangrui Meng + 2015-02-23 22:08:44 -0800 + Commit: cf2e416, github.com/apache/spark/pull/4737 + + [SPARK-5873][SQL] Allow viewing of partially analyzed plans in queryExecution + Michael Armbrust + 2015-02-23 17:34:54 -0800 + Commit: 1ed5708, github.com/apache/spark/pull/4684 + + [SPARK-5935][SQL] Accept MapType in the schema provided to a JSON dataset. + Yin Huai , Yin Huai + 2015-02-23 17:16:34 -0800 + Commit: 48376bf, github.com/apache/spark/pull/4710 + + [SPARK-5912] [docs] [mllib] Small fixes to ChiSqSelector docs + Joseph K. Bradley + 2015-02-23 16:15:57 -0800 + Commit: 59536cc, github.com/apache/spark/pull/4732 + + [MLLIB] SPARK-5912 Programming guide for feature selection + Alexander Ulanov + 2015-02-23 12:09:40 -0800 + Commit: 28ccf5e, github.com/apache/spark/pull/4709 + + [SPARK-5939][MLLib] make FPGrowth example app take parameters + Jacky Li + 2015-02-23 08:47:28 -0800 + Commit: 651a1c0, github.com/apache/spark/pull/4714 + + [SPARK-5724] fix the misconfiguration in AkkaUtils + CodingCat + 2015-02-23 11:29:25 +0000 + Commit: 242d495, github.com/apache/spark/pull/4512 + + [SPARK-5943][Streaming] Update the test to use new API to reduce the warning + Saisai Shao + 2015-02-23 11:27:27 +0000 + Commit: 757b14b, github.com/apache/spark/pull/4722 + + [EXAMPLES] fix typo. + Makoto Fukuhara + 2015-02-23 09:24:33 +0000 + Commit: 9348767, github.com/apache/spark/pull/4724 + + [SPARK-3885] Provide mechanism to remove accumulators once they are no longer used + Ilya Ganelin + 2015-02-22 22:43:04 -0800 + Commit: 95cd643, github.com/apache/spark/pull/4021 + + [SPARK-911] allow efficient queries for a range if RDD is partitioned wi... + Aaron Josephs + 2015-02-22 22:09:06 -0800 + Commit: e4f9d03, github.com/apache/spark/pull/1381 + + [DataFrame] [Typo] Fix the typo + Cheng Hao + 2015-02-22 08:56:30 +0000 + Commit: 275b1be, github.com/apache/spark/pull/4717 + + [DOCS] Fix typo in API for custom InputFormats based on the “new” MapReduce API + Alexander + 2015-02-22 08:53:05 +0000 + Commit: a7f9039, github.com/apache/spark/pull/4718 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-02-21 23:07:30 -0800 + Commit: 46462ff, github.com/apache/spark/pull/3490 + + [SPARK-5860][CORE] JdbcRDD: overflow on large range with high number of partitions + Evan Yu + 2015-02-21 20:40:21 +0000 + Commit: 7683982, github.com/apache/spark/pull/4701 + + [SPARK-5937][YARN] Fix ClientSuite to set YARN mode, so that the correct class is used in t... 
+ Hari Shreedharan + 2015-02-21 10:01:01 -0800 + Commit: 7138816, github.com/apache/spark/pull/4711 + + SPARK-5841 [CORE] [HOTFIX 2] Memory leak in DiskBlockManager + Nishkam Ravi , nishkamravi2 , nravi + 2015-02-21 09:59:28 -0800 + Commit: d3cbd38, github.com/apache/spark/pull/4690 + + [MLlib] fix typo + Jacky Li + 2015-02-21 13:00:16 +0000 + Commit: e155324, github.com/apache/spark/pull/4713 + + [SPARK-5898] [SPARK-5896] [SQL] [PySpark] create DataFrame from pandas and tuple/list + Davies Liu + 2015-02-20 15:35:05 -0800 + Commit: 5b0a42c, github.com/apache/spark/pull/4679 + + [SPARK-5867] [SPARK-5892] [doc] [ml] [mllib] Doc cleanups for 1.3 release + Joseph K. Bradley + 2015-02-20 02:31:32 -0800 + Commit: 4a17eed, github.com/apache/spark/pull/4675 + + SPARK-5744 [CORE] Take 2. RDD.isEmpty / take fails for (empty) RDD of Nothing + Sean Owen + 2015-02-20 10:21:39 +0000 + Commit: d3dfebe, github.com/apache/spark/pull/4698 + + [SPARK-5909][SQL] Add a clearCache command to Spark SQL's cache manager + Yin Huai + 2015-02-20 16:20:02 +0800 + Commit: 70bfb5c, github.com/apache/spark/pull/4694 + + [SPARK-4808] Removing minimum number of elements read before spill check + mcheah + 2015-02-19 18:09:22 -0800 + Commit: 3be92cd, github.com/apache/spark/pull/4420 + + [SPARK-5900][MLLIB] make PIC and FPGrowth Java-friendly + Xiangrui Meng + 2015-02-19 18:06:16 -0800 + Commit: 0cfd2ce, github.com/apache/spark/pull/4695 + + SPARK-5570: No docs stating that `new SparkConf().set("spark.driver.memory", ...) will not work + Ilya Ganelin + 2015-02-19 15:50:58 -0800 + Commit: 6bddc40, github.com/apache/spark/pull/4665 + + SPARK-4682 [CORE] Consolidate various 'Clock' classes + Sean Owen + 2015-02-19 15:35:23 -0800 + Commit: 34b7c35, github.com/apache/spark/pull/4514 + + [Spark-5889] Remove pid file after stopping service. + Zhan Zhang + 2015-02-19 23:13:02 +0000 + Commit: ad6b169, github.com/apache/spark/pull/4676 + + [SPARK-5902] [ml] Made PipelineStage.transformSchema public instead of private to ml + Joseph K. Bradley + 2015-02-19 12:46:27 -0800 + Commit: a5fed34, github.com/apache/spark/pull/4682 + + [SPARK-5904][SQL] DataFrame API fixes. 
+ Reynold Xin + 2015-02-19 12:09:44 -0800 + Commit: 8ca3418, github.com/apache/spark/pull/4686 + + [SPARK-5825] [Spark Submit] Remove the double checking instance name when stopping the service + Cheng Hao + 2015-02-19 12:07:51 -0800 + Commit: 94cdb05, github.com/apache/spark/pull/4611 + + [SPARK-5423][Core] Cleanup resources in DiskMapIterator.finalize to ensure deleting the temp file + zsxwing + 2015-02-19 18:37:31 +0000 + Commit: 90095bf, github.com/apache/spark/pull/4219 + + [SPARK-5816] Add huge compatibility warning in DriverWrapper + Andrew Or + 2015-02-19 09:56:25 -0800 + Commit: 38e624a, github.com/apache/spark/pull/4687 + + SPARK-5548: Fix for AkkaUtilsSuite failure - attempt 2 + Jacek Lewandowski + 2015-02-19 09:53:36 -0800 + Commit: fb87f44, github.com/apache/spark/pull/4653 + + [SPARK-5846] Correctly set job description and pool for SQL jobs + Kay Ousterhout + 2015-02-19 09:49:34 +0800 + Commit: e945aa6, github.com/apache/spark/pull/4630 + + [SPARK-5879][MLLIB] update PIC user guide and add a Java example + Xiangrui Meng + 2015-02-18 16:29:32 -0800 + Commit: d12d2ad, github.com/apache/spark/pull/4680 + + [SPARK-5722] [SQL] [PySpark] infer int as LongType + Davies Liu + 2015-02-18 14:17:04 -0800 + Commit: aa8f10e, github.com/apache/spark/pull/4666 + + [SPARK-5840][SQL] HiveContext cannot be serialized due to tuple extraction + Reynold Xin + 2015-02-18 14:02:32 -0800 + Commit: f0e3b71, github.com/apache/spark/pull/4628 + + [SPARK-5507] Added documentation for BlockMatrix + Burak Yavuz + 2015-02-18 10:11:08 -0800 + Commit: a8eb92d, github.com/apache/spark/pull/4664 + + [SPARK-5519][MLLIB] add user guide with example code for fp-growth + Xiangrui Meng + 2015-02-18 10:09:56 -0800 + Commit: 85e9d09, github.com/apache/spark/pull/4661 + + SPARK-5669 [BUILD] [HOTFIX] Spark assembly includes incompatibly licensed libgfortran, libgcc code via JBLAS + Sean Owen + 2015-02-18 14:41:44 +0000 + Commit: 5aecdcf, github.com/apache/spark/pull/4673 + + [SPARK-4949]shutdownCallback in SparkDeploySchedulerBackend should be enclosed by synchronized block. + Kousuke Saruta + 2015-02-18 12:20:11 +0000 + Commit: 82197ed, github.com/apache/spark/pull/3781 + + SPARK-4610 addendum: [Minor] [MLlib] Minor doc fix in GBT classification example + MechCoder + 2015-02-18 10:13:28 +0000 + Commit: e79a7a6, github.com/apache/spark/pull/4672 + + [SPARK-5878] fix DataFrame.repartition() in Python + Davies Liu + 2015-02-18 01:00:54 -0800 + Commit: c1b6fa9, github.com/apache/spark/pull/4667 + + Avoid deprecation warnings in JDBCSuite. + Tor Myklebust + 2015-02-18 01:00:13 -0800 + Commit: de0dd6d, github.com/apache/spark/pull/4668 + + [Minor] [SQL] Cleans up DataFrame variable names and toDF() calls + Cheng Lian + 2015-02-17 23:36:20 -0800 + Commit: 61ab085, github.com/apache/spark/pull/4670 + + [SPARK-5731][Streaming][Test] Fix incorrect test in DirectKafkaStreamSuite + Tathagata Das + 2015-02-17 22:44:16 -0800 + Commit: 3912d33, github.com/apache/spark/pull/4597 + + [SPARK-5723][SQL]Change the default file format to Parquet for CTAS statements. 
+ Yin Huai + 2015-02-17 18:14:33 -0800 + Commit: e50934f, github.com/apache/spark/pull/4639 + + [SPARK-5875][SQL]logical.Project should not be resolved if it contains aggregates or generators + Yin Huai + 2015-02-17 17:50:39 -0800 + Commit: d5f12bf, github.com/apache/spark/pull/4663 + + [SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs() + Josh Rosen + 2015-02-17 17:45:16 -0800 + Commit: a51fc7e + + [SPARK-4454] Properly synchronize accesses to DAGScheduler cacheLocs map + Josh Rosen + 2015-02-17 17:39:58 -0800 + Commit: d46d624, github.com/apache/spark/pull/4660 + + [SPARK-5811] Added documentation for maven coordinates and added Spark Packages support + Burak Yavuz , Davies Liu + 2015-02-17 17:15:43 -0800 + Commit: ae6cfb3, github.com/apache/spark/pull/4662 + + [SPARK-5785] [PySpark] narrow dependency for cogroup/join in PySpark + Davies Liu + 2015-02-17 16:54:57 -0800 + Commit: c3d2b90, github.com/apache/spark/pull/4629 + + [SPARK-5852][SQL]Fail to convert a newly created empty metastore parquet table to a data source parquet table. + Yin Huai , Cheng Hao + 2015-02-17 15:47:59 -0800 + Commit: 117121a, github.com/apache/spark/pull/4655 + + [SPARK-5872] [SQL] create a sqlCtx in pyspark shell + Davies Liu + 2015-02-17 15:44:37 -0800 + Commit: 4d4cc76, github.com/apache/spark/pull/4659 + + [SPARK-5871] output explain in Python + Davies Liu + 2015-02-17 13:48:38 -0800 + Commit: 3df85dc, github.com/apache/spark/pull/4658 + + [SPARK-4172] [PySpark] Progress API in Python + Davies Liu + 2015-02-17 13:36:43 -0800 + Commit: 445a755, github.com/apache/spark/pull/3027 + + [SPARK-5868][SQL] Fix python UDFs in HiveContext and checks in SQLContext + Michael Armbrust + 2015-02-17 13:23:45 -0800 + Commit: de4836f, github.com/apache/spark/pull/4657 + + [SQL] [Minor] Update the HiveContext Unittest + Cheng Hao + 2015-02-17 12:25:35 -0800 + Commit: 9d281fa, github.com/apache/spark/pull/4584 + + [Minor][SQL] Use same function to check path parameter in JSONRelation + Liang-Chi Hsieh + 2015-02-17 12:24:13 -0800 + Commit: ac506b7, github.com/apache/spark/pull/4649 + + [SPARK-5862][SQL] Only transformUp the given plan once in HiveMetastoreCatalog + Liang-Chi Hsieh + 2015-02-17 12:23:18 -0800 + Commit: 4611de1, github.com/apache/spark/pull/4651 + + [Minor] fix typo in SQL document + CodingCat + 2015-02-17 12:16:52 -0800 + Commit: 31efb39, github.com/apache/spark/pull/4656 + + [SPARK-5864] [PySpark] support .jar as python package + Davies Liu + 2015-02-17 12:05:06 -0800 + Commit: fc4eb95, github.com/apache/spark/pull/4652 + + SPARK-5841 [CORE] [HOTFIX] Memory leak in DiskBlockManager + Sean Owen + 2015-02-17 19:40:06 +0000 + Commit: 49c19fd, github.com/apache/spark/pull/4648 + + MAINTENANCE: Automated closing of pull requests. 
+ Patrick Wendell + 2015-02-17 11:35:26 -0800 + Commit: 24f358b, github.com/apache/spark/pull/3297 + + [SPARK-3381] [MLlib] Eliminate bins for unordered features in DecisionTrees + MechCoder + 2015-02-17 11:19:23 -0800 + Commit: 9b746f3, github.com/apache/spark/pull/4231 + + [SPARK-5661]function hasShutdownDeleteTachyonDir should use shutdownDeleteTachyonPaths to determine whether contains file + xukun 00228947 , viper-kun + 2015-02-17 18:59:41 +0000 + Commit: b271c26, github.com/apache/spark/pull/4418 + + [SPARK-5778] throw if nonexistent metrics config file provided + Ryan Williams + 2015-02-17 10:57:16 -0800 + Commit: d8f69cf, github.com/apache/spark/pull/4571 + + [SPARK-5859] [PySpark] [SQL] fix DataFrame Python API + Davies Liu + 2015-02-17 10:22:48 -0800 + Commit: d8adefe, github.com/apache/spark/pull/4645 + + [SPARK-5166][SPARK-5247][SPARK-5258][SQL] API Cleanup / Documentation + Michael Armbrust + 2015-02-17 10:21:17 -0800 + Commit: c74b07f, github.com/apache/spark/pull/4642 + + [SPARK-5858][MLLIB] Remove unnecessary first() call in GLM + Xiangrui Meng + 2015-02-17 10:17:45 -0800 + Commit: c76da36, github.com/apache/spark/pull/4647 + + SPARK-5856: In Maven build script, launch Zinc with more memory + Patrick Wendell + 2015-02-17 10:10:01 -0800 + Commit: 3ce46e9, github.com/apache/spark/pull/4643 + + Revert "[SPARK-5363] [PySpark] check ending mark in non-block way" + Josh Rosen + 2015-02-17 07:48:27 -0800 + Commit: ee6e3ef + + [SPARK-5826][Streaming] Fix Configuration not serializable problem + jerryshao + 2015-02-17 10:45:18 +0000 + Commit: a65766b, github.com/apache/spark/pull/4612 + + HOTFIX: Style issue causing build break + Patrick Wendell + 2015-02-16 22:10:39 -0800 + Commit: c06e42f + + [SPARK-5802][MLLIB] cache transformed data in glm + Xiangrui Meng + 2015-02-16 22:09:04 -0800 + Commit: fd84229, github.com/apache/spark/pull/4593 + + [SPARK-5853][SQL] Schema support in Row. + Reynold Xin + 2015-02-16 20:42:57 -0800 + Commit: d380f32, github.com/apache/spark/pull/4640 + + SPARK-5850: Remove experimental label for Scala 2.11 and FlumePollingStream + Patrick Wendell + 2015-02-16 20:33:33 -0800 + Commit: a51d51f, github.com/apache/spark/pull/4638 + + [SPARK-5363] [PySpark] check ending mark in non-block way + Davies Liu + 2015-02-16 20:32:03 -0800 + Commit: ac6fe67, github.com/apache/spark/pull/4601 + + [SQL] Various DataFrame doc changes. 
+ Reynold Xin + 2015-02-16 19:00:30 -0800 + Commit: 0e180bf, github.com/apache/spark/pull/4636 + + [SPARK-5849] Handle more types of invalid JSON requests in SubmitRestProtocolMessage.parseAction + Josh Rosen + 2015-02-16 18:08:02 -0800 + Commit: 58a82a7, github.com/apache/spark/pull/4637 + + [SPARK-3340] Deprecate ADD_JARS and ADD_FILES + azagrebin + 2015-02-16 18:06:19 -0800 + Commit: 1668765, github.com/apache/spark/pull/4616 + + [SPARK-5788] [PySpark] capture the exception in python write thread + Davies Liu + 2015-02-16 17:57:14 -0800 + Commit: b1bd1dd, github.com/apache/spark/pull/4577 + + SPARK-5848: tear down the ConsoleProgressBar timer + Matt Whelan + 2015-02-17 00:59:49 +0000 + Commit: 1294a6e, github.com/apache/spark/pull/4635 + + [SPARK-4865][SQL]Include temporary tables in SHOW TABLES + Yin Huai + 2015-02-16 15:59:23 -0800 + Commit: e189cbb, github.com/apache/spark/pull/4618 + + [SQL] Optimize arithmetic and predicate operators + kai + 2015-02-16 15:58:05 -0800 + Commit: cb6c48c, github.com/apache/spark/pull/4472 + + [SPARK-5839][SQL]HiveMetastoreCatalog does not recognize table names and aliases of data source tables. + Yin Huai + 2015-02-16 15:54:01 -0800 + Commit: f3ff1eb, github.com/apache/spark/pull/4626 + + [SPARK-5746][SQL] Check invalid cases for the write path of data source API + Yin Huai + 2015-02-16 15:51:59 -0800 + Commit: 5b6cd65, github.com/apache/spark/pull/4617 + + HOTFIX: Break in Jekyll build from #4589 + Patrick Wendell + 2015-02-16 15:43:56 -0800 + Commit: 04b401d + + [SPARK-2313] Use socket to communicate GatewayServer port back to Python driver + Josh Rosen + 2015-02-16 15:25:11 -0800 + Commit: 0cfda84, github.com/apache/spark/pull/3424. + + SPARK-5357: Update commons-codec version to 1.10 (current) + Matt Whelan + 2015-02-16 23:05:34 +0000 + Commit: c01c4eb, github.com/apache/spark/pull/4153 + + SPARK-5841: remove DiskBlockManager shutdown hook on stop + Matt Whelan + 2015-02-16 22:54:32 +0000 + Commit: bb05982, github.com/apache/spark/pull/4627 + + [SPARK-5833] [SQL] Adds REFRESH TABLE command + Cheng Lian + 2015-02-16 12:52:05 -0800 + Commit: c51ab37, github.com/apache/spark/pull/4624 + + [SPARK-5296] [SQL] Add more filter types for data sources API + Cheng Lian + 2015-02-16 12:48:55 -0800 + Commit: 6f54dee, github.com/apache/spark/pull/4623 + + [SQL] Add fetched row count in SparkSQLCLIDriver + OopsOutOfMemory + 2015-02-16 12:34:09 -0800 + Commit: b4d7c70, github.com/apache/spark/pull/4604 + + [SQL] Initial support for reporting location of error in sql string + Michael Armbrust + 2015-02-16 12:32:56 -0800 + Commit: 104b2c4, github.com/apache/spark/pull/4587 + + [SPARK-5824] [SQL] add null format in ctas and set default col comment to null + Daoyuan Wang + 2015-02-16 12:31:36 -0800 + Commit: 275a0c0, github.com/apache/spark/pull/4609 + + [SQL] [Minor] Update the SpecificMutableRow.copy + Cheng Hao + 2015-02-16 12:21:08 -0800 + Commit: cc552e0, github.com/apache/spark/pull/4619 + + SPARK-5795 [STREAMING] api.java.JavaPairDStream.saveAsNewAPIHadoopFiles may not friendly to java + Sean Owen + 2015-02-16 19:32:31 +0000 + Commit: 8e25373, github.com/apache/spark/pull/4608 + + Minor fixes for commit https://github.com/apache/spark/pull/4592. 
+ Reynold Xin + 2015-02-16 10:09:55 -0800 + Commit: 9baac56 + + [SPARK-5799][SQL] Compute aggregation function on specified numeric columns + Liang-Chi Hsieh + 2015-02-16 10:06:11 -0800 + Commit: 5c78be7, github.com/apache/spark/pull/4592 + + SPARK-5815 [MLLIB] Part 2. Deprecate SVDPlusPlus APIs that expose DoubleMatrix from JBLAS + Sean Owen + 2015-02-16 17:04:30 +0000 + Commit: a3afa4a, github.com/apache/spark/pull/4625 + + [SPARK-5831][Streaming]When checkpoint file size is bigger than 10, then delete the old ones + Xutingjun <1039320815@qq.com> + 2015-02-16 14:54:23 +0000 + Commit: 1115e8e, github.com/apache/spark/pull/4621 + + [SPARK-4553] [SPARK-5767] [SQL] Wires Parquet data source with the newly introduced write support for data source API + Cheng Lian + 2015-02-16 01:38:31 -0800 + Commit: 3ce58cf, github.com/apache/spark/pull/4563 + + [Minor] [SQL] Renames stringRddToDataFrame to stringRddToDataFrameHolder for consistency + Cheng Lian + 2015-02-16 01:33:37 -0800 + Commit: 199a9e8, github.com/apache/spark/pull/4613 + + [Ml] SPARK-5804 Explicitly manage cache in Crossvalidator k-fold loop + Peter Rudenko + 2015-02-16 00:07:23 -0800 + Commit: d51d6ba, github.com/apache/spark/pull/4595 + + [Ml] SPARK-5796 Don't transform data on a last estimator in Pipeline + Peter Rudenko + 2015-02-15 20:51:32 -0800 + Commit: c78a12c, github.com/apache/spark/pull/4590 + + SPARK-5815 [MLLIB] Deprecate SVDPlusPlus APIs that expose DoubleMatrix from JBLAS + Sean Owen + 2015-02-15 20:41:27 -0800 + Commit: acf2558, github.com/apache/spark/pull/4614 + + [SPARK-5769] Set params in constructors and in setParams in Python ML pipelines + Xiangrui Meng + 2015-02-15 20:29:26 -0800 + Commit: cd4a153, github.com/apache/spark/pull/4564 + + SPARK-5669 [BUILD] Spark assembly includes incompatibly licensed libgfortran, libgcc code via JBLAS + Sean Owen + 2015-02-15 09:15:48 -0800 + Commit: 836577b, github.com/apache/spark/pull/4453 + + [MLLIB][SPARK-5502] User guide for isotonic regression + martinzapletal + 2015-02-15 09:10:03 -0800 + Commit: 61eb126, github.com/apache/spark/pull/4536 + + [SPARK-5827][SQL] Add missing import in the example of SqlContext + Takeshi Yamamuro + 2015-02-15 14:42:20 +0000 + Commit: c771e47, github.com/apache/spark/pull/4615 + + SPARK-5822 [BUILD] cannot import src/main/scala & src/test/scala into eclipse as source folder + gli + 2015-02-14 20:43:27 +0000 + Commit: ed5f4bb, github.com/apache/spark/pull/4531 + + Revise formatting of previous commit f80e2629bb74bc62960c61ff313f7e7802d61319 + Sean Owen + 2015-02-14 20:12:29 +0000 + Commit: 15a2ab5 + + [SPARK-5800] Streaming Docs. 
Change linked files according the selected language + gasparms + 2015-02-14 20:10:29 +0000 + Commit: f80e262, github.com/apache/spark/pull/4589 + + [SPARK-5752][SQL] Don't implicitly convert RDDs directly to DataFrames + Reynold Xin , Davies Liu + 2015-02-13 23:03:22 -0800 + Commit: e98dfe6, github.com/apache/spark/pull/4556 + + SPARK-3290 [GRAPHX] No unpersist callls in SVDPlusPlus + Sean Owen + 2015-02-13 20:12:52 -0800 + Commit: 0ce4e43, github.com/apache/spark/pull/4234 + + [SPARK-5227] [SPARK-5679] Disable FileSystem cache in WholeTextFileRecordReaderSuite + Josh Rosen + 2015-02-13 17:45:31 -0800 + Commit: d06d5ee, github.com/apache/spark/pull/4599 + + [SPARK-5730][ML] add doc groups to spark.ml components + Xiangrui Meng + 2015-02-13 16:45:59 -0800 + Commit: 4f4c6d5, github.com/apache/spark/pull/4600 + + [SPARK-5803][MLLIB] use ArrayBuilder to build primitive arrays + Xiangrui Meng + 2015-02-13 16:43:49 -0800 + Commit: d50a91d, github.com/apache/spark/pull/4594 + + [SPARK-5806] re-organize sections in mllib-clustering.md + Xiangrui Meng + 2015-02-13 15:09:27 -0800 + Commit: cc56c87, github.com/apache/spark/pull/4598 + + [SPARK-5789][SQL]Throw a better error message if JsonRDD.parseJson encounters unrecoverable parsing errors. + Yin Huai + 2015-02-13 13:51:06 -0800 + Commit: 2e0c084, github.com/apache/spark/pull/4582 + + [SPARK-5642] [SQL] Apply column pruning on unused aggregation fields + Daoyuan Wang , Michael Armbrust + 2015-02-13 13:46:50 -0800 + Commit: 2cbb3e4, github.com/apache/spark/pull/4415 + + [HOTFIX] Fix build break in MesosSchedulerBackendSuite + Andrew Or + 2015-02-13 13:10:29 -0800 + Commit: 5d3cc6b + + [HOTFIX] Ignore DirectKafkaStreamSuite. + Reynold Xin + 2015-02-13 12:43:53 -0800 + Commit: 378c7eb + + SPARK-5805 Fixed the type error in documentation. + Emre Sevinç + 2015-02-13 12:31:27 -0800 + Commit: 9f31db0, github.com/apache/spark/pull/4596 + + [SPARK-5735] Replace uses of EasyMock with Mockito + Josh Rosen + 2015-02-13 09:53:57 -0800 + Commit: 077eec2, github.com/apache/spark/pull/4578 + + [SPARK-5783] Better eventlog-parsing error messages + Ryan Williams + 2015-02-13 09:47:26 -0800 + Commit: fc6d3e7, github.com/apache/spark/pull/4573 + + [SPARK-5503][MLLIB] Example code for Power Iteration Clustering + sboeschhuawei + 2015-02-13 09:45:57 -0800 + Commit: e1a1ff8, github.com/apache/spark/pull/4495 + + [SPARK-5732][CORE]:Add an option to print the spark version in spark script. + uncleGen , genmao.ygm + 2015-02-13 09:43:10 -0800 + Commit: c0ccd25, github.com/apache/spark/pull/4522 + + [SPARK-4832][Deploy]some other processes might take the daemon pid + WangTaoTheTonic , WangTaoTheTonic + 2015-02-13 10:27:23 +0000 + Commit: 1768bd5, github.com/apache/spark/pull/3683 + + [SPARK-3365][SQL]Wrong schema generated for List type + tianyi + 2015-02-12 22:18:39 -0800 + Commit: 1c8633f, github.com/apache/spark/pull/4581 + + [SQL] Fix docs of SQLContext.tables + Yin Huai + 2015-02-12 20:37:55 -0800 + Commit: 2aea892, github.com/apache/spark/pull/4579 + + [SPARK-3299][SQL]Public API in SQLContext to list tables + Yin Huai + 2015-02-12 18:08:01 -0800 + Commit: 1d0596a, github.com/apache/spark/pull/4547 + + [SQL] Move SaveMode to SQL package. 
+ Yin Huai + 2015-02-12 15:32:17 -0800 + Commit: c025a46, github.com/apache/spark/pull/4542 + + [SPARK-5335] Fix deletion of security groups within a VPC + Vladimir Grigor , Vladimir Grigor + 2015-02-12 23:26:24 +0000 + Commit: ada993e, github.com/apache/spark/pull/4122 + + [SPARK-5755] [SQL] remove unnecessary Add + Daoyuan Wang + 2015-02-12 15:22:07 -0800 + Commit: d5fc514, github.com/apache/spark/pull/4551 + + [SPARK-5573][SQL] Add explode to dataframes + Michael Armbrust + 2015-02-12 15:19:19 -0800 + Commit: ee04a8b, github.com/apache/spark/pull/4546 + + [SPARK-5758][SQL] Use LongType as the default type for integers in JSON schema inference. + Yin Huai + 2015-02-12 15:17:25 -0800 + Commit: c352ffb, github.com/apache/spark/pull/4544 + + [SPARK-5780] [PySpark] Mute the logging during unit tests + Davies Liu + 2015-02-12 14:54:38 -0800 + Commit: 0bf0315, github.com/apache/spark/pull/4572 + + SPARK-5747: Fix wordsplitting bugs in make-distribution.sh + David Y. Ross + 2015-02-12 14:52:38 -0800 + Commit: 26c816e, github.com/apache/spark/pull/4540 + + [SPARK-5759][Yarn]ExecutorRunnable should catch YarnException while NMClient start contain... + lianhuiwang + 2015-02-12 14:50:16 -0800 + Commit: 947b8bd, github.com/apache/spark/pull/4554 + + [SPARK-5760][SPARK-5761] Fix standalone rest protocol corner cases + revamp tests + Andrew Or + 2015-02-12 14:47:52 -0800 + Commit: 1d5663e, github.com/apache/spark/pull/4557 + + [SPARK-5762] Fix shuffle write time for sort-based shuffle + Kay Ousterhout + 2015-02-12 14:46:37 -0800 + Commit: 47c73d4, github.com/apache/spark/pull/4559 + + [SPARK-5765][Examples]Fixed word split problem in run-example and compute-classpath + Venkata Ramana G , Venkata Ramana Gollamudi + 2015-02-12 14:44:21 -0800 + Commit: 629d014, github.com/apache/spark/pull/4561 + + [EC2] Update default Spark version to 1.2.1 + Katsunori Kanda + 2015-02-12 14:38:42 -0800 + Commit: 9c80765, github.com/apache/spark/pull/4566 + + [SPARK-5645] Added local read bytes/time to task metrics + Kay Ousterhout + 2015-02-12 14:35:44 -0800 + Commit: 893d6fd, github.com/apache/spark/pull/4510 + + [SQL] Improve error messages + Michael Armbrust , wangfei + 2015-02-12 13:11:28 -0800 + Commit: aa4ca8b, github.com/apache/spark/pull/4558 + + [SQL][DOCS] Update sql documentation + Antonio Navarro Perez + 2015-02-12 12:46:17 -0800 + Commit: 6a1be02, github.com/apache/spark/pull/4560 + + SPARK-5776 JIRA version not of form x.y.z breaks merge_spark_pr.py + Sean Owen + 2015-02-12 20:14:45 +0000 + Commit: bc57789, github.com/apache/spark/pull/4570 + + [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s + Xiangrui Meng + 2015-02-12 10:48:13 -0800 + Commit: 99bd500, github.com/apache/spark/pull/4555 + + [SPARK-5655] Don't chmod700 application files if running in YARN + Andrew Rowson + 2015-02-12 18:41:39 +0000 + Commit: 466b1f6, github.com/apache/spark/pull/4509 + + ignore cache paths for RAT tests + Oren Mazor + 2015-02-12 18:37:00 +0000 + Commit: 9a6efbc, github.com/apache/spark/pull/4569 + + SPARK-5727 [BUILD] Remove Debian packaging + Sean Owen + 2015-02-12 12:36:26 +0000 + Commit: 9a3ea49, github.com/apache/spark/pull/4526 + + [SQL] Make dataframe more tolerant of being serialized + Michael Armbrust + 2015-02-11 19:05:49 -0800 + Commit: a38e23c, github.com/apache/spark/pull/4545 + + [SQL] Two 
DataFrame fixes. + Reynold Xin + 2015-02-11 18:32:48 -0800 + Commit: d931b01, github.com/apache/spark/pull/4543 + + [SPARK-3688][SQL] More inline comments for LogicalPlan. + Reynold Xin + 2015-02-11 15:26:31 -0800 + Commit: fa6bdc6, github.com/apache/spark/pull/4539 + + [SPARK-3688][SQL]LogicalPlan can't resolve column correctlly + tianyi + 2015-02-11 12:50:17 -0800 + Commit: 44b2311, github.com/apache/spark/pull/4524 + + [SPARK-5454] More robust handling of self joins + Michael Armbrust + 2015-02-11 12:31:56 -0800 + Commit: a60d2b7, github.com/apache/spark/pull/4520 + + Remove outdated remark about take(n). + Daniel Darabos + 2015-02-11 20:24:17 +0000 + Commit: 03bf704, github.com/apache/spark/pull/4533 + + [SPARK-5677] [SPARK-5734] [SQL] [PySpark] Python DataFrame API remaining tasks + Davies Liu + 2015-02-11 12:13:16 -0800 + Commit: b694eb9, github.com/apache/spark/pull/4528 + + [SPARK-5733] Error Link in Pagination of HistroyPage when showing Incomplete Applications + guliangliang + 2015-02-11 15:55:49 +0000 + Commit: 1ac099e, github.com/apache/spark/pull/4523 + + SPARK-5727 [BUILD] Deprecate Debian packaging + Sean Owen + 2015-02-11 08:30:16 +0000 + Commit: bd0d6e0, github.com/apache/spark/pull/4516 + + SPARK-5728 [STREAMING] MQTTStreamSuite leaves behind ActiveMQ database files + Sean Owen + 2015-02-11 08:13:51 +0000 + Commit: da89720, github.com/apache/spark/pull/4517 + + [SPARK-4964] [Streaming] refactor createRDD to take leaders via map instead of array + cody koeninger + 2015-02-11 00:13:27 -0800 + Commit: 658687b, github.com/apache/spark/pull/4511 + + HOTFIX: Adding Junit to Hive tests for Maven build + Patrick Wendell + 2015-02-10 23:39:21 -0800 + Commit: c2131c0 + + HOTFIX: Java 6 compilation error in Spark SQL + Patrick Wendell + 2015-02-10 22:43:32 -0800 + Commit: 7e2f882 + + [SPARK-5714][Mllib] Refactor initial step of LDA to remove redundant operations + Liang-Chi Hsieh + 2015-02-10 21:51:15 -0800 + Commit: f86a89a, github.com/apache/spark/pull/4501 + + [SPARK-5702][SQL] Allow short names for built-in data sources. + Reynold Xin + 2015-02-10 20:40:21 -0800 + Commit: b8f88d3, github.com/apache/spark/pull/4489 + + [SPARK-5729] Potential NPE in standalone REST API + Andrew Or + 2015-02-10 20:19:14 -0800 + Commit: b969182, github.com/apache/spark/pull/4518 + + [SPARK-4879] Use driver to coordinate Hadoop output committing for speculative tasks + mcheah , Josh Rosen + 2015-02-10 20:12:18 -0800 + Commit: 1cb3770, github.com/apache/spark/pull/4155. + + [SQL][DataFrame] Fix column computability bug. + Reynold Xin + 2015-02-10 19:50:44 -0800 + Commit: 7e24249, github.com/apache/spark/pull/4519 + + [SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose + Cheng Hao + 2015-02-10 19:40:51 -0800 + Commit: 45df77b, github.com/apache/spark/pull/4496 + + [SPARK-5704] [SQL] [PySpark] createDataFrame from RDD with columns + Davies Liu + 2015-02-10 19:40:12 -0800 + Commit: ea60284, github.com/apache/spark/pull/4498 + + [SPARK-5683] [SQL] Avoid multiple json generator created + Cheng Hao + 2015-02-10 18:19:56 -0800 + Commit: a60aea8, github.com/apache/spark/pull/4468 + + [SQL] Add an exception for analysis errors. 
+ Michael Armbrust + 2015-02-10 17:32:42 -0800 + Commit: 6195e24, github.com/apache/spark/pull/4439 + + [SPARK-5658][SQL] Finalize DDL and write support APIs + Yin Huai + 2015-02-10 17:29:52 -0800 + Commit: aaf50d0, github.com/apache/spark/pull/4446 + + [SPARK-5493] [core] Add option to impersonate user. + Marcelo Vanzin + 2015-02-10 17:19:10 -0800 + Commit: ed167e7, github.com/apache/spark/pull/4405 + + [SQL] Make Options in the data source API CREATE TABLE statements optional. + Yin Huai + 2015-02-10 17:06:12 -0800 + Commit: e28b6bd, github.com/apache/spark/pull/4515 + + [SPARK-5725] [SQL] Fixes ParquetRelation2.equals + Cheng Lian + 2015-02-10 17:02:44 -0800 + Commit: 2d50a01, github.com/apache/spark/pull/4513 + + [SQL][Minor] correct some comments + Sheng, Li , OopsOutOfMemory + 2015-02-11 00:59:46 +0000 + Commit: 91e3512, github.com/apache/spark/pull/4508 + + [SPARK-5644] [Core]Delete tmp dir when sc is stop + Sephiroth-Lin + 2015-02-10 23:23:35 +0000 + Commit: 52983d7, github.com/apache/spark/pull/4412 + + [SPARK-5343][GraphX]: ShortestPaths traverses backwards + Brennon York + 2015-02-10 14:57:00 -0800 + Commit: 5820961, github.com/apache/spark/pull/4478 + + [SPARK-5021] [MLlib] Gaussian Mixture now supports Sparse Input + MechCoder + 2015-02-10 14:05:55 -0800 + Commit: fd2c032, github.com/apache/spark/pull/4459 + + [SPARK-5686][SQL] Add show current roles command in HiveQl + OopsOutOfMemory + 2015-02-10 13:20:15 -0800 + Commit: f98707c, github.com/apache/spark/pull/4471 + + [SQL] Add toString to DataFrame/Column + Michael Armbrust + 2015-02-10 13:14:01 -0800 + Commit: de80b1b, github.com/apache/spark/pull/4436 + + [SPARK-5668] Display region in spark_ec2.py get_existing_cluster() + Miguel Peralvo + 2015-02-10 19:54:52 +0000 + Commit: c49a404, github.com/apache/spark/pull/4457 + + [SPARK-5592][SQL] java.net.URISyntaxException when insert data to a partitioned table + wangfei , Fei Wang + 2015-02-10 11:54:30 -0800 + Commit: 59272da, github.com/apache/spark/pull/4368 + + [HOTFIX][SPARK-4136] Fix compilation and tests + Andrew Or + 2015-02-10 11:18:01 -0800 + Commit: b640c84 + + SPARK-4136. 
Under dynamic allocation, cancel outstanding executor requests when no longer needed + Sandy Ryza + 2015-02-10 11:07:25 -0800 + Commit: 69bc3bb, github.com/apache/spark/pull/4168 + + [SPARK-5716] [SQL] Support TOK_CHARSETLITERAL in HiveQl + Daoyuan Wang + 2015-02-10 11:08:21 -0800 + Commit: c7ad80a, github.com/apache/spark/pull/4502 + + [Spark-5717] [MLlib] add stop and reorganize import + JqueryFan , Yuhao Yang + 2015-02-10 17:37:32 +0000 + Commit: 6cc96cf, github.com/apache/spark/pull/4503 + + [SPARK-1805] [EC2] Validate instance types + Nicholas Chammas + 2015-02-10 15:45:38 +0000 + Commit: 50820f1, github.com/apache/spark/pull/4455 + + [SPARK-5700] [SQL] [Build] Bumps jets3t to 0.9.3 for hadoop-2.3 and hadoop-2.4 profiles + Cheng Lian + 2015-02-10 02:28:47 -0800 + Commit: ba66793, github.com/apache/spark/pull/4499 + + SPARK-5239 [CORE] JdbcRDD throws "java.lang.AbstractMethodError: oracle.jdbc.driver.xxxxxx.isClosed()Z" + Sean Owen + 2015-02-10 09:19:01 +0000 + Commit: 2d1e916, github.com/apache/spark/pull/4470 + + [SPARK-4964][Streaming][Kafka] More updates to Exactly-once Kafka stream + Tathagata Das + 2015-02-09 22:45:48 -0800 + Commit: c151346, github.com/apache/spark/pull/4384 + + [SPARK-5597][MLLIB] save/load for decision trees and emsembles + Joseph K. Bradley , Xiangrui Meng + 2015-02-09 22:09:07 -0800 + Commit: ef2f55b, github.com/apache/spark/pull/4444. + + [SQL] Remove the duplicated code + Cheng Hao + 2015-02-09 21:33:34 -0800 + Commit: bd0b5ea, github.com/apache/spark/pull/4494 + + [SPARK-5701] Only set ShuffleReadMetrics when task has shuffle deps + Kay Ousterhout + 2015-02-09 21:22:09 -0800 + Commit: a2d33d0, github.com/apache/spark/pull/4488 + + [SPARK-5703] AllJobsPage throws empty.max exception + Andrew Or + 2015-02-09 21:18:48 -0800 + Commit: a95ed52, github.com/apache/spark/pull/4490 + + [SPARK-2996] Implement userClassPathFirst for driver, yarn. + Marcelo Vanzin + 2015-02-09 21:17:06 -0800 + Commit: 20a6013, github.com/apache/spark/pull/3233 + + SPARK-4900 [MLLIB] MLlib SingularValueDecomposition ARPACK IllegalStateException + Sean Owen + 2015-02-09 21:13:58 -0800 + Commit: 36c4e1d, github.com/apache/spark/pull/4485 + + Add a config option to print DAG. + KaiXinXiaoLei + 2015-02-09 20:58:58 -0800 + Commit: 31d435e, github.com/apache/spark/pull/4257 + + [SPARK-5469] restructure pyspark.sql into multiple files + Davies Liu + 2015-02-09 20:49:22 -0800 + Commit: 08488c1, github.com/apache/spark/pull/4479 + + [SPARK-5698] Do not let user request negative # of executors + Andrew Or + 2015-02-09 17:33:29 -0800 + Commit: d302c48, github.com/apache/spark/pull/4483 + + [SPARK-5699] [SQL] [Tests] Runs hive-thriftserver tests whenever SQL code is modified + Cheng Lian + 2015-02-09 16:52:05 -0800 + Commit: 3ec3ad2, github.com/apache/spark/pull/4486 + + [SPARK-5648][SQL] support "alter ... unset tblproperties("key")" + DoingDone9 <799203320@qq.com> + 2015-02-09 16:40:26 -0800 + Commit: d08e7c2, github.com/apache/spark/pull/4424 + + [SPARK-2096][SQL] support dot notation on array of struct + Wenchen Fan + 2015-02-09 16:39:34 -0800 + Commit: 0ee53eb, github.com/apache/spark/pull/2405 + + [SPARK-5614][SQL] Predicate pushdown through Generate. 
+ Lu Yan + 2015-02-09 16:25:38 -0800 + Commit: 2a36292, github.com/apache/spark/pull/4394 + + [SPARK-5696] [SQL] [HOTFIX] Asks HiveThriftServer2 to re-initialize log4j using Hive configurations + Cheng Lian + 2015-02-09 16:23:12 -0800 + Commit: b8080aa, github.com/apache/spark/pull/4484 + + [SQL] Code cleanup. + Yin Huai + 2015-02-09 16:20:42 -0800 + Commit: 5f0b30e, github.com/apache/spark/pull/4482 + + [SQL] Add some missing DataFrame functions. + Michael Armbrust + 2015-02-09 16:02:56 -0800 + Commit: 68b25cf, github.com/apache/spark/pull/4437 + + [SPARK-5611] [EC2] Allow spark-ec2 repo and branch to be set on CLI of spark_ec2.py + Florian Verhein + 2015-02-09 23:47:07 +0000 + Commit: b884daa, github.com/apache/spark/pull/4385 + + [SPARK-5675][SQL] XyzType companion object should subclass XyzType + Reynold Xin + 2015-02-09 14:51:46 -0800 + Commit: f48199e, github.com/apache/spark/pull/4463 + + [SPARK-4905][STREAMING] FlumeStreamSuite fix. + Hari Shreedharan + 2015-02-09 14:17:14 -0800 + Commit: 0765af9, github.com/apache/spark/pull/4371 + + [SPARK-5691] Fixing wrong data structure lookup for dupe app registratio... + mcheah + 2015-02-09 13:20:14 -0800 + Commit: 6fe70d8, github.com/apache/spark/pull/4477 + + [SPARK-5664][BUILD] Restore stty settings when exiting from SBT's spark-shell + Liang-Chi Hsieh + 2015-02-09 11:45:12 -0800 + Commit: dae2161, github.com/apache/spark/pull/4451 + + [SPARK-5678] Convert DataFrame to pandas.DataFrame and Series + Davies Liu + 2015-02-09 11:42:52 -0800 + Commit: afb1316, github.com/apache/spark/pull/4476 + + SPARK-4267 [YARN] Failing to launch jobs on Spark on YARN with Hadoop 2.5.0 or later + Sean Owen + 2015-02-09 10:33:57 -0800 + Commit: de78060, github.com/apache/spark/pull/4452 + + SPARK-2149. [MLLIB] Univariate kernel density estimation + Sandy Ryza + 2015-02-09 10:12:12 +0000 + Commit: 0793ee1, github.com/apache/spark/pull/1093 + + [SPARK-5473] [EC2] Expose SSH failures after status checks pass + Nicholas Chammas + 2015-02-09 09:44:53 +0000 + Commit: 4dfe180, github.com/apache/spark/pull/4262 + + [SPARK-5539][MLLIB] LDA guide + Xiangrui Meng , Joseph K. Bradley + 2015-02-08 23:40:36 -0800 + Commit: 855d12a, github.com/apache/spark/pull/4465 + + [SPARK-5472][SQL] Fix Scala code style + Hung Lin + 2015-02-08 22:36:42 -0800 + Commit: 4575c56, github.com/apache/spark/pull/4464 + + SPARK-4405 [MLLIB] Matrices.* construction methods should check for rows x cols overflow + Sean Owen + 2015-02-08 21:08:50 -0800 + Commit: 4396dfb, github.com/apache/spark/pull/4461 + + [SPARK-5660][MLLIB] Make Matrix apply public + Joseph K. Bradley , Xiangrui Meng + 2015-02-08 21:07:36 -0800 + Commit: c171611, github.com/apache/spark/pull/4447 + + [SPARK-5643][SQL] Add a show method to print the content of a DataFrame in tabular format. + Reynold Xin + 2015-02-08 18:56:51 -0800 + Commit: a052ed4, github.com/apache/spark/pull/4416 + + SPARK-5665 [DOCS] Update netlib-java documentation + Sam Halliday , Sam Halliday + 2015-02-08 16:34:26 -0800 + Commit: 56aff4b, github.com/apache/spark/pull/4448 + + [SPARK-5598][MLLIB] model save/load for ALS + Xiangrui Meng + 2015-02-08 16:26:20 -0800 + Commit: 5c299c5, github.com/apache/spark/pull/4422 + + [SQL] Set sessionState in QueryExecution. 
+ Yin Huai + 2015-02-08 14:55:07 -0800 + Commit: 804949d, github.com/apache/spark/pull/4445 + + [SPARK-3039] [BUILD] Spark assembly for new hadoop API (hadoop 2) contai... + medale + 2015-02-08 10:35:29 +0000 + Commit: 75fdccc, github.com/apache/spark/pull/4315 + + [SPARK-5672][Web UI] Don't return `ERROR 500` when have missing args + Kirill A. Korinskiy + 2015-02-08 10:31:46 +0000 + Commit: 23a99da, github.com/apache/spark/pull/4239 + + [SPARK-5656] Fail gracefully for large values of k and/or n that will ex... + mbittmann , bittmannm + 2015-02-08 10:13:29 +0000 + Commit: 4878313, github.com/apache/spark/pull/4433 + + [SPARK-5366][EC2] Check the mode of private key + liuchang0812 + 2015-02-08 10:08:51 +0000 + Commit: 6fb141e, github.com/apache/spark/pull/4162 + + [SPARK-5671] Upgrade jets3t to 0.9.2 in hadoop-2.3 and 2.4 profiles + Josh Rosen + 2015-02-07 17:19:08 -0800 + Commit: 5de14cc, github.com/apache/spark/pull/4454 + + [SPARK-5108][BUILD] Jackson dependency management for Hadoop-2.6.0 support + Zhan Zhang + 2015-02-07 19:41:30 +0000 + Commit: ecbbed2, github.com/apache/spark/pull/3938 + + SPARK-5408: Use -XX:MaxPermSize specified by user instead of default in ... + Jacek Lewandowski + 2015-02-07 15:58:04 +0000 + Commit: dd4cb33, github.com/apache/spark/pull/4203 + + [BUILD] Add the ability to launch spark-shell from SBT. + Michael Armbrust + 2015-02-07 00:14:38 -0800 + Commit: e9a4fe1, github.com/apache/spark/pull/4438 + + [SPARK-5388] Provide a stable application submission gateway for standalone cluster mode + Andrew Or + 2015-02-06 15:57:06 -0800 + Commit: 1390e56, github.com/apache/spark/pull/4216 + + SPARK-5403: Ignore UserKnownHostsFile in SSH calls + Grzegorz Dubicki + 2015-02-06 15:43:58 -0800 + Commit: e772b4e, github.com/apache/spark/pull/4196 + + [SPARK-5601][MLLIB] make streaming linear algorithms Java-friendly + Xiangrui Meng + 2015-02-06 15:42:59 -0800 + Commit: 0e23ca9, github.com/apache/spark/pull/4432 + + [SQL] [Minor] HiveParquetSuite was disabled by mistake, re-enable them + Cheng Lian + 2015-02-06 15:23:42 -0800 + Commit: c402140, github.com/apache/spark/pull/4440 + + [SQL] Use TestSQLContext in Java tests + Michael Armbrust + 2015-02-06 15:11:02 -0800 + Commit: 76c4bf5, github.com/apache/spark/pull/4441 + + [SPARK-4994][network]Cleanup removed executors' ShuffleInfo in yarn shuffle service + lianhuiwang + 2015-02-06 14:47:52 -0800 + Commit: 61073f8, github.com/apache/spark/pull/3828 + + [SPARK-5444][Network]Add a retry to deal with the conflict port in netty server. + huangzhaowei + 2015-02-06 14:35:29 -0800 + Commit: 2bda1c1, github.com/apache/spark/pull/4240 + + [SPARK-4874] [CORE] Collect record count metrics + Kostas Sakellis + 2015-02-06 14:31:20 -0800 + Commit: dcd1e42, github.com/apache/spark/pull/4067 + + [HOTFIX] Fix the maven build after adding sqlContext to spark-shell + Michael Armbrust + 2015-02-06 14:27:06 -0800 + Commit: 5796156, github.com/apache/spark/pull/4443 + + [SPARK-5600] [core] Clean up FsHistoryProvider test, fix app sort order. + Marcelo Vanzin + 2015-02-06 14:23:09 -0800 + Commit: 5687bab, github.com/apache/spark/pull/4370 + + SPARK-5613: Catch the ApplicationNotFoundException exception to avoid thread from getting killed on yarn restart. 
+ Kashish Jain + 2015-02-06 13:47:23 -0800 + Commit: ca66159, github.com/apache/spark/pull/4392 + + SPARK-5633 pyspark saveAsTextFile support for compression codec + Vladimir Vladimirov + 2015-02-06 13:55:02 -0800 + Commit: b3872e0, github.com/apache/spark/pull/4403 + + [HOTFIX][MLLIB] fix a compilation error with java 6 + Xiangrui Meng + 2015-02-06 13:52:35 -0800 + Commit: 65181b7, github.com/apache/spark/pull/4442 + + [SPARK-4983] Insert waiting time before tagging EC2 instances + GenTang , Gen TANG + 2015-02-06 13:27:34 -0800 + Commit: 0f3a360, github.com/apache/spark/pull/3986 + + [SPARK-5586][Spark Shell][SQL] Make `sqlContext` available in spark shell + OopsOutOfMemory + 2015-02-06 13:20:10 -0800 + Commit: 3d3ecd7, github.com/apache/spark/pull/4387 + + [SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields + Wenchen Fan + 2015-02-06 13:08:09 -0800 + Commit: 4793c84, github.com/apache/spark/pull/4068 + + [SQL][Minor] Remove cache keyword in SqlParser + wangfei + 2015-02-06 12:42:23 -0800 + Commit: bc36356, github.com/apache/spark/pull/4393 + + [SQL][HiveConsole][DOC] HiveConsole `correct hiveconsole imports` + OopsOutOfMemory + 2015-02-06 12:41:28 -0800 + Commit: b62c352, github.com/apache/spark/pull/4389 + + [SPARK-5595][SPARK-5603][SQL] Add a rule to do PreInsert type casting and field renaming and invalidating in memory cache after INSERT + Yin Huai + 2015-02-06 12:38:07 -0800 + Commit: 3eccf29, github.com/apache/spark/pull/4373 + + [SPARK-5324][SQL] Results of describe can't be queried + OopsOutOfMemory , Sheng, Li + 2015-02-06 12:33:20 -0800 + Commit: 0b7eb3f, github.com/apache/spark/pull/4249 + + [SPARK-5619][SQL] Support 'show roles' in HiveContext + q00251598 + 2015-02-06 12:29:26 -0800 + Commit: a958d60, github.com/apache/spark/pull/4397 + + [SPARK-5640] Synchronize ScalaReflection where necessary + Tobias Schlatter + 2015-02-06 12:15:02 -0800 + Commit: 500dc2b, github.com/apache/spark/pull/4431 + + [SPARK-5650][SQL] Support optional 'FROM' clause + Liang-Chi Hsieh + 2015-02-06 12:13:44 -0800 + Commit: d433816, github.com/apache/spark/pull/4426 + + [SPARK-5628] Add version option to spark-ec2 + Nicholas Chammas + 2015-02-06 12:08:22 -0800 + Commit: 70e5b03, github.com/apache/spark/pull/4414 + + [SPARK-2945][YARN][Doc]add doc for spark.executor.instances + WangTaoTheTonic + 2015-02-06 11:57:02 -0800 + Commit: d34f79c, github.com/apache/spark/pull/4350 + + [SPARK-4361][Doc] Add more docs for Hadoop Configuration + zsxwing + 2015-02-06 11:50:20 -0800 + Commit: af2a2a2, github.com/apache/spark/pull/3225 + + [HOTFIX] Fix test build break in ExecutorAllocationManagerSuite. + Josh Rosen + 2015-02-06 11:47:32 -0800 + Commit: fb6c0cb + + [SPARK-5652][Mllib] Use broadcasted weights in LogisticRegressionModel + Liang-Chi Hsieh + 2015-02-06 11:22:11 -0800 + Commit: 80f3bcb, github.com/apache/spark/pull/4429 + + [SPARK-5555] Enable UISeleniumSuite tests + Josh Rosen + 2015-02-06 11:14:58 -0800 + Commit: 0d74bd7, github.com/apache/spark/pull/4334 + + SPARK-2450 Adds executor log links to Web UI + Kostas Sakellis , Josh Rosen + 2015-02-06 11:13:00 -0800 + Commit: 32e964c, github.com/apache/spark/pull/3486 + + [SPARK-5618][Spark Core][Minor] Optimise utility code. 
+ Makoto Fukuhara + 2015-02-06 11:11:38 -0800 + Commit: 4cdb26c, github.com/apache/spark/pull/4396 + + [SPARK-5593][Core]Replace BlockManagerListener with ExecutorListener in ExecutorAllocationListener + lianhuiwang + 2015-02-06 11:09:37 -0800 + Commit: 6072fcc, github.com/apache/spark/pull/4369 + + [SPARK-4877] Allow user first classes to extend classes in the parent. + Stephen Haberman + 2015-02-06 11:03:56 -0800 + Commit: 9792bec, github.com/apache/spark/pull/3725 + + [SPARK-5396] Syntax error in spark scripts on windows. + Masayoshi TSUZUKI + 2015-02-06 10:58:26 -0800 + Commit: c01b985, github.com/apache/spark/pull/4428 + + [SPARK-5636] Ramp up faster in dynamic allocation + Andrew Or + 2015-02-06 10:54:23 -0800 + Commit: fe3740c, github.com/apache/spark/pull/4409 + + SPARK-4337. [YARN] Add ability to cancel pending requests + Sandy Ryza + 2015-02-06 10:53:16 -0800 + Commit: 1a88f20, github.com/apache/spark/pull/4141 + + [SPARK-5653][YARN] In ApplicationMaster rename isDriver to isClusterMode + lianhuiwang + 2015-02-06 10:48:31 -0800 + Commit: cc6e531, github.com/apache/spark/pull/4430 + + [SPARK-5013] [MLlib] Added documentation and sample data file for GaussianMixture + Travis Galoppo + 2015-02-06 10:26:51 -0800 + Commit: 9ad56ad, github.com/apache/spark/pull/4401 + + [SPARK-5416] init Executor.threadPool before ExecutorSource + Ryan Williams + 2015-02-06 12:22:25 +0000 + Commit: 37d35ab, github.com/apache/spark/pull/4212 + + [Build] Set all Debian package permissions to 755 + Nicholas Chammas + 2015-02-06 11:38:39 +0000 + Commit: cf6778e, github.com/apache/spark/pull/4277 + + Update ec2-scripts.md + Miguel Peralvo + 2015-02-06 11:04:48 +0000 + Commit: f827ef4, github.com/apache/spark/pull/4300 + + [SPARK-5470][Core]use defaultClassLoader to load classes in KryoSerializer + lianhuiwang + 2015-02-06 11:00:35 +0000 + Commit: ed3aac7, github.com/apache/spark/pull/4258 + + [SPARK-5582] [history] Ignore empty log directories. + Marcelo Vanzin + 2015-02-06 10:07:20 +0000 + Commit: 8569289, github.com/apache/spark/pull/4352 + + [SPARK-5157][YARN] Configure more JVM options properly when we use ConcMarkSweepGC for AM. + Kousuke Saruta + 2015-02-06 09:39:12 +0000 + Commit: 24dbc50, github.com/apache/spark/pull/3956 + + [Minor] Remove permission for execution from spark-shell.cmd + Kousuke Saruta + 2015-02-06 09:33:36 +0000 + Commit: f6ba813, github.com/apache/spark/pull/3983 + + [SPARK-5380][GraphX] Solve an ArrayIndexOutOfBoundsException when build graph with a file format error + Leolh + 2015-02-06 09:01:53 +0000 + Commit: 575d2df, github.com/apache/spark/pull/4176 + + [SPARK-4789] [SPARK-4942] [SPARK-5031] [mllib] Standardize ML Prediction APIs + Joseph K. Bradley + 2015-02-05 23:43:47 -0800 + Commit: dc0c449, github.com/apache/spark/pull/3637 + + [SPARK-5604][MLLIB] remove checkpointDir from trees + Xiangrui Meng + 2015-02-05 23:32:09 -0800 + Commit: 6b88825, github.com/apache/spark/pull/4407 + + [SPARK-5639][SQL] Support DataFrame.renameColumn. + Reynold Xin + 2015-02-05 23:02:40 -0800 + Commit: 7dc4965, github.com/apache/spark/pull/4410 + + Revert "SPARK-5607: Update to Kryo 2.24.0 to avoid including objenesis 1.2." + Patrick Wendell + 2015-02-05 18:36:48 -0800 + Commit: 6d3b7cb + + SPARK-5557: Explicitly include servlet API in dependencies. 
+ Patrick Wendell + 2015-02-05 18:14:54 -0800 + Commit: 793dbae, github.com/apache/spark/pull/4411 + + [HOTFIX] [SQL] Disables Metastore Parquet table conversion for "SQLQuerySuite.CTAS with serde" + Cheng Lian + 2015-02-05 18:09:18 -0800 + Commit: 7c0a648, github.com/apache/spark/pull/4413 + + [SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames + Reynold Xin + 2015-02-05 18:07:10 -0800 + Commit: e8a5d50, github.com/apache/spark/pull/4408 + + [SPARK-5620][DOC] group methods in generated unidoc + Xiangrui Meng + 2015-02-05 16:26:51 -0800 + Commit: 85ccee8, github.com/apache/spark/pull/4404 + + [SPARK-5182] [SPARK-5528] [SPARK-5509] [SPARK-3575] [SQL] Parquet data source improvements + Cheng Lian + 2015-02-05 15:29:56 -0800 + Commit: a9ed511, github.com/apache/spark/pull/4308 + + [SPARK-5604[MLLIB] remove checkpointDir from LDA + Xiangrui Meng + 2015-02-05 15:07:33 -0800 + Commit: c19152c, github.com/apache/spark/pull/4390 + + [SPARK-5460][MLlib] Wrapped `Try` around `deleteAllCheckpoints` - RandomForest. + x1- + 2015-02-05 15:02:04 -0800 + Commit: 62371ad, github.com/apache/spark/pull/4347 + + [SPARK-5135][SQL] Add support for describe table to DDL in SQLContext + OopsOutOfMemory + 2015-02-05 13:07:48 -0800 + Commit: 4d8d070, github.com/apache/spark/pull/4227 + + [SPARK-5617][SQL] fix test failure of SQLQuerySuite + wangfei + 2015-02-05 12:44:12 -0800 + Commit: a83936e, github.com/apache/spark/pull/4395 + + [Branch-1.3] [DOC] doc fix for date + Daoyuan Wang + 2015-02-05 12:42:27 -0800 + Commit: 6fa4ac1, github.com/apache/spark/pull/4400 + + SPARK-5548: Fixed a race condition in AkkaUtilsSuite + Jacek Lewandowski + 2015-02-05 12:00:04 -0800 + Commit: 081ac69, github.com/apache/spark/pull/4343 + + [SPARK-5474][Build]curl should support URL redirection in build/mvn + GuoQiang Li + 2015-02-05 12:03:13 -0800 + Commit: 3414754, github.com/apache/spark/pull/4263 + + [SPARK-5608] Improve SEO of Spark documentation pages + Matei Zaharia + 2015-02-05 11:12:50 -0800 + Commit: 4d74f06, github.com/apache/spark/pull/4381 + + SPARK-4687. Add a recursive option to the addFile API + Sandy Ryza + 2015-02-05 10:15:55 -0800 + Commit: c4b1108, github.com/apache/spark/pull/3670 + + [HOTFIX] MLlib build break. + Reynold Xin + 2015-02-05 00:42:50 -0800 + Commit: 6580929 + + [MLlib] Minor: UDF style update. + Reynold Xin + 2015-02-04 23:57:53 -0800 + Commit: c3ba4d4, github.com/apache/spark/pull/4388 + + [SPARK-5612][SQL] Move DataFrame implicit functions into SQLContext.implicits. + Reynold Xin + 2015-02-04 23:44:34 -0800 + Commit: 7d789e1, github.com/apache/spark/pull/4386 + + [SPARK-5606][SQL] Support plus sign in HiveContext + q00251598 + 2015-02-04 23:16:01 -0800 + Commit: 9d3a75e, github.com/apache/spark/pull/4378 + + [SPARK-5599] Check MLlib public APIs for 1.3 + Xiangrui Meng + 2015-02-04 23:03:47 -0800 + Commit: db34690, github.com/apache/spark/pull/4377 + + [SPARK-5596] [mllib] ML model import/export for GLMs, NaiveBayes + Joseph K. Bradley + 2015-02-04 22:46:48 -0800 + Commit: 975bcef, github.com/apache/spark/pull/4233 + + SPARK-5607: Update to Kryo 2.24.0 to avoid including objenesis 1.2. 
+ Patrick Wendell + 2015-02-04 22:39:44 -0800 + Commit: c23ac03, github.com/apache/spark/pull/4383 + + [SPARK-5602][SQL] Better support for creating DataFrame from local data collection + Reynold Xin + 2015-02-04 19:53:57 -0800 + Commit: 84acd08, github.com/apache/spark/pull/4372 + + [SPARK-5538][SQL] Fix flaky CachedTableSuite + Reynold Xin + 2015-02-04 19:52:41 -0800 + Commit: 206f9bc, github.com/apache/spark/pull/4379 + + [SQL][DataFrame] Minor cleanup. + Reynold Xin + 2015-02-04 19:51:48 -0800 + Commit: 6b4c7f0, github.com/apache/spark/pull/4374 + + [SPARK-4520] [SQL] This pr fixes the ArrayIndexOutOfBoundsException as r... + Sadhan Sood + 2015-02-04 19:18:06 -0800 + Commit: dba98bf, github.com/apache/spark/pull/4148 + + [SPARK-5605][SQL][DF] Allow using String to specify colum name in DSL aggregate functions + Reynold Xin + 2015-02-04 18:35:51 -0800 + Commit: 1fbd124, github.com/apache/spark/pull/4376 + + [SPARK-5411] Allow SparkListeners to be specified in SparkConf and loaded when creating SparkContext + Josh Rosen + 2015-02-04 17:18:03 -0800 + Commit: 9a7ce70, github.com/apache/spark/pull/4111 + + [SPARK-5577] Python udf for DataFrame + Davies Liu + 2015-02-04 15:55:09 -0800 + Commit: dc101b0, github.com/apache/spark/pull/4351 + + [SPARK-5118][SQL] Fix: create table test stored as parquet as select .. + guowei2 + 2015-02-04 15:26:10 -0800 + Commit: e0490e2, github.com/apache/spark/pull/3921 + + [SQL] Use HiveContext's sessionState in HiveMetastoreCatalog.hiveDefaultTableFilePath + Yin Huai + 2015-02-04 15:22:40 -0800 + Commit: 548c9c2, github.com/apache/spark/pull/4355 + + [SQL] Correct the default size of TimestampType and expose NumericType + Yin Huai + 2015-02-04 15:14:49 -0800 + Commit: 0d81645, github.com/apache/spark/pull/4314 + + [SQL][Hiveconsole] Bring hive console code up to date and update README.md + OopsOutOfMemory , Sheng, Li + 2015-02-04 15:13:54 -0800 + Commit: b73d5ff, github.com/apache/spark/pull/4330 + + [SPARK-5367][SQL] Support star expression in udfs + wangfei , scwf + 2015-02-04 15:12:07 -0800 + Commit: 417d111, github.com/apache/spark/pull/4353 + + [SPARK-5426][SQL] Add SparkSQL Java API helper methods. + kul + 2015-02-04 15:08:37 -0800 + Commit: 424cb69, github.com/apache/spark/pull/4243 + + [SPARK-5587][SQL] Support change database owner + wangfei + 2015-02-04 14:35:12 -0800 + Commit: b90dd39, github.com/apache/spark/pull/4357 + + [SPARK-5591][SQL] Fix NoSuchObjectException for CTAS + wangfei + 2015-02-04 14:33:07 -0800 + Commit: a9f0db1, github.com/apache/spark/pull/4365 + + [SPARK-4939] move to next locality when no pending tasks + Davies Liu + 2015-02-04 14:22:07 -0800 + Commit: 0a89b15, github.com/apache/spark/pull/3779 + + [SPARK-4707][STREAMING] Reliable Kafka Receiver can lose data if the blo... 
+ Hari Shreedharan + 2015-02-04 14:20:44 -0800 + Commit: f0500f9, github.com/apache/spark/pull/3655 + + [SPARK-4964] [Streaming] Exactly-once semantics for Kafka + cody koeninger + 2015-02-04 12:06:34 -0800 + Commit: b0c0021, github.com/apache/spark/pull/3798 + + [SPARK-5588] [SQL] support select/filter by SQL expression + Davies Liu + 2015-02-04 11:34:46 -0800 + Commit: ac0b2b7, github.com/apache/spark/pull/4359 + + [SPARK-5585] Flaky test in MLlib python + Davies Liu + 2015-02-04 08:54:20 -0800 + Commit: 38a416f, github.com/apache/spark/pull/4358 + + [SPARK-5574] use given name prefix in dir + Imran Rashid + 2015-02-04 01:02:20 -0800 + Commit: 5aa0f21, github.com/apache/spark/pull/4344 + + [Minor] Fix incorrect warning log + Liang-Chi Hsieh + 2015-02-04 00:52:41 -0800 + Commit: a74cbbf, github.com/apache/spark/pull/4360 + + [SPARK-5379][Streaming] Add awaitTerminationOrTimeout + zsxwing + 2015-02-04 00:40:28 -0800 + Commit: 4cf4cba, github.com/apache/spark/pull/4171 + + [SPARK-5341] Use maven coordinates as dependencies in spark-shell and spark-submit + Burak Yavuz + 2015-02-03 22:39:17 -0800 + Commit: 6aed719, github.com/apache/spark/pull/4215 + + [SPARK-4939] revive offers periodically in LocalBackend + Davies Liu + 2015-02-03 22:30:23 -0800 + Commit: 83de71c, github.com/apache/spark/pull/4147 + + [SPARK-4969][STREAMING][PYTHON] Add binaryRecords to streaming + freeman + 2015-02-03 22:24:30 -0800 + Commit: 242b4f0, github.com/apache/spark/pull/3803 + + [SPARK-5579][SQL][DataFrame] Support for project/filter using SQL expressions + Reynold Xin + 2015-02-03 22:15:35 -0800 + Commit: 40c4cb2, github.com/apache/spark/pull/4348 + + [FIX][MLLIB] fix seed handling in Python GMM + Xiangrui Meng + 2015-02-03 20:39:11 -0800 + Commit: eb15631, github.com/apache/spark/pull/4349 + + [SPARK-4795][Core] Redesign the "primitive type => Writable" implicit APIs to make them be activated automatically + zsxwing + 2015-02-03 20:17:12 -0800 + Commit: d37978d, github.com/apache/spark/pull/3642 + + [SPARK-5578][SQL][DataFrame] Provide a convenient way for Scala users to use UDFs + Reynold Xin + 2015-02-03 20:07:46 -0800 + Commit: 1077f2e, github.com/apache/spark/pull/4345 + + [SPARK-5520][MLlib] Make FP-Growth implementation take generic item types (WIP) + Jacky Li , Jacky Li , Xiangrui Meng + 2015-02-03 17:02:42 -0800 + Commit: e380d2d, github.com/apache/spark/pull/4340 + + [SPARK-5554] [SQL] [PySpark] add more tests for DataFrame Python API + Davies Liu + 2015-02-03 16:01:56 -0800 + Commit: 068c0e2, github.com/apache/spark/pull/4331 + + [STREAMING] SPARK-4986 Wait for receivers to deregister and receiver job to terminate + Jesper Lundgren + 2015-02-03 14:53:39 -0800 + Commit: 1e8b539, github.com/apache/spark/pull/4338 + + [SPARK-5153][Streaming][Test] Increased timeout to deal with flaky KafkaStreamSuite + Tathagata Das + 2015-02-03 13:46:02 -0800 + Commit: 681f9df, github.com/apache/spark/pull/4342 + + [SPARK-4508] [SQL] build native date type to conform behavior to Hive + Daoyuan Wang + 2015-02-03 12:21:45 -0800 + Commit: db821ed, github.com/apache/spark/pull/4325 + + [SPARK-5383][SQL] Support alias for udtfs + wangfei , scwf , Fei Wang + 2015-02-03 12:16:31 -0800 + Commit: 5adbb39, github.com/apache/spark/pull/4186 + + [SPARK-5550] [SQL] Support the case insensitive for UDF + Cheng Hao + 2015-02-03 12:12:26 -0800 + Commit: 
ca7a6cd, github.com/apache/spark/pull/4326 + + [SPARK-4987] [SQL] parquet timestamp type support + Daoyuan Wang + 2015-02-03 12:06:06 -0800 + Commit: 0c20ce6, github.com/apache/spark/pull/3820 + + +Release 1.3.1 + + [SQL] Use path.makeQualified in newParquet. + Yin Huai + 2015-04-04 23:26:10 +0800 + Commit: eb57d4f, github.com/apache/spark/pull/5353 + + [SPARK-6700] disable flaky test + Davies Liu + 2015-04-03 15:22:21 -0700 + Commit: 3366af6, github.com/apache/spark/pull/5356 + + [SPARK-6688] [core] Always use resolved URIs in EventLoggingListener. + Marcelo Vanzin + 2015-04-03 11:54:31 -0700 + Commit: f17a2fe, github.com/apache/spark/pull/5340 + + [SPARK-6575][SQL] Converted Parquet Metastore tables no longer cache metadata + Yin Huai + 2015-04-03 14:40:36 +0800 + Commit: 0c1b78b, github.com/apache/spark/pull/5339 + + [SPARK-6621][Core] Fix the bug that calling EventLoop.stop in EventLoop.onReceive/onError/onStart doesn't call onStop + zsxwing + 2015-04-02 22:54:30 -0700 + Commit: ac705aa, github.com/apache/spark/pull/5280 + + [SPARK-6345][STREAMING][MLLIB] Fix for training with prediction + freeman + 2015-04-02 21:37:44 -0700 + Commit: d21f779, github.com/apache/spark/pull/5037 + + [CORE] The descriptionof jobHistory config should be spark.history.fs.logDirectory + KaiXinXiaoLei + 2015-04-02 20:24:31 -0700 + Commit: 17ab6b0, github.com/apache/spark/pull/5332 + + [SPARK-6575][SQL] Converted Parquet Metastore tables no longer cache metadata + Yin Huai + 2015-04-02 20:23:08 -0700 + Commit: 0c1c0fb, github.com/apache/spark/pull/5339 + + [SPARK-6650] [core] Stop ExecutorAllocationManager when context stops. + Marcelo Vanzin + 2015-04-02 19:48:55 -0700 + Commit: 0ef46b2, github.com/apache/spark/pull/5311 + + [SPARK-6686][SQL] Use resolved output instead of names for toDF rename + Michael Armbrust + 2015-04-02 18:30:55 -0700 + Commit: 2927af1, github.com/apache/spark/pull/5337 + + [SPARK-6672][SQL] convert row to catalyst in createDataFrame(RDD[Row], ...) + Xiangrui Meng + 2015-04-02 17:57:01 +0800 + Commit: c2694bb, github.com/apache/spark/pull/5329 + + [SPARK-6618][SPARK-6669][SQL] Lock Hive metastore client correctly. 
+ Yin Huai , Michael Armbrust + 2015-04-02 16:46:50 -0700 + Commit: e6ee95c, github.com/apache/spark/pull/5333 + + [Minor] [SQL] Follow-up of PR #5210 + Cheng Lian + 2015-04-02 16:15:34 -0700 + Commit: 4f1fe3f, github.com/apache/spark/pull/5219 + + [SPARK-6655][SQL] We need to read the schema of a data source table stored in spark.sql.sources.schema property + Yin Huai + 2015-04-02 16:02:31 -0700 + Commit: aecec07, github.com/apache/spark/pull/5313 + + [SQL] Throw UnsupportedOperationException instead of NotImplementedError + Michael Armbrust + 2015-04-02 16:01:03 -0700 + Commit: 78ba245, github.com/apache/spark/pull/5315 + + SPARK-6414: Spark driver failed with NPE on job cancelation + Hung Lin + 2015-04-02 14:01:43 -0700 + Commit: 58e2b3f, github.com/apache/spark/pull/5124 + + [SPARK-6079] Use index to speed up StatusTracker.getJobIdsForGroup() + Josh Rosen + 2015-03-25 17:40:00 -0700 + Commit: a6664dc, github.com/apache/spark/pull/4830 + + [SPARK-6667] [PySpark] remove setReuseAddress + Davies Liu + 2015-04-02 12:18:33 -0700 + Commit: ee2bd70, github.com/apache/spark/pull/5324 + + Revert "[SPARK-6618][SQL] HiveMetastoreCatalog.lookupRelation should use fine-grained lock" + Cheng Lian + 2015-04-02 12:59:38 +0800 + Commit: 1160cc9 + + [SQL] SPARK-6658: Update DataFrame documentation to refer to correct types + Michael Armbrust + 2015-04-01 18:00:07 -0400 + Commit: 223dd3f + + [SPARK-6578] Small rewrite to make the logic more clear in MessageWithHeader.transferTo. + Reynold Xin + 2015-04-01 18:36:06 -0700 + Commit: d697b76, github.com/apache/spark/pull/5319 + + [SPARK-6660][MLLIB] pythonToJava doesn't recognize object arrays + Xiangrui Meng + 2015-04-01 18:17:07 -0700 + Commit: 0d1e476, github.com/apache/spark/pull/5318 + + [SPARK-6553] [pyspark] Support functools.partial as UDF + ksonj + 2015-04-01 17:23:57 -0700 + Commit: 98f72df, github.com/apache/spark/pull/5206 + + [SPARK-6642][MLLIB] use 1.2 lambda scaling and remove addImplicit from NormalEquation + Xiangrui Meng + 2015-04-01 16:47:18 -0700 + Commit: bc04fa2, github.com/apache/spark/pull/5314 + + [SPARK-6578] [core] Fix thread-safety issue in outbound path of network library. + Marcelo Vanzin + 2015-04-01 16:06:11 -0700 + Commit: 1c31ebd, github.com/apache/spark/pull/5234 + + [SPARK-6657] [Python] [Docs] fixed python doc build warnings + Joseph K. 
Bradley + 2015-04-01 15:15:47 -0700 + Commit: e347a7a, github.com/apache/spark/pull/5317 + + [SPARK-6651][MLLIB] delegate dense vector arithmetics to the underlying numpy array + Xiangrui Meng + 2015-04-01 13:29:04 -0700 + Commit: f50d95a, github.com/apache/spark/pull/5312 + + SPARK-6626 [DOCS]: Corrected Scala:TwitterUtils parameters + jayson + 2015-04-01 11:12:55 +0100 + Commit: 7d029cb, github.com/apache/spark/pull/5295 + + [Doc] Improve Python DataFrame documentation + Reynold Xin + 2015-03-31 18:31:36 -0700 + Commit: e527b35, github.com/apache/spark/pull/5287 + + [SPARK-6614] OutputCommitCoordinator should clear authorized committer only after authorized committer fails, not after any failure + Josh Rosen + 2015-03-31 16:18:39 -0700 + Commit: c4c982a, github.com/apache/spark/pull/5276 + + [SPARK-6633][SQL] Should be "Contains" instead of "EndsWith" when constructing sources.StringContains + Liang-Chi Hsieh + 2015-03-31 13:18:07 -0700 + Commit: d851646, github.com/apache/spark/pull/5299 + + [SPARK-5371][SQL] Propagate types after function conversion, before futher resolution + Michael Armbrust + 2015-03-31 11:34:29 -0700 + Commit: 5a957fe, github.com/apache/spark/pull/5278 + + [SPARK-6145][SQL] fix ORDER BY on nested fields + Michael Armbrust + 2015-03-31 11:23:18 -0700 + Commit: 045228f, github.com/apache/spark/pull/5189 + + [SPARK-6575] [SQL] Adds configuration to disable schema merging while converting metastore Parquet tables + Cheng Lian + 2015-03-31 11:21:15 -0700 + Commit: 778c876, github.com/apache/spark/pull/5231 + + [SPARK-6555] [SQL] Overrides equals() and hashCode() for MetastoreRelation + Cheng Lian + 2015-03-31 11:18:25 -0700 + Commit: 9ebefb1, github.com/apache/spark/pull/5289 + + [SPARK-6618][SQL] HiveMetastoreCatalog.lookupRelation should use fine-grained lock + Yin Huai + 2015-03-31 16:28:40 +0800 + Commit: fd600ce, github.com/apache/spark/pull/5281 + + [SPARK-6623][SQL] Alias DataFrame.na.drop and DataFrame.na.fill in Python. + Reynold Xin + 2015-03-31 00:25:23 -0700 + Commit: cf651a4, github.com/apache/spark/pull/5284 + + [SPARK-6625][SQL] Add common string filters to data sources. 
+ Reynold Xin + 2015-03-31 00:19:51 -0700 + Commit: a97d4e6, github.com/apache/spark/pull/5285 + + [SPARK-6119][SQL] DataFrame support for missing data handling + Reynold Xin + 2015-03-30 20:47:10 -0700 + Commit: 67c885e, github.com/apache/spark/pull/5274 + + [SPARK-6369] [SQL] Uses commit coordinator to help committing Hive and Parquet tables + Cheng Lian + 2015-03-31 07:48:37 +0800 + Commit: fedbfc7, github.com/apache/spark/pull/5139 + + [SPARK-6603] [PySpark] [SQL] add SQLContext.udf and deprecate inferSchema() and applySchema + Davies Liu + 2015-03-30 15:47:00 -0700 + Commit: 30e7c63, github.com/apache/spark/pull/5273 + + [SPARK-6592][SQL] fix filter for scaladoc to generate API doc for Row class under catalyst dir + CodingCat + 2015-03-30 11:54:44 -0700 + Commit: f9d4efa, github.com/apache/spark/pull/5252 + + [SPARK-6571][MLLIB] use wrapper in MatrixFactorizationModel.load + Xiangrui Meng + 2015-03-28 15:08:05 -0700 + Commit: 93a7166, github.com/apache/spark/pull/5243 + + [SPARK-6595][SQL] MetastoreRelation should be a MultiInstanceRelation + Michael Armbrust + 2015-03-30 22:24:12 +0800 + Commit: c411530, github.com/apache/spark/pull/5251 + + [SPARK-6558] Utils.getCurrentUserName returns the full principal name instead of login name + Thomas Graves + 2015-03-29 12:43:30 +0100 + Commit: f8132de, github.com/apache/spark/pull/5229 + + [SPARK-5750][SPARK-3441][SPARK-5836][CORE] Added documentation explaining shuffle + Ilya Ganelin , Ilya Ganelin + 2015-03-30 11:52:02 +0100 + Commit: 1c59a4b, github.com/apache/spark/pull/5074 + + [spark-sql] a better exception message than "scala.MatchError" for unsupported types in Schema creation + Eran Medan + 2015-03-30 00:02:52 -0700 + Commit: 4859c40, github.com/apache/spark/pull/5235 + + [HOTFIX] Build break due to NoRelation cherry-pick. + Reynold Xin + 2015-03-29 12:07:28 -0700 + Commit: 6181366 + + [DOC] Improvements to Python docs. 
+ Reynold Xin + 2015-03-28 23:59:27 -0700 + Commit: 3db0844, github.com/apache/spark/pull/5238 + + [SPARK-6538][SQL] Add missing nullable Metastore fields when merging a Parquet schema + Adam Budde + 2015-03-28 09:14:09 +0800 + Commit: 5e04f45, github.com/apache/spark/pull/5214 + + [SPARK-6564][SQL] SQLContext.emptyDataFrame should contain 0 row, not 1 row + Reynold Xin + 2015-03-27 14:56:57 -0700 + Commit: 7006858, github.com/apache/spark/pull/5226 + + [SPARK-6544][build] Increment Avro version from 1.7.6 to 1.7.7 + Dean Chen + 2015-03-27 14:32:51 +0000 + Commit: fefd49f, github.com/apache/spark/pull/5193 + + [SPARK-6574] [PySpark] fix sql example + Davies Liu + 2015-03-27 11:42:26 -0700 + Commit: b902a95, github.com/apache/spark/pull/5230 + + [SPARK-6550][SQL] Use analyzed plan in DataFrame + Michael Armbrust + 2015-03-27 11:40:00 -0700 + Commit: bc75189, github.com/apache/spark/pull/5217 + + [SPARK-6341][mllib] Upgrade breeze from 0.11.1 to 0.11.2 + Yu ISHIKAWA + 2015-03-27 00:15:02 -0700 + Commit: b318858, github.com/apache/spark/pull/5222 + + [DOCS][SQL] Fix JDBC example + Michael Armbrust + 2015-03-26 14:51:46 -0700 + Commit: 54d92b5, github.com/apache/spark/pull/5192 + + [SPARK-6554] [SQL] Don't push down predicates which reference partition column(s) + Cheng Lian + 2015-03-26 13:11:37 -0700 + Commit: 3d54578, github.com/apache/spark/pull/5210 + + [SPARK-6117] [SQL] Improvements to DataFrame.describe() + Reynold Xin + 2015-03-26 12:26:13 -0700 + Commit: 28e3a1e, github.com/apache/spark/pull/5201 + + [SPARK-6117] [SQL] add describe function to DataFrame for summary statis... + azagrebin + 2015-03-26 00:25:04 -0700 + Commit: 84735c3, github.com/apache/spark/pull/5073 + + SPARK-6480 [CORE] histogram() bucket function is wrong in some simple edge cases + Sean Owen + 2015-03-26 15:00:23 +0000 + Commit: aa2d157, github.com/apache/spark/pull/5148 + + [SPARK-6491] Spark will put the current working dir to the CLASSPATH + guliangliang + 2015-03-26 13:28:56 +0000 + Commit: 5b5f0e2, github.com/apache/spark/pull/5156 + + [SQL][SPARK-6471]: Metastore schema should only be a subset of parquet schema to support dropping of columns using replace columns + Yash Datta + 2015-03-26 21:13:38 +0800 + Commit: 836c921, github.com/apache/spark/pull/5141 + + [SPARK-6465][SQL] Fix serialization of GenericRowWithSchema using kryo + Michael Armbrust + 2015-03-26 18:46:57 +0800 + Commit: 8254996, github.com/apache/spark/pull/5191 + + [SPARK-6536] [PySpark] Column.inSet() in Python + Davies Liu + 2015-03-26 00:01:24 -0700 + Commit: 0ba7599, github.com/apache/spark/pull/5190 + + [SPARK-6463][SQL] AttributeSet.equal should compare size + sisihj , Michael Armbrust + 2015-03-25 19:21:54 -0700 + Commit: 9edb34f, github.com/apache/spark/pull/5194 + + [SPARK-6450] [SQL] Fixes metastore Parquet table conversion + Cheng Lian + 2015-03-25 17:40:19 -0700 + Commit: 0cd4748, github.com/apache/spark/pull/5183 + + [SPARK-6409][SQL] It is not necessary that avoid old inteface of hive, because this will make some UDAF can not work. 
+ DoingDone9 <799203320@qq.com> + 2015-03-25 11:11:52 -0700 + Commit: 4efa6c5, github.com/apache/spark/pull/5131 + + SPARK-6063 MLlib doesn't pass mvn scalastyle check due to UTF chars in LDAModel.scala + Michael Griffiths , Griffiths, Michael (NYC-RPM) + 2015-02-28 14:47:39 +0000 + Commit: 6791f42, github.com/apache/spark/pull/4815 + + [SPARK-6496] [MLLIB] GeneralizedLinearAlgorithm.run(input, initialWeights) should initialize numFeatures + Yanbo Liang + 2015-03-25 17:05:56 +0000 + Commit: 2be4255, github.com/apache/spark/pull/5167 + + [DOCUMENTATION]Fixed Missing Type Import in Documentation + Bill Chambers , anabranch + 2015-03-24 22:24:35 -0700 + Commit: 8e4e2e3, github.com/apache/spark/pull/5179 + + [SPARK-6469] Improving documentation on YARN local directories usage + Christophe Préaud + 2015-03-24 17:05:49 -0700 + Commit: 6af9408, github.com/apache/spark/pull/5165 + + [SPARK-3570] Include time to open files in shuffle write time. + Kay Ousterhout + 2015-03-24 16:29:40 -0700 + Commit: e4db5a3, github.com/apache/spark/pull/4550 + + [SPARK-6088] Correct how tasks that get remote results are shown in UI. + Kay Ousterhout + 2015-03-24 16:26:43 -0700 + Commit: de8b2d4, github.com/apache/spark/pull/4839 + + [SPARK-6428][SQL] Added explicit types for all public methods in catalyst + Reynold Xin + 2015-03-24 16:03:55 -0700 + Commit: 586e0d9, github.com/apache/spark/pull/5162 + + [SPARK-6209] Clean up connections in ExecutorClassLoader after failing to load classes (master branch PR) + Josh Rosen + 2015-03-24 14:38:20 -0700 + Commit: dcf56aa, github.com/apache/spark/pull/4944 + + [SPARK-6458][SQL] Better error messages for invalid data sources + Michael Armbrust + 2015-03-24 14:10:56 -0700 + Commit: f48c16d, github.com/apache/spark/pull/5158 + + [SPARK-6376][SQL] Avoid eliminating subqueries until optimization + Michael Armbrust + 2015-03-24 14:08:20 -0700 + Commit: df671bc, github.com/apache/spark/pull/5160 + + [SPARK-6375][SQL] Fix formatting of error messages. 
+ Michael Armbrust + 2015-03-24 13:22:46 -0700 + Commit: 92bf888, github.com/apache/spark/pull/5155 + + Revert "[SPARK-5680][SQL] Sum function on all null values, should return zero" + Michael Armbrust + 2015-03-24 12:32:25 -0700 + Commit: 930b667 + + [SPARK-6054][SQL] Fix transformations of TreeNodes that hold StructTypes + Michael Armbrust + 2015-03-24 12:28:01 -0700 + Commit: c699e2b, github.com/apache/spark/pull/5157 + + [SPARK-6437][SQL] Use completion iterator to close external sorter + Michael Armbrust + 2015-03-24 12:10:30 -0700 + Commit: c0101d3, github.com/apache/spark/pull/5161 + + [SPARK-6459][SQL] Warn when constructing trivially true equals predicate + Michael Armbrust + 2015-03-24 12:09:02 -0700 + Commit: f0141ca, github.com/apache/spark/pull/5163 + + [SPARK-5955][MLLIB] add checkpointInterval to ALS + Xiangrui Meng + 2015-03-20 15:02:57 -0400 + Commit: bc92a2e, github.com/apache/spark/pull/5076 + + [ML][docs][minor] Define LabeledDocument/Document classes in CV example + Peter Rudenko + 2015-03-24 16:33:38 +0000 + Commit: 4ff5771, github.com/apache/spark/pull/5135 + + [SPARK-5559] [Streaming] [Test] Remove oppotunity we met flakiness when running FlumeStreamSuite + Kousuke Saruta + 2015-03-24 16:13:25 +0000 + Commit: 8722369, github.com/apache/spark/pull/4337 + + Update the command to use IPython notebook + Cong Yue + 2015-03-24 12:56:13 +0000 + Commit: e545143, github.com/apache/spark/pull/5111 + + [SPARK-6452] [SQL] Checks for missing attributes and unresolved operator for all types of operator + Cheng Lian + 2015-03-24 01:12:11 -0700 + Commit: 6f10142, github.com/apache/spark/pull/5129 + + [SPARK-6124] Support jdbc connection properties in OPTIONS part of the query + Volodymyr Lyubinets + 2015-03-23 17:00:27 -0700 + Commit: 04b2078, github.com/apache/spark/pull/4859 + + [SPARK-6397][SQL] Check the missingInput simply + Yadong Qi + 2015-03-23 18:16:49 +0800 + Commit: a29f493, github.com/apache/spark/pull/5132 + + [SPARK-4985] [SQL] parquet support for date type + Daoyuan Wang + 2015-03-23 11:46:16 +0800 + Commit: 60b9b96, github.com/apache/spark/pull/3822 + + [SPARK-6337][Documentation, SQL]Spark 1.3 doc fixes + vinodkc + 2015-03-22 20:00:08 +0000 + Commit: 857e8a6, github.com/apache/spark/pull/5112 + + SPARK-6454 [DOCS] Fix links to pyspark api + Kamil Smuga , stderr + 2015-03-22 15:56:25 +0000 + Commit: 3ba295f, github.com/apache/spark/pull/5120 + + [SPARK-6408] [SQL] Fix JDBCRDD filtering string literals + ypcat , Pei-Lun Lee + 2015-03-22 15:49:13 +0800 + Commit: e60fbf6, github.com/apache/spark/pull/5087 + + [SPARK-6428][SQL] Added explicit type for all public methods for Hive module + Reynold Xin + 2015-03-21 14:30:04 -0700 + Commit: 0021d22, github.com/apache/spark/pull/5108 + + [SPARK-6428][SQL] Added explicit type for all public methods in sql/core + Reynold Xin + 2015-03-20 15:47:07 -0700 + Commit: c964588, github.com/apache/spark/pull/5104 + + [SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser. + Yin Huai + 2015-03-21 13:27:53 -0700 + Commit: 102daaf, github.com/apache/spark/pull/5078 + + [SPARK-5680][SQL] Sum function on all null values, should return zero + Venkata Ramana G , Venkata Ramana Gollamudi + 2015-03-21 13:24:24 -0700 + Commit: 93975a3, github.com/apache/spark/pull/4466 + + [SPARK-5320][SQL]Add statistics method at NoRelation (override super). 
+ x1- + 2015-03-21 13:22:34 -0700 + Commit: cba6842, github.com/apache/spark/pull/5105 + + [SPARK-5821] [SQL] JSON CTAS command should throw error message when delete path failure + Yanbo Liang , Yanbo Liang + 2015-03-21 11:23:28 +0800 + Commit: 8de90c7, github.com/apache/spark/pull/4610 + + [SPARK-6315] [SQL] Also tries the case class string parser while reading Parquet schema + Cheng Lian + 2015-03-21 11:18:45 +0800 + Commit: b75943f, github.com/apache/spark/pull/5034 + + [SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful + Yanbo Liang + 2015-03-21 10:53:04 +0800 + Commit: df83e21, github.com/apache/spark/pull/5107 + + [SPARK-6421][MLLIB] _regression_train_wrapper does not test initialWeights correctly + lewuathe + 2015-03-20 17:18:18 -0400 + Commit: aff9f8d, github.com/apache/spark/pull/5101 + + [SPARK-6286][Mesos][minor] Handle missing Mesos case TASK_ERROR + Jongyoul Lee + 2015-03-20 12:24:34 +0000 + Commit: db812d9, github.com/apache/spark/pull/5088 + + [SPARK-6222][Streaming] Dont delete checkpoint data when doing pre-batch-start checkpoint + Tathagata Das + 2015-03-19 02:15:50 -0400 + Commit: 03e263f, github.com/apache/spark/pull/5008 + + [SPARK-6325] [core,yarn] Do not change target executor count when killing executors. + Marcelo Vanzin + 2015-03-18 09:18:28 -0400 + Commit: 1723f05, github.com/apache/spark/pull/5018 + + [SPARK-6286][minor] Handle missing Mesos case TASK_ERROR. + Iulian Dragos + 2015-03-18 09:15:33 -0400 + Commit: ff0a7f4, github.com/apache/spark/pull/5000 + + [SPARK-6247][SQL] Fix resolution of ambiguous joins caused by new aliases + Michael Armbrust + 2015-03-17 19:47:51 -0700 + Commit: ba8352c, github.com/apache/spark/pull/5062 + + [SPARK-6383][SQL]Fixed compiler and errors in Dataframe examples + Tijo Thomas + 2015-03-17 18:50:19 -0700 + Commit: cee6d08, github.com/apache/spark/pull/5068 + + [SPARK-6366][SQL] In Python API, the default save mode for save and saveAsTable should be "error" instead of "append". + Yin Huai + 2015-03-18 09:41:06 +0800 + Commit: 3ea38bc, github.com/apache/spark/pull/5053 + + [SPARK-6330] [SQL] Add a test case for SPARK-6330 + Pei-Lun Lee + 2015-03-18 08:34:46 +0800 + Commit: 9d88f0c, github.com/apache/spark/pull/5039 + + [SPARK-6336] LBFGS should document what convergenceTol means + lewuathe + 2015-03-17 12:11:57 -0700 + Commit: 476c4e1, github.com/apache/spark/pull/5033 + + [SPARK-6365] jetty-security also needed for SPARK_PREPEND_CLASSES to work + Imran Rashid + 2015-03-17 12:03:54 -0500 + Commit: ac0e7cc, github.com/apache/spark/pull/5071 + + [SPARK-6313] Add config option to disable file locks/fetchFile cache to ... 
+ nemccarthy + 2015-03-17 09:33:11 -0700 + Commit: febb123, github.com/apache/spark/pull/5036 + + [SPARK-3266] Use intermediate abstract classes to fix type erasure issues in Java APIs + Josh Rosen + 2015-03-17 09:18:57 -0700 + Commit: 29e39e1, github.com/apache/spark/pull/5050 + + [SPARK-6331] Load new master URL if present when recovering streaming context from checkpoint + Tathagata Das + 2015-03-17 05:31:27 -0700 + Commit: 95f8d1c, github.com/apache/spark/pull/5024 + + [SQL][docs][minor] Fixed sample code in SQLContext scaladoc + Lomig Mégard + 2015-03-16 23:52:42 -0700 + Commit: 426816b, github.com/apache/spark/pull/5051 + + [SPARK-6299][CORE] ClassNotFoundException in standalone mode when running groupByKey with class defined in REPL + Kevin (Sangwoo) Kim + 2015-03-16 23:49:23 -0700 + Commit: 5c16ced, github.com/apache/spark/pull/5046 + + [SPARK-6077] Remove streaming tab while stopping StreamingContext + lisurprise + 2015-03-16 13:10:32 -0700 + Commit: 47cce98, github.com/apache/spark/pull/4828 + + [SPARK-6330] Fix filesystem bug in newParquet relation + Volodymyr Lyubinets + 2015-03-16 12:13:18 -0700 + Commit: 67fa6d1, github.com/apache/spark/pull/5020 + + SPARK-6245 [SQL] jsonRDD() of empty RDD results in exception + Sean Owen + 2015-03-11 14:09:09 +0000 + Commit: 684ff24, github.com/apache/spark/pull/4971 + + [SPARK-6300][Spark Core] sc.addFile(path) does not support the relative path. + DoingDone9 <799203320@qq.com> + 2015-03-16 12:27:15 +0000 + Commit: 724aab4, github.com/apache/spark/pull/4993 + + [SPARK-3619] Part 2. Upgrade to Mesos 0.21 to work around MESOS-1688 + Jongyoul Lee + 2015-03-15 15:46:55 +0000 + Commit: 43fcab0, github.com/apache/spark/pull/4361 + + [SPARK-6210] [SQL] use prettyString as column name in agg() + Davies Liu + 2015-03-14 00:43:33 -0700 + Commit: ad47563, github.com/apache/spark/pull/5006 + + [SPARK-6275][Documentation]Miss toDF() function in docs/sql-programming-guide.md + zzcclp + 2015-03-12 15:07:15 +0000 + Commit: 3012781, github.com/apache/spark/pull/4977 + + [SPARK-6133] Make sc.stop() idempotent + Andrew Or + 2015-03-03 15:09:57 -0800 + Commit: a08588c, github.com/apache/spark/pull/4871 + + [SPARK-6132][HOTFIX] ContextCleaner InterruptedException should be quiet + Andrew Or + 2015-03-03 20:49:45 -0800 + Commit: 338bea7, github.com/apache/spark/pull/4882 + + [SPARK-6132] ContextCleaner race condition across SparkContexts + Andrew Or + 2015-03-03 13:44:05 -0800 + Commit: 3cdc8a3, github.com/apache/spark/pull/4869 + + [SPARK-6087][CORE] Provide actionable exception if Kryo buffer is not large enough + Lev Khomich + 2015-03-10 10:55:42 +0000 + Commit: 9846790, github.com/apache/spark/pull/4947 + + [SPARK-6036][CORE] avoid race condition between eventlogListener and akka actor system + Zhang, Liye + 2015-02-26 23:11:43 -0800 + Commit: f81611d, github.com/apache/spark/pull/4785 + + SPARK-4044 [CORE] Thriftserver fails to start when JAVA_HOME points to JRE instead of JDK + Sean Owen + 2015-03-13 17:59:31 +0000 + Commit: 4aa4132, github.com/apache/spark/pull/4981 + + SPARK-4300 [CORE] Race condition during SparkWorker shutdown + Sean Owen + 2015-02-26 14:08:56 -0800 + Commit: a3493eb, github.com/apache/spark/pull/4787 + + [SPARK-6194] [SPARK-677] [PySpark] fix memory leak in collect() + Davies Liu + 2015-03-09 16:24:06 -0700 + Commit: 170af49, 
github.com/apache/spark/pull/4923 + + SPARK-4704 [CORE] SparkSubmitDriverBootstrap doesn't flush output + Sean Owen + 2015-02-26 12:56:54 -0800 + Commit: dbee7e1, github.com/apache/spark/pull/4788 + + [SPARK-6278][MLLIB] Mention the change of objective in linear regression + Xiangrui Meng + 2015-03-13 10:27:28 -0700 + Commit: 214f681, github.com/apache/spark/pull/4978 + + [SPARK-5310] [SQL] [DOC] Parquet section for the SQL programming guide + Cheng Lian + 2015-03-13 21:34:50 +0800 + Commit: dc287f3, github.com/apache/spark/pull/5001 + + [mllib] [python] Add LassoModel to __all__ in regression.py + Joseph K. Bradley + 2015-03-12 16:46:29 -0700 + Commit: 23069bd, github.com/apache/spark/pull/4970 + + [SPARK-6294] fix hang when call take() in JVM on PythonRDD + Davies Liu + 2015-03-12 01:34:38 -0700 + Commit: 850e694, github.com/apache/spark/pull/4987 + + [SPARK-6296] [SQL] Added equals to Column + Volodymyr Lyubinets + 2015-03-12 00:55:26 -0700 + Commit: d9e141c, github.com/apache/spark/pull/4988 + + [SPARK-6128][Streaming][Documentation] Updates to Spark Streaming Programming Guide + Tathagata Das + 2015-03-11 18:48:21 -0700 + Commit: bdc4682, github.com/apache/spark/pull/4956 + + [SPARK-6274][Streaming][Examples] Added examples streaming + sql examples. + Tathagata Das + 2015-03-11 11:19:51 -0700 + Commit: ac61466, github.com/apache/spark/pull/4975 + + [SPARK-5183][SQL] Update SQL Docs with JDBC and Migration Guide + Michael Armbrust + 2015-03-10 18:13:09 -0700 + Commit: edbcb6f, github.com/apache/spark/pull/4958 + + Minor doc: Remove the extra blank line in data types javadoc. + Reynold Xin + 2015-03-10 17:25:04 -0700 + Commit: 7295192, github.com/apache/spark/pull/4955 + + [SPARK-5310][Doc] Update SQL Programming Guide to include DataFrames. 
+ Reynold Xin + 2015-03-09 16:16:16 -0700 + Commit: bc53d3d, github.com/apache/spark/pull/4954 + + [Docs] Replace references to SchemaRDD with DataFrame + Reynold Xin + 2015-03-09 13:29:19 -0700 + Commit: 5e58f76, github.com/apache/spark/pull/4952 + + Preparing development version 1.3.1-SNAPSHOT + Patrick Wendell + 2015-03-05 23:02:08 +0000 + Commit: c152f9a + + +Release 1.3.0 + + [SQL] Make Strategies a public developer API + Michael Armbrust + 2015-03-05 14:50:25 -0800 + Commit: 556e0de, github.com/apache/spark/pull/4920 + + [SPARK-6163][SQL] jsonFile should be backed by the data source API + Yin Huai + 2015-03-05 14:49:44 -0800 + Commit: 083fed5, github.com/apache/spark/pull/4896 + + [SPARK-6145][SQL] fix ORDER BY on nested fields + Wenchen Fan , Michael Armbrust + 2015-03-05 14:49:01 -0800 + Commit: e358f55, github.com/apache/spark/pull/4918 + + [SPARK-6175] Fix standalone executor log links when ephemeral ports or SPARK_PUBLIC_DNS are used + Josh Rosen + 2015-03-05 12:04:00 -0800 + Commit: 988b498, github.com/apache/spark/pull/4903 + + SPARK-6182 [BUILD] spark-parent pom needs to be published for both 2.10 and 2.11 + Sean Owen + 2015-03-05 11:31:48 -0800 + Commit: ae315d2, github.com/apache/spark/pull/4912 + + Revert "[SPARK-6153] [SQL] promote guava dep for hive-thriftserver" + Cheng Lian + 2015-03-05 17:58:18 +0800 + Commit: f8205d3 + + [SPARK-6153] [SQL] promote guava dep for hive-thriftserver + Daoyuan Wang + 2015-03-05 16:35:17 +0800 + Commit: b92d925, github.com/apache/spark/pull/4884 + + Updating CHANGES file + Patrick Wendell + 2015-03-04 21:19:49 -0800 + Commit: 87eac3c + + SPARK-5143 [BUILD] [WIP] spark-network-yarn 2.11 depends on spark-network-shuffle 2.10 + Sean Owen + 2015-03-04 21:00:51 -0800 + Commit: f509159, github.com/apache/spark/pull/4876 + + [SPARK-6149] [SQL] [Build] Excludes Guava 15 referenced by jackson-module-scala_2.10 + Cheng Lian + 2015-03-04 20:52:58 -0800 + Commit: a0aa24a, github.com/apache/spark/pull/4890 + + [SPARK-6144] [core] Fix addFile when source files are on "hdfs:" + Marcelo Vanzin , trystanleftwich + 2015-03-04 12:58:39 -0800 + Commit: 3fc74f4, github.com/apache/spark/pull/4894 + + [SPARK-6134][SQL] Fix wrong datatype for casting FloatType and default LongType value in defaultPrimitive + Liang-Chi Hsieh + 2015-03-04 20:23:43 +0800 + Commit: bfa4e31, github.com/apache/spark/pull/4870 + + [SPARK-6136] [SQL] Removed JDBC integration tests which depends on docker-client + Cheng Lian + 2015-03-04 19:39:02 +0800 + Commit: 035243d, github.com/apache/spark/pull/4872 + + [SPARK-6141][MLlib] Upgrade Breeze from 0.10 to 0.11 to fix convergence bug + Xiangrui Meng , DB Tsai , DB Tsai + 2015-03-03 23:52:02 -0800 + Commit: 9f24977, github.com/apache/spark/pull/4879 + + [SPARK-5949] HighlyCompressedMapStatus needs more classes registered w/ kryo + Imran Rashid + 2015-03-03 15:33:19 -0800 + Commit: 9a0b75c, github.com/apache/spark/pull/4877 + + SPARK-1911 [DOCS] Warn users if their assembly jars are not built with Java 6 + Sean Owen + 2015-03-03 13:40:11 -0800 + Commit: 8446ad0, github.com/apache/spark/pull/4874 + + Revert "[SPARK-5423][Core] Cleanup resources in DiskMapIterator.finalize to ensure deleting the temp file" + Andrew Or + 2015-03-03 13:04:15 -0800 + Commit: ee4929d + + Adding CHANGES.txt for Spark 1.3 + Patrick Wendell + 2015-03-03 02:19:19 -0800 + Commit: ce7158c + + BUILD: Minor tweaks to internal 
build scripts + Patrick Wendell + 2015-03-03 00:38:12 -0800 + Commit: ae60eb9 + + HOTFIX: Bump HBase version in MapR profiles. + Patrick Wendell + 2015-03-03 01:38:07 -0800 + Commit: 1aa8461 + + [SPARK-5537][MLlib][Docs] Add user guide for multinomial logistic regression + DB Tsai + 2015-03-02 22:37:12 -0800 + Commit: 841d2a2, github.com/apache/spark/pull/4866 + + [SPARK-6120] [mllib] Warnings about memory in tree, ensemble model save + Joseph K. Bradley + 2015-03-02 22:33:51 -0800 + Commit: 81648a7, github.com/apache/spark/pull/4864 + + [SPARK-6097][MLLIB] Support tree model save/load in PySpark/MLlib + Xiangrui Meng + 2015-03-02 22:27:01 -0800 + Commit: 62c53be, github.com/apache/spark/pull/4854 + + [SPARK-5310][SQL] Fixes to Docs and Datasources API + Reynold Xin , Michael Armbrust + 2015-03-02 22:14:08 -0800 + Commit: 4e6e008, github.com/apache/spark/pull/4868 + + [SPARK-5950][SQL]Insert array into a metastore table saved as parquet should work when using datasource api + Yin Huai + 2015-03-02 19:31:55 -0800 + Commit: 1b490e9, github.com/apache/spark/pull/4826 + + [SPARK-6127][Streaming][Docs] Add Kafka to Python api docs + Tathagata Das + 2015-03-02 18:40:46 -0800 + Commit: ffd0591, github.com/apache/spark/pull/4860 + + [SPARK-5537] Add user guide for multinomial logistic regression + Xiangrui Meng , DB Tsai + 2015-03-02 18:10:50 -0800 + Commit: 11389f0, github.com/apache/spark/pull/4801 + + [SPARK-6121][SQL][MLLIB] simpleString for UDT + Xiangrui Meng + 2015-03-02 17:14:34 -0800 + Commit: 1b8ab57, github.com/apache/spark/pull/4858 + + [SPARK-6048] SparkConf should not translate deprecated configs on set + Andrew Or + 2015-03-02 16:36:42 -0800 + Commit: ea69cf2, github.com/apache/spark/pull/4799 + + [SPARK-6066] Make event log format easier to parse + Andrew Or + 2015-03-02 16:34:32 -0800 + Commit: 8100b79, github.com/apache/spark/pull/4821 + + [SPARK-6082] [SQL] Provides better error message for malformed rows when caching tables + Cheng Lian + 2015-03-02 16:18:00 -0800 + Commit: 866f281, github.com/apache/spark/pull/4842 + + [SPARK-6114][SQL] Avoid metastore conversions before plan is resolved + Michael Armbrust + 2015-03-02 16:10:54 -0800 + Commit: 3899c7c, github.com/apache/spark/pull/4855 + + [SPARK-6050] [yarn] Relax matching of vcore count in received containers. 
+ Marcelo Vanzin + 2015-03-02 16:41:43 -0600 + Commit: 650d1e7, github.com/apache/spark/pull/4818 + + [SPARK-6040][SQL] Fix the percent bug in tablesample + q00251598 + 2015-03-02 13:16:29 -0800 + Commit: a83b9bb, github.com/apache/spark/pull/4789 + + [Minor] Fix doc typo for describing primitiveTerm effectiveness condition + Liang-Chi Hsieh + 2015-03-02 13:11:17 -0800 + Commit: f92876a, github.com/apache/spark/pull/4762 + + SPARK-5390 [DOCS] Encourage users to post on Stack Overflow in Community Docs + Sean Owen + 2015-03-02 21:10:08 +0000 + Commit: 58e7198, github.com/apache/spark/pull/4843 + + [DOCS] Refactored Dataframe join comment to use correct parameter ordering + Paul Power + 2015-03-02 13:08:47 -0800 + Commit: 54ac243, github.com/apache/spark/pull/4847 + + [SPARK-6080] [PySpark] correct LogisticRegressionWithLBFGS regType parameter for pyspark + Yanbo Liang + 2015-03-02 10:17:24 -0800 + Commit: 4ffaf85, github.com/apache/spark/pull/4831 + + [SPARK-5741][SQL] Support the path contains comma in HiveContext + q00251598 + 2015-03-02 10:13:11 -0800 + Commit: f476108, github.com/apache/spark/pull/4532 + + [SPARK-6111] Fixed usage string in documentation. + Kenneth Myers + 2015-03-02 17:25:24 +0000 + Commit: b2b7f01, github.com/apache/spark/pull/4852 + + [SPARK-6052][SQL]In JSON schema inference, we should always set containsNull of an ArrayType to true + Yin Huai + 2015-03-02 23:18:07 +0800 + Commit: a3fef2c, github.com/apache/spark/pull/4806 + + [SPARK-6073][SQL] Need to refresh metastore cache after append data in CreateMetastoreDataSourceAsSelect + Yin Huai + 2015-03-02 22:42:18 +0800 + Commit: c59871c, github.com/apache/spark/pull/4824 + + [Streaming][Minor]Fix some error docs in streaming examples + Saisai Shao + 2015-03-02 08:49:19 +0000 + Commit: 1fe677a, github.com/apache/spark/pull/4837 + + [SPARK-6083] [MLLib] [DOC] Make Python API example consistent in NaiveBayes + MechCoder + 2015-03-01 16:28:15 -0800 + Commit: 6a2fc85, github.com/apache/spark/pull/4834 + + [SPARK-6053][MLLIB] support save/load in PySpark's ALS + Xiangrui Meng + 2015-03-01 16:26:57 -0800 + Commit: b570d98, github.com/apache/spark/pull/4811 + + [SPARK-6074] [sql] Package pyspark sql bindings. + Marcelo Vanzin + 2015-03-01 11:05:10 +0000 + Commit: bb16618, github.com/apache/spark/pull/4822 + + SPARK-5984: Fix TimSort bug causes ArrayOutOfBoundsException + Evan Yu + 2015-02-28 18:55:34 -0800 + Commit: 317694c, github.com/apache/spark/pull/4804 + + [SPARK-5775] [SQL] BugFix: GenericRow cannot be cast to SpecificMutableRow when nested data and partitioned table + Cheng Lian , Cheng Lian , Yin Huai + 2015-02-28 21:15:43 +0800 + Commit: aa39460, github.com/apache/spark/pull/4792 + + [SPARK-5979][SPARK-6032] Smaller safer --packages fix + Burak Yavuz + 2015-02-27 22:59:35 -0800 + Commit: 5a55c96, github.com/apache/spark/pull/4802 + + [SPARK-6070] [yarn] Remove unneeded classes from shuffle service jar. 
+ Marcelo Vanzin + 2015-02-27 22:44:11 -0800 + Commit: 1747e0a, github.com/apache/spark/pull/4820 + + [SPARK-6055] [PySpark] fix incorrect __eq__ of DataType + Davies Liu + 2015-02-27 20:07:17 -0800 + Commit: 49f2187, github.com/apache/spark/pull/4808 + + [SPARK-5751] [SQL] Sets SPARK_HOME as SPARK_PID_DIR when running Thrift server test suites + Cheng Lian + 2015-02-28 08:41:49 +0800 + Commit: 5d19cf0, github.com/apache/spark/pull/4758 + + [Streaming][Minor] Remove useless type signature of Java Kafka direct stream API + Saisai Shao + 2015-02-27 13:01:42 -0800 + Commit: ceebe3c, github.com/apache/spark/pull/4817 + + [SPARK-4587] [mllib] [docs] Fixed save,load calls in ML guide examples + Joseph K. Bradley + 2015-02-27 13:00:36 -0800 + Commit: 117e10c, github.com/apache/spark/pull/4816 + + [SPARK-6058][Yarn] Log the user class exception in ApplicationMaster + zsxwing + 2015-02-27 13:31:46 +0000 + Commit: bff8088, github.com/apache/spark/pull/4813 + + fix spark-6033, clarify the spark.worker.cleanup behavior in standalone mode + 许鹏 + 2015-02-26 23:05:56 -0800 + Commit: b8db84c, github.com/apache/spark/pull/4803 + + SPARK-2168 [Spark core] Use relative URIs for the app links in the History Server. + Lukasz Jastrzebski + 2015-02-26 22:38:06 -0800 + Commit: 485b919, github.com/apache/spark/pull/4778 + + [SPARK-6024][SQL] When a data source table has too many columns, it's schema cannot be stored in metastore. + Yin Huai + 2015-02-26 20:46:05 -0800 + Commit: 6200f07, github.com/apache/spark/pull/4795 + + [SPARK-6037][SQL] Avoiding duplicate Parquet schema merging + Liang-Chi Hsieh + 2015-02-27 11:06:47 +0800 + Commit: 25a109e, github.com/apache/spark/pull/4786 + + SPARK-4579 [WEBUI] Scheduling Delay appears negative + Sean Owen + 2015-02-26 17:35:09 -0800 + Commit: b83a93e, github.com/apache/spark/pull/4796 + + [SPARK-5951][YARN] Remove unreachable driver memory properties in yarn client mode + mohit.goyal + 2015-02-26 14:27:47 -0800 + Commit: 5b426cb, github.com/apache/spark/pull/4730 + + Add a note for context termination for History server on Yarn + moussa taifi + 2015-02-26 14:19:43 -0800 + Commit: 297c3ef, github.com/apache/spark/pull/4721 + + [SPARK-6018] [YARN] NoSuchMethodError in Spark app is swallowed by YARN AM + Cheolsoo Park + 2015-02-26 13:53:49 -0800 + Commit: fe79674, github.com/apache/spark/pull/4773 + + [SPARK-6027][SPARK-5546] Fixed --jar and --packages not working for KafkaUtils and improved error message + Tathagata Das + 2015-02-26 13:46:07 -0800 + Commit: 731a997, github.com/apache/spark/pull/4779 + + Modify default value description for spark.scheduler.minRegisteredResourcesRatio on docs. 
+ Li Zhihui + 2015-02-26 13:07:07 -0800 + Commit: 62652dc, github.com/apache/spark/pull/4781 + + [SPARK-5363] Fix bug in PythonRDD: remove() inside iterator is not safe + Davies Liu + 2015-02-26 11:54:17 -0800 + Commit: 5d309ad, github.com/apache/spark/pull/4776 + + [SPARK-6015] fix links to source code in Python API docs + Davies Liu + 2015-02-26 10:45:29 -0800 + Commit: dafb3d2, github.com/apache/spark/pull/4772 + + [SPARK-6007][SQL] Add numRows param in DataFrame.show() + Jacky Li + 2015-02-26 10:40:58 -0800 + Commit: 7c779d8, github.com/apache/spark/pull/4767 + + [SPARK-6016][SQL] Cannot read the parquet table after overwriting the existing table when spark.sql.parquet.cacheMetadata=true + Yin Huai + 2015-02-27 01:01:32 +0800 + Commit: b5c5e93, github.com/apache/spark/pull/4775 + + [SPARK-6023][SQL] ParquetConversions fails to replace the destination MetastoreRelation of an InsertIntoTable node to ParquetRelation2 + Yin Huai + 2015-02-26 22:39:49 +0800 + Commit: e0f5fb0, github.com/apache/spark/pull/4782 + + [SPARK-5976][MLLIB] Add partitioner to factors returned by ALS + Xiangrui Meng + 2015-02-25 23:43:29 -0800 + Commit: a51d9db, github.com/apache/spark/pull/4748 + + [SPARK-1182][Docs] Sort the configuration parameters in configuration.md + Brennon York + 2015-02-25 16:12:56 -0800 + Commit: 56fa38a, github.com/apache/spark/pull/3863 + + [SPARK-5724] fix the misconfiguration in AkkaUtils + CodingCat + 2015-02-23 11:29:25 +0000 + Commit: b32a653, github.com/apache/spark/pull/4512 + + [SPARK-5974] [SPARK-5980] [mllib] [python] [docs] Update ML guide with save/load, Python GBT + Joseph K. Bradley + 2015-02-25 16:13:17 -0800 + Commit: a1b4856, github.com/apache/spark/pull/4750 + + [SPARK-5926] [SQL] make DataFrame.explain leverage queryExecution.logical + Yanbo Liang + 2015-02-25 15:37:13 -0800 + Commit: 5bd4b49, github.com/apache/spark/pull/4707 + + [SPARK-5999][SQL] Remove duplicate Literal matching block + Liang-Chi Hsieh + 2015-02-25 15:22:33 -0800 + Commit: 6fff9b8, github.com/apache/spark/pull/4760 + + [SPARK-6010] [SQL] Merging compatible Parquet schemas before computing splits + Cheng Lian + 2015-02-25 15:15:22 -0800 + Commit: 016f1f8, github.com/apache/spark/pull/4768 + + [SPARK-5944] [PySpark] fix version in Python API docs + Davies Liu + 2015-02-25 15:13:34 -0800 + Commit: 9aca3c6, github.com/apache/spark/pull/4731 + + [SPARK-5982] Remove incorrect Local Read Time Metric + Kay Ousterhout + 2015-02-25 14:55:24 -0800 + Commit: 791df93, github.com/apache/spark/pull/4749 + + [SPARK-1955][GraphX]: VertexRDD can incorrectly assume index sharing + Brennon York + 2015-02-25 14:11:12 -0800 + Commit: 8073767, github.com/apache/spark/pull/4705 + + SPARK-5930 [DOCS] Documented default of spark.shuffle.io.retryWait is confusing + Sean Owen + 2015-02-25 12:20:44 -0800 + Commit: eaffc6e, github.com/apache/spark/pull/4769 + + [SPARK-5996][SQL] Fix specialized outbound conversions + Michael Armbrust + 2015-02-25 10:13:40 -0800 + Commit: fada683, github.com/apache/spark/pull/4757 + + [SPARK-5994] [SQL] Python DataFrame documentation fixes + Davies Liu + 2015-02-24 20:51:55 -0800 + Commit: 5c421e0, github.com/apache/spark/pull/4756 + + [SPARK-5286][SQL] SPARK-5286 followup + Yin Huai + 2015-02-24 19:51:36 -0800 + Commit: e7a748e, github.com/apache/spark/pull/4755 + + [SPARK-5993][Streaming][Build] Fix assembly jar 
location of kafka-assembly + Tathagata Das + 2015-02-24 19:10:37 -0800 + Commit: 1e94894, github.com/apache/spark/pull/4753 + + [SPARK-5985][SQL] DataFrame sortBy -> orderBy in Python. + Reynold Xin + 2015-02-24 18:59:23 -0800 + Commit: 5e233b2, github.com/apache/spark/pull/4752 + + [SPARK-5904][SQL] DataFrame Java API test suites. + Reynold Xin + 2015-02-24 18:51:41 -0800 + Commit: 78a1781, github.com/apache/spark/pull/4751 + + [SPARK-5751] [SQL] [WIP] Revamped HiveThriftServer2Suite for robustness + Cheng Lian + 2015-02-25 08:34:55 +0800 + Commit: 17ee246, github.com/apache/spark/pull/4720 + + [SPARK-5973] [PySpark] fix zip with two RDDs with AutoBatchedSerializer + Davies Liu + 2015-02-24 14:50:00 -0800 + Commit: 91bf0f8, github.com/apache/spark/pull/4745 + + [SPARK-5952][SQL] Lock when using hive metastore client + Michael Armbrust + 2015-02-24 13:39:29 -0800 + Commit: 641423d, github.com/apache/spark/pull/4746 + + [MLLIB] Change x_i to y_i in Variance's user guide + Xiangrui Meng + 2015-02-24 11:38:59 -0800 + Commit: a4ff445, github.com/apache/spark/pull/4740 + + [SPARK-5965] Standalone Worker UI displays {{USER_JAR}} + Andrew Or + 2015-02-24 11:08:07 -0800 + Commit: eaf7bf9, github.com/apache/spark/pull/4739 + + [Spark-5967] [UI] Correctly clean JobProgressListener.stageIdToActiveJobIds + Tathagata Das + 2015-02-24 11:02:47 -0800 + Commit: 28dd53b, github.com/apache/spark/pull/4741 + + [SPARK-5532][SQL] Repartition should not use external rdd representation + Michael Armbrust + 2015-02-24 10:52:18 -0800 + Commit: e46096b, github.com/apache/spark/pull/4738 + + [SPARK-5910][SQL] Support for as in selectExpr + Michael Armbrust + 2015-02-24 10:49:51 -0800 + Commit: ba5d60d, github.com/apache/spark/pull/4736 + + [SPARK-5968] [SQL] Suppresses ParquetOutputCommitter WARN logs + Cheng Lian + 2015-02-24 10:45:38 -0800 + Commit: 2b562b0, github.com/apache/spark/pull/4744 + + [SPARK-5958][MLLIB][DOC] update block matrix user guide + Xiangrui Meng + 2015-02-23 22:08:44 -0800 + Commit: dd42558, github.com/apache/spark/pull/4737 + + [SPARK-5873][SQL] Allow viewing of partially analyzed plans in queryExecution + Michael Armbrust + 2015-02-23 17:34:54 -0800 + Commit: 2d7786e, github.com/apache/spark/pull/4684 + + [SPARK-5935][SQL] Accept MapType in the schema provided to a JSON dataset. + Yin Huai , Yin Huai + 2015-02-23 17:16:34 -0800 + Commit: 33ccad2, github.com/apache/spark/pull/4710 + + [SPARK-5912] [docs] [mllib] Small fixes to ChiSqSelector docs + Joseph K. Bradley + 2015-02-23 16:15:57 -0800 + Commit: ae97040, github.com/apache/spark/pull/4732 + + [MLLIB] SPARK-5912 Programming guide for feature selection + Alexander Ulanov + 2015-02-23 12:09:40 -0800 + Commit: 8355773, github.com/apache/spark/pull/4709 + + [SPARK-5939][MLLib] make FPGrowth example app take parameters + Jacky Li + 2015-02-23 08:47:28 -0800 + Commit: 33b9084, github.com/apache/spark/pull/4714 + + [SPARK-5943][Streaming] Update the test to use new API to reduce the warning + Saisai Shao + 2015-02-23 11:27:27 +0000 + Commit: 67b7f79, github.com/apache/spark/pull/4722 + + [EXAMPLES] fix typo. 
+ Makoto Fukuhara + 2015-02-23 09:24:33 +0000 + Commit: f172387, github.com/apache/spark/pull/4724 + + Revert "[SPARK-4808] Removing minimum number of elements read before spill check" + Andrew Or + 2015-02-22 09:44:52 -0800 + Commit: 4186dd3 + + SPARK-5669 [BUILD] Reverse exclusion of JBLAS libs for 1.3 + Sean Owen + 2015-02-22 09:09:06 +0000 + Commit: eed7389, github.com/apache/spark/pull/4715 + + [DataFrame] [Typo] Fix the typo + Cheng Hao + 2015-02-22 08:56:30 +0000 + Commit: 04d3b32, github.com/apache/spark/pull/4717 + + [DOCS] Fix typo in API for custom InputFormats based on the “new” MapReduce API + Alexander + 2015-02-22 08:53:05 +0000 + Commit: c5a5c6f, github.com/apache/spark/pull/4718 + + [SPARK-5937][YARN] Fix ClientSuite to set YARN mode, so that the correct class is used in t... + Hari Shreedharan + 2015-02-21 10:01:01 -0800 + Commit: 76e3e65, github.com/apache/spark/pull/4711 + + SPARK-5841 [CORE] [HOTFIX 2] Memory leak in DiskBlockManager + Nishkam Ravi , nishkamravi2 , nravi + 2015-02-21 09:59:28 -0800 + Commit: 932338e, github.com/apache/spark/pull/4690 + + [SPARK-5909][SQL] Add a clearCache command to Spark SQL's cache manager + Yin Huai + 2015-02-20 16:20:02 +0800 + Commit: b9a6c5c, github.com/apache/spark/pull/4694 + + [SPARK-5898] [SPARK-5896] [SQL] [PySpark] create DataFrame from pandas and tuple/list + Davies Liu + 2015-02-20 15:35:05 -0800 + Commit: 913562a, github.com/apache/spark/pull/4679 + + [SPARK-5867] [SPARK-5892] [doc] [ml] [mllib] Doc cleanups for 1.3 release + Joseph K. Bradley + 2015-02-20 02:31:32 -0800 + Commit: 8c12f31, github.com/apache/spark/pull/4675 + + [SPARK-4808] Removing minimum number of elements read before spill check + mcheah + 2015-02-19 18:09:22 -0800 + Commit: 0382dcc, github.com/apache/spark/pull/4420 + + [SPARK-5900][MLLIB] make PIC and FPGrowth Java-friendly + Xiangrui Meng + 2015-02-19 18:06:16 -0800 + Commit: ba941ce, github.com/apache/spark/pull/4695 + + SPARK-5570: No docs stating that `new SparkConf().set("spark.driver.memory", ...) will not work + Ilya Ganelin + 2015-02-19 15:50:58 -0800 + Commit: c5f3b9e, github.com/apache/spark/pull/4665 + + SPARK-4682 [CORE] Consolidate various 'Clock' classes + Sean Owen + 2015-02-19 15:35:23 -0800 + Commit: bd49e8b, github.com/apache/spark/pull/4514 + + [Spark-5889] Remove pid file after stopping service. + Zhan Zhang + 2015-02-19 23:13:02 +0000 + Commit: ff8976e, github.com/apache/spark/pull/4676 + + [SPARK-5902] [ml] Made PipelineStage.transformSchema public instead of private to ml + Joseph K. Bradley + 2015-02-19 12:46:27 -0800 + Commit: 0c494cf, github.com/apache/spark/pull/4682 + + [SPARK-5904][SQL] DataFrame API fixes. 
+ Reynold Xin + 2015-02-19 12:09:44 -0800 + Commit: 55d91d9, github.com/apache/spark/pull/4686 + + [SPARK-5825] [Spark Submit] Remove the double checking instance name when stopping the service + Cheng Hao + 2015-02-19 12:07:51 -0800 + Commit: fe00eb6, github.com/apache/spark/pull/4611 + + [SPARK-5423][Core] Cleanup resources in DiskMapIterator.finalize to ensure deleting the temp file + zsxwing + 2015-02-19 18:37:31 +0000 + Commit: 25fae8e, github.com/apache/spark/pull/4219 + + [SPARK-5816] Add huge compatibility warning in DriverWrapper + Andrew Or + 2015-02-19 09:56:25 -0800 + Commit: f93d4d9, github.com/apache/spark/pull/4687 + + SPARK-5548: Fix for AkkaUtilsSuite failure - attempt 2 + Jacek Lewandowski + 2015-02-19 09:53:36 -0800 + Commit: fbcb949, github.com/apache/spark/pull/4653 + + [SPARK-5846] Correctly set job description and pool for SQL jobs + Kay Ousterhout + 2015-02-19 09:49:34 +0800 + Commit: 092b45f, github.com/apache/spark/pull/4630 + + [SPARK-5879][MLLIB] update PIC user guide and add a Java example + Xiangrui Meng + 2015-02-18 16:29:32 -0800 + Commit: a64f374, github.com/apache/spark/pull/4680 + + [SPARK-5722] [SQL] [PySpark] infer int as LongType + Davies Liu + 2015-02-18 14:17:04 -0800 + Commit: 470cba8, github.com/apache/spark/pull/4666 + + [SPARK-5840][SQL] HiveContext cannot be serialized due to tuple extraction + Reynold Xin + 2015-02-18 14:02:32 -0800 + Commit: b86e44c, github.com/apache/spark/pull/4628 + + [SPARK-5507] Added documentation for BlockMatrix + Burak Yavuz + 2015-02-18 10:11:08 -0800 + Commit: 56f8f29, github.com/apache/spark/pull/4664 + + [SPARK-5519][MLLIB] add user guide with example code for fp-growth + Xiangrui Meng + 2015-02-18 10:09:56 -0800 + Commit: 661fbd3, github.com/apache/spark/pull/4661 + + SPARK-5669 [BUILD] [HOTFIX] Spark assembly includes incompatibly licensed libgfortran, libgcc code via JBLAS + Sean Owen + 2015-02-18 14:41:44 +0000 + Commit: 9f256ce, github.com/apache/spark/pull/4673 + + SPARK-4610 addendum: [Minor] [MLlib] Minor doc fix in GBT classification example + MechCoder + 2015-02-18 10:13:28 +0000 + Commit: 3997e74, github.com/apache/spark/pull/4672 + + [SPARK-5878] fix DataFrame.repartition() in Python + Davies Liu + 2015-02-18 01:00:54 -0800 + Commit: aca7991, github.com/apache/spark/pull/4667 + + Avoid deprecation warnings in JDBCSuite. + Tor Myklebust + 2015-02-18 01:00:13 -0800 + Commit: 9a565b8, github.com/apache/spark/pull/4668 + + [Minor] [SQL] Cleans up DataFrame variable names and toDF() calls + Cheng Lian + 2015-02-17 23:36:20 -0800 + Commit: 2bd33ce, github.com/apache/spark/pull/4670 + + [SPARK-5731][Streaming][Test] Fix incorrect test in DirectKafkaStreamSuite + Tathagata Das + 2015-02-17 22:44:16 -0800 + Commit: f8f9a64, github.com/apache/spark/pull/4597 + + [SPARK-5723][SQL]Change the default file format to Parquet for CTAS statements. 
+ Yin Huai + 2015-02-17 18:14:33 -0800 + Commit: 6e82c46, github.com/apache/spark/pull/4639 + + Preparing development version 1.3.1-SNAPSHOT + Patrick Wendell + 2015-02-18 01:52:06 +0000 + Commit: 2ab0ba0 + + Preparing Spark release v1.3.0-rc1 + Patrick Wendell + 2015-02-18 01:52:06 +0000 + Commit: f97b0d4 + + [SPARK-5875][SQL]logical.Project should not be resolved if it contains aggregates or generators + Yin Huai + 2015-02-17 17:50:39 -0800 + Commit: e8284b2, github.com/apache/spark/pull/4663 + + Revert "Preparing Spark release v1.3.0-snapshot1" + Patrick Wendell + 2015-02-17 17:48:47 -0800 + Commit: 7320605 + + Revert "Preparing development version 1.3.1-SNAPSHOT" + Patrick Wendell + 2015-02-17 17:48:43 -0800 + Commit: 932ae4d + + [SPARK-4454] Revert getOrElse() cleanup in DAGScheduler.getCacheLocs() + Josh Rosen + 2015-02-17 17:45:16 -0800 + Commit: 7e5e4d8 + + [SPARK-4454] Properly synchronize accesses to DAGScheduler cacheLocs map + Josh Rosen + 2015-02-17 17:39:58 -0800 + Commit: 07a401a, github.com/apache/spark/pull/4660 + + [SPARK-5811] Added documentation for maven coordinates and added Spark Packages support + Burak Yavuz , Davies Liu + 2015-02-17 17:15:43 -0800 + Commit: cb90584, github.com/apache/spark/pull/4662 + + [SPARK-5785] [PySpark] narrow dependency for cogroup/join in PySpark + Davies Liu + 2015-02-17 16:54:57 -0800 + Commit: 8120235, github.com/apache/spark/pull/4629 + + [SPARK-5852][SQL]Fail to convert a newly created empty metastore parquet table to a data source parquet table. + Yin Huai , Cheng Hao + 2015-02-17 15:47:59 -0800 + Commit: 07d8ef9, github.com/apache/spark/pull/4655 + + [SPARK-5872] [SQL] create a sqlCtx in pyspark shell + Davies Liu + 2015-02-17 15:44:37 -0800 + Commit: 0dba382, github.com/apache/spark/pull/4659 + + [SPARK-5871] output explain in Python + Davies Liu + 2015-02-17 13:48:38 -0800 + Commit: cb06160, github.com/apache/spark/pull/4658 + + [SPARK-4172] [PySpark] Progress API in Python + Davies Liu + 2015-02-17 13:36:43 -0800 + Commit: 35e23ff, github.com/apache/spark/pull/3027 + + [SPARK-5868][SQL] Fix python UDFs in HiveContext and checks in SQLContext + Michael Armbrust + 2015-02-17 13:23:45 -0800 + Commit: e65dc1f, github.com/apache/spark/pull/4657 + + [SQL] [Minor] Update the HiveContext Unittest + Cheng Hao + 2015-02-17 12:25:35 -0800 + Commit: 0135651, github.com/apache/spark/pull/4584 + + [Minor][SQL] Use same function to check path parameter in JSONRelation + Liang-Chi Hsieh + 2015-02-17 12:24:13 -0800 + Commit: d74d5e8, github.com/apache/spark/pull/4649 + + [SPARK-5862][SQL] Only transformUp the given plan once in HiveMetastoreCatalog + Liang-Chi Hsieh + 2015-02-17 12:23:18 -0800 + Commit: 62063b7, github.com/apache/spark/pull/4651 + + [Minor] fix typo in SQL document + CodingCat + 2015-02-17 12:16:52 -0800 + Commit: 5636c4a, github.com/apache/spark/pull/4656 + + [SPARK-5864] [PySpark] support .jar as python package + Davies Liu + 2015-02-17 12:05:06 -0800 + Commit: 71cf6e2, github.com/apache/spark/pull/4652 + + SPARK-5841 [CORE] [HOTFIX] Memory leak in DiskBlockManager + Sean Owen + 2015-02-17 19:40:06 +0000 + Commit: e64afcd, github.com/apache/spark/pull/4648 + + [SPARK-5661]function hasShutdownDeleteTachyonDir should use shutdownDeleteTachyonPaths to determine whether contains file + xukun 00228947 , viper-kun + 2015-02-17 18:59:41 +0000 + Commit: 420bc9b, 
github.com/apache/spark/pull/4418 + + [SPARK-5778] throw if nonexistent metrics config file provided + Ryan Williams + 2015-02-17 10:57:16 -0800 + Commit: 2bf2b56, github.com/apache/spark/pull/4571 + + [SPARK-5859] [PySpark] [SQL] fix DataFrame Python API + Davies Liu + 2015-02-17 10:22:48 -0800 + Commit: 4a581aa, github.com/apache/spark/pull/4645 + + [SPARK-5166][SPARK-5247][SPARK-5258][SQL] API Cleanup / Documentation + Michael Armbrust + 2015-02-17 10:21:17 -0800 + Commit: cd3d415, github.com/apache/spark/pull/4642 + + [SPARK-5858][MLLIB] Remove unnecessary first() call in GLM + Xiangrui Meng + 2015-02-17 10:17:45 -0800 + Commit: 97cb568, github.com/apache/spark/pull/4647 + + SPARK-5856: In Maven build script, launch Zinc with more memory + Patrick Wendell + 2015-02-17 10:10:01 -0800 + Commit: 8240629, github.com/apache/spark/pull/4643 + + Revert "[SPARK-5363] [PySpark] check ending mark in non-block way" + Josh Rosen + 2015-02-17 07:48:27 -0800 + Commit: aeb85cd + + [SPARK-5826][Streaming] Fix Configuration not serializable problem + jerryshao + 2015-02-17 10:45:18 +0000 + Commit: b8da5c3, github.com/apache/spark/pull/4612 + + HOTFIX: Style issue causing build break + Patrick Wendell + 2015-02-16 22:10:39 -0800 + Commit: e9241fa + + [SPARK-5802][MLLIB] cache transformed data in glm + Xiangrui Meng + 2015-02-16 22:09:04 -0800 + Commit: dfe0fa0, github.com/apache/spark/pull/4593 + + [SPARK-5853][SQL] Schema support in Row. + Reynold Xin + 2015-02-16 20:42:57 -0800 + Commit: d0701d9, github.com/apache/spark/pull/4640 + + SPARK-5850: Remove experimental label for Scala 2.11 and FlumePollingStream + Patrick Wendell + 2015-02-16 20:33:33 -0800 + Commit: c6a7069, github.com/apache/spark/pull/4638 + + [SPARK-5363] [PySpark] check ending mark in non-block way + Davies Liu + 2015-02-16 20:32:03 -0800 + Commit: baad6b3, github.com/apache/spark/pull/4601 + + [SQL] Various DataFrame doc changes. + Reynold Xin + 2015-02-16 19:00:30 -0800 + Commit: e355b54, github.com/apache/spark/pull/4636 + + [SPARK-5849] Handle more types of invalid JSON requests in SubmitRestProtocolMessage.parseAction + Josh Rosen + 2015-02-16 18:08:02 -0800 + Commit: 385a339, github.com/apache/spark/pull/4637 + + [SPARK-3340] Deprecate ADD_JARS and ADD_FILES + azagrebin + 2015-02-16 18:06:19 -0800 + Commit: d8c70fb, github.com/apache/spark/pull/4616 + + [SPARK-5788] [PySpark] capture the exception in python write thread + Davies Liu + 2015-02-16 17:57:14 -0800 + Commit: c2a9a61, github.com/apache/spark/pull/4577 + + SPARK-5848: tear down the ConsoleProgressBar timer + Matt Whelan + 2015-02-17 00:59:49 +0000 + Commit: 52994d8, github.com/apache/spark/pull/4635 + + [SPARK-4865][SQL]Include temporary tables in SHOW TABLES + Yin Huai + 2015-02-16 15:59:23 -0800 + Commit: 8a94bf7, github.com/apache/spark/pull/4618 + + [SQL] Optimize arithmetic and predicate operators + kai + 2015-02-16 15:58:05 -0800 + Commit: 639a3c2, github.com/apache/spark/pull/4472 + + [SPARK-5839][SQL]HiveMetastoreCatalog does not recognize table names and aliases of data source tables. 
+ Yin Huai + 2015-02-16 15:54:01 -0800 + Commit: a15a0a0, github.com/apache/spark/pull/4626 + + [SPARK-5746][SQL] Check invalid cases for the write path of data source API + Yin Huai + 2015-02-16 15:51:59 -0800 + Commit: 4198654, github.com/apache/spark/pull/4617 + + HOTFIX: Break in Jekyll build from #4589 + Patrick Wendell + 2015-02-16 15:43:56 -0800 + Commit: ad8fd4f + + [SPARK-2313] Use socket to communicate GatewayServer port back to Python driver + Josh Rosen + 2015-02-16 15:25:11 -0800 + Commit: b70b8ba, github.com/apache/spark/pull/3424. + + SPARK-5357: Update commons-codec version to 1.10 (current) + Matt Whelan + 2015-02-16 23:05:34 +0000 + Commit: 8c45619, github.com/apache/spark/pull/4153 + + SPARK-5841: remove DiskBlockManager shutdown hook on stop + Matt Whelan + 2015-02-16 22:54:32 +0000 + Commit: dd977df, github.com/apache/spark/pull/4627 + + [SPARK-5833] [SQL] Adds REFRESH TABLE command + Cheng Lian + 2015-02-16 12:52:05 -0800 + Commit: 864d77e, github.com/apache/spark/pull/4624 + + [SPARK-5296] [SQL] Add more filter types for data sources API + Cheng Lian + 2015-02-16 12:48:55 -0800 + Commit: 363a9a7, github.com/apache/spark/pull/4623 + + [SQL] Add fetched row count in SparkSQLCLIDriver + OopsOutOfMemory + 2015-02-16 12:34:09 -0800 + Commit: 0368494, github.com/apache/spark/pull/4604 + + [SQL] Initial support for reporting location of error in sql string + Michael Armbrust + 2015-02-16 12:32:56 -0800 + Commit: 63fa123, github.com/apache/spark/pull/4587 + + [SPARK-5824] [SQL] add null format in ctas and set default col comment to null + Daoyuan Wang + 2015-02-16 12:31:36 -0800 + Commit: c2eaaea, github.com/apache/spark/pull/4609 + + [SQL] [Minor] Update the SpecificMutableRow.copy + Cheng Hao + 2015-02-16 12:21:08 -0800 + Commit: 1a88955, github.com/apache/spark/pull/4619 + + SPARK-5795 [STREAMING] api.java.JavaPairDStream.saveAsNewAPIHadoopFiles may not friendly to java + Sean Owen + 2015-02-16 19:32:31 +0000 + Commit: fef2267, github.com/apache/spark/pull/4608 + + [SPARK-5799][SQL] Compute aggregation function on specified numeric columns + Liang-Chi Hsieh + 2015-02-16 10:06:11 -0800 + Commit: 0165e9d, github.com/apache/spark/pull/4592 + + [SPARK-4553] [SPARK-5767] [SQL] Wires Parquet data source with the newly introduced write support for data source API + Cheng Lian + 2015-02-16 01:38:31 -0800 + Commit: 78f7edb, github.com/apache/spark/pull/4563 + + [Minor] [SQL] Renames stringRddToDataFrame to stringRddToDataFrameHolder for consistency + Cheng Lian + 2015-02-16 01:33:37 -0800 + Commit: 066301c, github.com/apache/spark/pull/4613 + + [Ml] SPARK-5804 Explicitly manage cache in Crossvalidator k-fold loop + Peter Rudenko + 2015-02-16 00:07:23 -0800 + Commit: 0d93205, github.com/apache/spark/pull/4595 + + [Ml] SPARK-5796 Don't transform data on a last estimator in Pipeline + Peter Rudenko + 2015-02-15 20:51:32 -0800 + Commit: 9cf7d70, github.com/apache/spark/pull/4590 + + SPARK-5815 [MLLIB] Deprecate SVDPlusPlus APIs that expose DoubleMatrix from JBLAS + Sean Owen + 2015-02-15 20:41:27 -0800 + Commit: db3c539, github.com/apache/spark/pull/4614 + + [SPARK-5769] Set params in constructors and in setParams in Python ML pipelines + Xiangrui Meng + 2015-02-15 20:29:26 -0800 + Commit: d710991, github.com/apache/spark/pull/4564 + + SPARK-5669 [BUILD] Spark assembly includes incompatibly licensed libgfortran, 
libgcc code via JBLAS + Sean Owen + 2015-02-15 09:15:48 -0800 + Commit: 4e099d7, github.com/apache/spark/pull/4453 + + [MLLIB][SPARK-5502] User guide for isotonic regression + martinzapletal + 2015-02-15 09:10:03 -0800 + Commit: d96e188, github.com/apache/spark/pull/4536 + + [HOTFIX] Ignore DirectKafkaStreamSuite. + Patrick Wendell + 2015-02-13 12:43:53 -0800 + Commit: 70ebad4 + + [SPARK-5827][SQL] Add missing import in the example of SqlContext + Takeshi Yamamuro + 2015-02-15 14:42:20 +0000 + Commit: 9c1c70d, github.com/apache/spark/pull/4615 + + SPARK-5822 [BUILD] cannot import src/main/scala & src/test/scala into eclipse as source folder + gli + 2015-02-14 20:43:27 +0000 + Commit: f87f3b7, github.com/apache/spark/pull/4531 + + Revise formatting of previous commit f80e2629bb74bc62960c61ff313f7e7802d61319 + Sean Owen + 2015-02-14 20:12:29 +0000 + Commit: 1945fcf + + [SPARK-5800] Streaming Docs. Change linked files according the selected language + gasparms + 2015-02-14 20:10:29 +0000 + Commit: e99e170, github.com/apache/spark/pull/4589 + + [SPARK-5752][SQL] Don't implicitly convert RDDs directly to DataFrames + Reynold Xin , Davies Liu + 2015-02-13 23:03:22 -0800 + Commit: ba91bf5, github.com/apache/spark/pull/4556 + + SPARK-3290 [GRAPHX] No unpersist callls in SVDPlusPlus + Sean Owen + 2015-02-13 20:12:52 -0800 + Commit: db57479, github.com/apache/spark/pull/4234 + + [SPARK-5227] [SPARK-5679] Disable FileSystem cache in WholeTextFileRecordReaderSuite + Josh Rosen + 2015-02-13 17:45:31 -0800 + Commit: 152147f, github.com/apache/spark/pull/4599 + + [SPARK-5730][ML] add doc groups to spark.ml components + Xiangrui Meng + 2015-02-13 16:45:59 -0800 + Commit: fccd38d, github.com/apache/spark/pull/4600 + + [SPARK-5803][MLLIB] use ArrayBuilder to build primitive arrays + Xiangrui Meng + 2015-02-13 16:43:49 -0800 + Commit: 356b798, github.com/apache/spark/pull/4594 + + [SPARK-5806] re-organize sections in mllib-clustering.md + Xiangrui Meng + 2015-02-13 15:09:27 -0800 + Commit: 9658763, github.com/apache/spark/pull/4598 + + [SPARK-5789][SQL]Throw a better error message if JsonRDD.parseJson encounters unrecoverable parsing errors. + Yin Huai + 2015-02-13 13:51:06 -0800 + Commit: d9d0250, github.com/apache/spark/pull/4582 + + [SPARK-5642] [SQL] Apply column pruning on unused aggregation fields + Daoyuan Wang , Michael Armbrust + 2015-02-13 13:46:50 -0800 + Commit: efffc2e, github.com/apache/spark/pull/4415 + + [HOTFIX] Fix build break in MesosSchedulerBackendSuite + Andrew Or + 2015-02-13 13:10:29 -0800 + Commit: 4160371 + + SPARK-5805 Fixed the type error in documentation. + Emre Sevinç + 2015-02-13 12:31:27 -0800 + Commit: ad73189, github.com/apache/spark/pull/4596 + + [SPARK-5735] Replace uses of EasyMock with Mockito + Josh Rosen + 2015-02-13 09:53:57 -0800 + Commit: cc9eec1, github.com/apache/spark/pull/4578 + + [SPARK-5783] Better eventlog-parsing error messages + Ryan Williams + 2015-02-13 09:47:26 -0800 + Commit: e5690a5, github.com/apache/spark/pull/4573 + + [SPARK-5503][MLLIB] Example code for Power Iteration Clustering + sboeschhuawei + 2015-02-13 09:45:57 -0800 + Commit: 5e63942, github.com/apache/spark/pull/4495 + + [SPARK-5732][CORE]:Add an option to print the spark version in spark script. 
+ uncleGen , genmao.ygm + 2015-02-13 09:43:10 -0800 + Commit: 5c883df, github.com/apache/spark/pull/4522 + + [SPARK-4832][Deploy]some other processes might take the daemon pid + WangTaoTheTonic , WangTaoTheTonic + 2015-02-13 10:27:23 +0000 + Commit: 1255e83, github.com/apache/spark/pull/3683 + + [SQL] Fix docs of SQLContext.tables + Yin Huai + 2015-02-12 20:37:55 -0800 + Commit: a8f560c, github.com/apache/spark/pull/4579 + + [SPARK-3365][SQL]Wrong schema generated for List type + tianyi + 2015-02-12 22:18:39 -0800 + Commit: b9f332a, github.com/apache/spark/pull/4581 + + [SPARK-3299][SQL]Public API in SQLContext to list tables + Yin Huai + 2015-02-12 18:08:01 -0800 + Commit: edbac17, github.com/apache/spark/pull/4547 + + [SQL] Move SaveMode to SQL package. + Yin Huai + 2015-02-12 15:32:17 -0800 + Commit: 925fd84, github.com/apache/spark/pull/4542 + + [SPARK-5335] Fix deletion of security groups within a VPC + Vladimir Grigor , Vladimir Grigor + 2015-02-12 23:26:24 +0000 + Commit: 5c9db4e, github.com/apache/spark/pull/4122 + + [SPARK-5755] [SQL] remove unnecessary Add + Daoyuan Wang + 2015-02-12 15:22:07 -0800 + Commit: f7103b3, github.com/apache/spark/pull/4551 + + [SPARK-5573][SQL] Add explode to dataframes + Michael Armbrust + 2015-02-12 15:19:19 -0800 + Commit: c7eb9ee, github.com/apache/spark/pull/4546 + + [SPARK-5758][SQL] Use LongType as the default type for integers in JSON schema inference. + Yin Huai + 2015-02-12 15:17:25 -0800 + Commit: b0c79da, github.com/apache/spark/pull/4544 + + [SPARK-5780] [PySpark] Mute the logging during unit tests + Davies Liu + 2015-02-12 14:54:38 -0800 + Commit: bf0d15c, github.com/apache/spark/pull/4572 + + SPARK-5747: Fix wordsplitting bugs in make-distribution.sh + David Y. Ross + 2015-02-12 14:52:38 -0800 + Commit: 11a0d5b, github.com/apache/spark/pull/4540 + + [SPARK-5759][Yarn]ExecutorRunnable should catch YarnException while NMClient start contain... 
+ lianhuiwang + 2015-02-12 14:50:16 -0800 + Commit: 02d5b32, github.com/apache/spark/pull/4554 + + [SPARK-5760][SPARK-5761] Fix standalone rest protocol corner cases + revamp tests + Andrew Or + 2015-02-12 14:47:52 -0800 + Commit: 11d1080, github.com/apache/spark/pull/4557 + + [SPARK-5762] Fix shuffle write time for sort-based shuffle + Kay Ousterhout + 2015-02-12 14:46:37 -0800 + Commit: 0040fc5, github.com/apache/spark/pull/4559 + + [SPARK-5765][Examples]Fixed word split problem in run-example and compute-classpath + Venkata Ramana G , Venkata Ramana Gollamudi + 2015-02-12 14:44:21 -0800 + Commit: 9a1de4b, github.com/apache/spark/pull/4561 + + [SPARK-5645] Added local read bytes/time to task metrics + Kay Ousterhout + 2015-02-12 14:35:44 -0800 + Commit: 74f34bb, github.com/apache/spark/pull/4510 + + [SQL] Improve error messages + Michael Armbrust , wangfei + 2015-02-12 13:11:28 -0800 + Commit: e3a975d, github.com/apache/spark/pull/4558 + + [SQL][DOCS] Update sql documentation + Antonio Navarro Perez + 2015-02-12 12:46:17 -0800 + Commit: cbd659e, github.com/apache/spark/pull/4560 + + [SPARK-5757][MLLIB] replace SQL JSON usage in model import/export by json4s + Xiangrui Meng + 2015-02-12 10:48:13 -0800 + Commit: e26c149, github.com/apache/spark/pull/4555 + + [SPARK-5655] Don't chmod700 application files if running in YARN + Andrew Rowson + 2015-02-12 18:41:39 +0000 + Commit: e23c8f5, github.com/apache/spark/pull/4509 + + [SQL] Make dataframe more tolerant of being serialized + Michael Armbrust + 2015-02-11 19:05:49 -0800 + Commit: 3c1b9bf, github.com/apache/spark/pull/4545 + + [SQL] Two DataFrame fixes. + Reynold Xin + 2015-02-11 18:32:48 -0800 + Commit: bcb1382, github.com/apache/spark/pull/4543 + + [SPARK-3688][SQL] More inline comments for LogicalPlan. + Reynold Xin + 2015-02-11 15:26:31 -0800 + Commit: 08ab3d2, github.com/apache/spark/pull/4539 + + [SPARK-3688][SQL]LogicalPlan can't resolve column correctlly + tianyi + 2015-02-11 12:50:17 -0800 + Commit: e136f47, github.com/apache/spark/pull/4524 + + [SPARK-5454] More robust handling of self joins + Michael Armbrust + 2015-02-11 12:31:56 -0800 + Commit: 1bb3631, github.com/apache/spark/pull/4520 + + Remove outdated remark about take(n). 
+ Daniel Darabos + 2015-02-11 20:24:17 +0000 + Commit: 72adfc5, github.com/apache/spark/pull/4533 + + [SPARK-5677] [SPARK-5734] [SQL] [PySpark] Python DataFrame API remaining tasks + Davies Liu + 2015-02-11 12:13:16 -0800 + Commit: d66aae2, github.com/apache/spark/pull/4528 + + [SPARK-5733] Error Link in Pagination of HistroyPage when showing Incomplete Applications + guliangliang + 2015-02-11 15:55:49 +0000 + Commit: 864dccd, github.com/apache/spark/pull/4523 + + SPARK-5727 [BUILD] Deprecate Debian packaging + Sean Owen + 2015-02-11 08:30:16 +0000 + Commit: 057ec4f, github.com/apache/spark/pull/4516 + + SPARK-5728 [STREAMING] MQTTStreamSuite leaves behind ActiveMQ database files + Sean Owen + 2015-02-11 08:13:51 +0000 + Commit: 476b6d7, github.com/apache/spark/pull/4517 + + [SPARK-4964] [Streaming] refactor createRDD to take leaders via map instead of array + cody koeninger + 2015-02-11 00:13:27 -0800 + Commit: 811d179, github.com/apache/spark/pull/4511 + + Preparing development version 1.3.1-SNAPSHOT + Patrick Wendell + 2015-02-11 07:47:03 +0000 + Commit: e57c81b + + Preparing Spark release v1.3.0-snapshot1 + Patrick Wendell + 2015-02-11 07:47:02 +0000 + Commit: d97bfc6 + + Revert "Preparing Spark release v1.3.0-snapshot1" + Patrick Wendell + 2015-02-10 23:46:04 -0800 + Commit: 6a91d59 + + Revert "Preparing development version 1.3.1-SNAPSHOT" + Patrick Wendell + 2015-02-10 23:46:02 -0800 + Commit: 3a50383 + + HOTFIX: Adding Junit to Hive tests for Maven build + Patrick Wendell + 2015-02-10 23:39:21 -0800 + Commit: 0386fc4 + + Preparing development version 1.3.1-SNAPSHOT + Patrick Wendell + 2015-02-11 06:45:03 +0000 + Commit: ba12b79 + + Preparing Spark release v1.3.0-snapshot1 + Patrick Wendell + 2015-02-11 06:45:03 +0000 + Commit: 53068f5 + + HOTFIX: Java 6 compilation error in Spark SQL + Patrick Wendell + 2015-02-10 22:43:32 -0800 + Commit: 15180bc + + Revert "Preparing Spark release v1.3.0-snapshot1" + Patrick Wendell + 2015-02-10 22:44:10 -0800 + Commit: 536dae9 + + Revert "Preparing development version 1.3.1-SNAPSHOT" + Patrick Wendell + 2015-02-10 22:44:07 -0800 + Commit: 01b562e + + Preparing development version 1.3.1-SNAPSHOT + Patrick Wendell + 2015-02-11 06:15:29 +0000 + Commit: db80d0f + + Preparing Spark release v1.3.0-snapshot1 + Patrick Wendell + 2015-02-11 06:15:29 +0000 + Commit: c2e4001 + + Updating versions for Spark 1.3 + Patrick Wendell + 2015-02-10 21:54:55 -0800 + Commit: 2f52489 + + [SPARK-5714][Mllib] Refactor initial step of LDA to remove redundant operations + Liang-Chi Hsieh + 2015-02-10 21:51:15 -0800 + Commit: ba3aa8f, github.com/apache/spark/pull/4501 + + [SPARK-5702][SQL] Allow short names for built-in data sources. + Reynold Xin + 2015-02-10 20:40:21 -0800 + Commit: 63af90c, github.com/apache/spark/pull/4489 + + [SPARK-5729] Potential NPE in standalone REST API + Andrew Or + 2015-02-10 20:19:14 -0800 + Commit: 1bc75b0, github.com/apache/spark/pull/4518 + + [SPARK-4879] Use driver to coordinate Hadoop output committing for speculative tasks + mcheah , Josh Rosen + 2015-02-10 20:12:18 -0800 + Commit: 79cd59c, github.com/apache/spark/pull/4155. + + [SQL][DataFrame] Fix column computability bug. 
+ Reynold Xin + 2015-02-10 19:50:44 -0800 + Commit: e477e91, github.com/apache/spark/pull/4519 + + [SPARK-5709] [SQL] Add EXPLAIN support in DataFrame API for debugging purpose + Cheng Hao + 2015-02-10 19:40:51 -0800 + Commit: 7fa0d5f, github.com/apache/spark/pull/4496 + + [SPARK-5704] [SQL] [PySpark] createDataFrame from RDD with columns + Davies Liu + 2015-02-10 19:40:12 -0800 + Commit: 1056c5b, github.com/apache/spark/pull/4498 + + [SPARK-5683] [SQL] Avoid multiple json generator created + Cheng Hao + 2015-02-10 18:19:56 -0800 + Commit: fc0446f, github.com/apache/spark/pull/4468 + + [SQL] Add an exception for analysis errors. + Michael Armbrust + 2015-02-10 17:32:42 -0800 + Commit: 748cdc1, github.com/apache/spark/pull/4439 + + [SPARK-5658][SQL] Finalize DDL and write support APIs + Yin Huai + 2015-02-10 17:29:52 -0800 + Commit: a21090e, github.com/apache/spark/pull/4446 + + [SPARK-5493] [core] Add option to impersonate user. + Marcelo Vanzin + 2015-02-10 17:19:10 -0800 + Commit: 8e75b0e, github.com/apache/spark/pull/4405 + + [SQL] Make Options in the data source API CREATE TABLE statements optional. + Yin Huai + 2015-02-10 17:06:12 -0800 + Commit: 445dbc7, github.com/apache/spark/pull/4515 + + [SPARK-5725] [SQL] Fixes ParquetRelation2.equals + Cheng Lian + 2015-02-10 17:02:44 -0800 + Commit: f43bc3d, github.com/apache/spark/pull/4513 + + [SPARK-5343][GraphX]: ShortestPaths traverses backwards + Brennon York + 2015-02-10 14:57:00 -0800 + Commit: 5be8902, github.com/apache/spark/pull/4478 + + [SPARK-5021] [MLlib] Gaussian Mixture now supports Sparse Input + MechCoder + 2015-02-10 14:05:55 -0800 + Commit: bba0953, github.com/apache/spark/pull/4459 + + [HOTFIX][SPARK-4136] Fix compilation and tests + Andrew Or + 2015-02-10 11:18:01 -0800 + Commit: 4e3aa68 + + [SPARK-5686][SQL] Add show current roles command in HiveQl + OopsOutOfMemory + 2015-02-10 13:20:15 -0800 + Commit: 8b7587a, github.com/apache/spark/pull/4471 + + [SQL] Add toString to DataFrame/Column + Michael Armbrust + 2015-02-10 13:14:01 -0800 + Commit: ef739d9, github.com/apache/spark/pull/4436 + + SPARK-5613: Catch the ApplicationNotFoundException exception to avoid thread from getting killed on yarn restart. + Kashish Jain + 2015-02-06 13:47:23 -0800 + Commit: c294216, github.com/apache/spark/pull/4392 + + [SPARK-5592][SQL] java.net.URISyntaxException when insert data to a partitioned table + wangfei , Fei Wang + 2015-02-10 11:54:30 -0800 + Commit: dbfce30, github.com/apache/spark/pull/4368 + + SPARK-4136. 
Under dynamic allocation, cancel outstanding executor requests when no longer needed + Sandy Ryza + 2015-02-10 11:07:25 -0800 + Commit: e53da21, github.com/apache/spark/pull/4168 + + [SPARK-5716] [SQL] Support TOK_CHARSETLITERAL in HiveQl + Daoyuan Wang + 2015-02-10 11:08:21 -0800 + Commit: e508237, github.com/apache/spark/pull/4502 + + [Spark-5717] [MLlib] add stop and reorganize import + JqueryFan , Yuhao Yang + 2015-02-10 17:37:32 +0000 + Commit: b32f553, github.com/apache/spark/pull/4503 + + [SPARK-5700] [SQL] [Build] Bumps jets3t to 0.9.3 for hadoop-2.3 and hadoop-2.4 profiles + Cheng Lian + 2015-02-10 02:28:47 -0800 + Commit: d6f31e0, github.com/apache/spark/pull/4499 + + SPARK-5239 [CORE] JdbcRDD throws "java.lang.AbstractMethodError: oracle.jdbc.driver.xxxxxx.isClosed()Z" + Sean Owen + 2015-02-10 09:19:01 +0000 + Commit: 4cfc025, github.com/apache/spark/pull/4470 + + [SPARK-4964][Streaming][Kafka] More updates to Exactly-once Kafka stream + Tathagata Das + 2015-02-09 22:45:48 -0800 + Commit: 281614d, github.com/apache/spark/pull/4384 + + [SPARK-5597][MLLIB] save/load for decision trees and emsembles + Joseph K. Bradley , Xiangrui Meng + 2015-02-09 22:09:07 -0800 + Commit: 01905c4, github.com/apache/spark/pull/4444. + + [SQL] Remove the duplicated code + Cheng Hao + 2015-02-09 21:33:34 -0800 + Commit: 663d34e, github.com/apache/spark/pull/4494 + + [SPARK-5701] Only set ShuffleReadMetrics when task has shuffle deps + Kay Ousterhout + 2015-02-09 21:22:09 -0800 + Commit: 6ddbca4, github.com/apache/spark/pull/4488 + + [SPARK-5703] AllJobsPage throws empty.max exception + Andrew Or + 2015-02-09 21:18:48 -0800 + Commit: 8326255, github.com/apache/spark/pull/4490 + + [SPARK-2996] Implement userClassPathFirst for driver, yarn. + Marcelo Vanzin + 2015-02-09 21:17:06 -0800 + Commit: 6a1e0f9, github.com/apache/spark/pull/3233 + + SPARK-4900 [MLLIB] MLlib SingularValueDecomposition ARPACK IllegalStateException + Sean Owen + 2015-02-09 21:13:58 -0800 + Commit: ebf1df0, github.com/apache/spark/pull/4485 + + Add a config option to print DAG. + KaiXinXiaoLei + 2015-02-09 20:58:58 -0800 + Commit: dad05e0, github.com/apache/spark/pull/4257 + + [SPARK-5469] restructure pyspark.sql into multiple files + Davies Liu + 2015-02-09 20:49:22 -0800 + Commit: f0562b4, github.com/apache/spark/pull/4479 + + [SPARK-5698] Do not let user request negative # of executors + Andrew Or + 2015-02-09 17:33:29 -0800 + Commit: 62b1e1f, github.com/apache/spark/pull/4483 + + [SPARK-5699] [SQL] [Tests] Runs hive-thriftserver tests whenever SQL code is modified + Cheng Lian + 2015-02-09 16:52:05 -0800 + Commit: 71f0f51, github.com/apache/spark/pull/4486 + + [SPARK-5648][SQL] support "alter ... unset tblproperties("key")" + DoingDone9 <799203320@qq.com> + 2015-02-09 16:40:26 -0800 + Commit: e2bf59a, github.com/apache/spark/pull/4424 + + [SPARK-2096][SQL] support dot notation on array of struct + Wenchen Fan + 2015-02-09 16:39:34 -0800 + Commit: 15f557f, github.com/apache/spark/pull/2405 + + [SPARK-5614][SQL] Predicate pushdown through Generate. + Lu Yan + 2015-02-09 16:25:38 -0800 + Commit: ce2c89c, github.com/apache/spark/pull/4394 + + [SPARK-5696] [SQL] [HOTFIX] Asks HiveThriftServer2 to re-initialize log4j using Hive configurations + Cheng Lian + 2015-02-09 16:23:12 -0800 + Commit: 379233c, github.com/apache/spark/pull/4484 + + [SQL] Code cleanup. 
+ Yin Huai + 2015-02-09 16:20:42 -0800 + Commit: e241601, github.com/apache/spark/pull/4482 + + [SQL] Add some missing DataFrame functions. + Michael Armbrust + 2015-02-09 16:02:56 -0800 + Commit: a70dca0, github.com/apache/spark/pull/4437 + + [SPARK-5675][SQL] XyzType companion object should subclass XyzType + Reynold Xin + 2015-02-09 14:51:46 -0800 + Commit: 1e2fab2, github.com/apache/spark/pull/4463 + + [SPARK-4905][STREAMING] FlumeStreamSuite fix. + Hari Shreedharan + 2015-02-09 14:17:14 -0800 + Commit: 18c5a99, github.com/apache/spark/pull/4371 + + [SPARK-5691] Fixing wrong data structure lookup for dupe app registratio... + mcheah + 2015-02-09 13:20:14 -0800 + Commit: 6a0144c, github.com/apache/spark/pull/4477 + + [SPARK-5678] Convert DataFrame to pandas.DataFrame and Series + Davies Liu + 2015-02-09 11:42:52 -0800 + Commit: 43972b5, github.com/apache/spark/pull/4476 + + [SPARK-5664][BUILD] Restore stty settings when exiting from SBT's spark-shell + Liang-Chi Hsieh + 2015-02-09 11:45:12 -0800 + Commit: fa67877, github.com/apache/spark/pull/4451 + + SPARK-4267 [YARN] Failing to launch jobs on Spark on YARN with Hadoop 2.5.0 or later + Sean Owen + 2015-02-09 10:33:57 -0800 + Commit: c88d4ab, github.com/apache/spark/pull/4452 + + [SPARK-5473] [EC2] Expose SSH failures after status checks pass + Nicholas Chammas + 2015-02-09 09:44:53 +0000 + Commit: f2aa7b7, github.com/apache/spark/pull/4262 + + [SPARK-5539][MLLIB] LDA guide + Xiangrui Meng , Joseph K. Bradley + 2015-02-08 23:40:36 -0800 + Commit: 5782ee2, github.com/apache/spark/pull/4465 + + [SPARK-5472][SQL] Fix Scala code style + Hung Lin + 2015-02-08 22:36:42 -0800 + Commit: 955f286, github.com/apache/spark/pull/4464 + + SPARK-4405 [MLLIB] Matrices.* construction methods should check for rows x cols overflow + Sean Owen + 2015-02-08 21:08:50 -0800 + Commit: fa8ea48, github.com/apache/spark/pull/4461 + + [SPARK-5660][MLLIB] Make Matrix apply public + Joseph K. Bradley , Xiangrui Meng + 2015-02-08 21:07:36 -0800 + Commit: df9b105, github.com/apache/spark/pull/4447 + + [SPARK-5643][SQL] Add a show method to print the content of a DataFrame in tabular format. + Reynold Xin + 2015-02-08 18:56:51 -0800 + Commit: e1996aa, github.com/apache/spark/pull/4416 + + SPARK-5665 [DOCS] Update netlib-java documentation + Sam Halliday , Sam Halliday + 2015-02-08 16:34:26 -0800 + Commit: c515634, github.com/apache/spark/pull/4448 + + [SPARK-5598][MLLIB] model save/load for ALS + Xiangrui Meng + 2015-02-08 16:26:20 -0800 + Commit: 9e4d58f, github.com/apache/spark/pull/4422 + + [SQL] Set sessionState in QueryExecution. + Yin Huai + 2015-02-08 14:55:07 -0800 + Commit: 42c56b6, github.com/apache/spark/pull/4445 + + [SPARK-3039] [BUILD] Spark assembly for new hadoop API (hadoop 2) contai... + medale + 2015-02-08 10:35:29 +0000 + Commit: bc55e20, github.com/apache/spark/pull/4315 + + [SPARK-5672][Web UI] Don't return `ERROR 500` when have missing args + Kirill A. 
Korinskiy + 2015-02-08 10:31:46 +0000 + Commit: 96010fa, github.com/apache/spark/pull/4239 + + [SPARK-5671] Upgrade jets3t to 0.9.2 in hadoop-2.3 and 2.4 profiles + Josh Rosen + 2015-02-07 17:19:08 -0800 + Commit: 0f9d765, github.com/apache/spark/pull/4454 + + [SPARK-5108][BUILD] Jackson dependency management for Hadoop-2.6.0 support + Zhan Zhang + 2015-02-07 19:41:30 +0000 + Commit: 51fbca4, github.com/apache/spark/pull/3938 + + [BUILD] Add the ability to launch spark-shell from SBT. + Michael Armbrust + 2015-02-07 00:14:38 -0800 + Commit: 6bda169, github.com/apache/spark/pull/4438 + + [SPARK-5388] Provide a stable application submission gateway for standalone cluster mode + Andrew Or + 2015-02-06 15:57:06 -0800 + Commit: 6ec0cdc, github.com/apache/spark/pull/4216 + + SPARK-5403: Ignore UserKnownHostsFile in SSH calls + Grzegorz Dubicki + 2015-02-06 15:43:58 -0800 + Commit: 3d99741, github.com/apache/spark/pull/4196 + + [SPARK-5601][MLLIB] make streaming linear algorithms Java-friendly + Xiangrui Meng + 2015-02-06 15:42:59 -0800 + Commit: 11b28b9, github.com/apache/spark/pull/4432 + + [SQL] [Minor] HiveParquetSuite was disabled by mistake, re-enable them + Cheng Lian + 2015-02-06 15:23:42 -0800 + Commit: 4005802, github.com/apache/spark/pull/4440 + + [SQL] Use TestSQLContext in Java tests + Michael Armbrust + 2015-02-06 15:11:02 -0800 + Commit: c950058, github.com/apache/spark/pull/4441 + + [SPARK-4994][network]Cleanup removed executors' ShuffleInfo in yarn shuffle service + lianhuiwang + 2015-02-06 14:47:52 -0800 + Commit: af6ddf8, github.com/apache/spark/pull/3828 + + [SPARK-5444][Network]Add a retry to deal with the conflict port in netty server. + huangzhaowei + 2015-02-06 14:35:29 -0800 + Commit: caca15a, github.com/apache/spark/pull/4240 + + [SPARK-4874] [CORE] Collect record count metrics + Kostas Sakellis + 2015-02-06 14:31:20 -0800 + Commit: 9fa29a6, github.com/apache/spark/pull/4067 + + [HOTFIX] Fix the maven build after adding sqlContext to spark-shell + Michael Armbrust + 2015-02-06 14:27:06 -0800 + Commit: 11dbf71, github.com/apache/spark/pull/4443 + + [SPARK-5600] [core] Clean up FsHistoryProvider test, fix app sort order. 
+ Marcelo Vanzin + 2015-02-06 14:23:09 -0800 + Commit: 09feecc, github.com/apache/spark/pull/4370 + + SPARK-5633 pyspark saveAsTextFile support for compression codec + Vladimir Vladimirov + 2015-02-06 13:55:02 -0800 + Commit: 1d32341, github.com/apache/spark/pull/4403 + + [HOTFIX][MLLIB] fix a compilation error with java 6 + Xiangrui Meng + 2015-02-06 13:52:35 -0800 + Commit: 87e0f0d, github.com/apache/spark/pull/4442 + + [SPARK-4983] Insert waiting time before tagging EC2 instances + GenTang , Gen TANG + 2015-02-06 13:27:34 -0800 + Commit: 2872d83, github.com/apache/spark/pull/3986 + + [SPARK-5586][Spark Shell][SQL] Make `sqlContext` available in spark shell + OopsOutOfMemory + 2015-02-06 13:20:10 -0800 + Commit: 2ef9853, github.com/apache/spark/pull/4387 + + [SPARK-5278][SQL] Introduce UnresolvedGetField and complete the check of ambiguous reference to fields + Wenchen Fan + 2015-02-06 13:08:09 -0800 + Commit: 1b148ad, github.com/apache/spark/pull/4068 + + [SQL][Minor] Remove cache keyword in SqlParser + wangfei + 2015-02-06 12:42:23 -0800 + Commit: d822606, github.com/apache/spark/pull/4393 + + [SQL][HiveConsole][DOC] HiveConsole `correct hiveconsole imports` + OopsOutOfMemory + 2015-02-06 12:41:28 -0800 + Commit: 2abaa6e, github.com/apache/spark/pull/4389 + + [SPARK-5595][SPARK-5603][SQL] Add a rule to do PreInsert type casting and field renaming and invalidating in memory cache after INSERT + Yin Huai + 2015-02-06 12:38:07 -0800 + Commit: 3c34d62, github.com/apache/spark/pull/4373 + + [SPARK-5324][SQL] Results of describe can't be queried + OopsOutOfMemory , Sheng, Li + 2015-02-06 12:33:20 -0800 + Commit: 0fc35da, github.com/apache/spark/pull/4249 + + [SPARK-5619][SQL] Support 'show roles' in HiveContext + q00251598 + 2015-02-06 12:29:26 -0800 + Commit: cc66a3c, github.com/apache/spark/pull/4397 + + [SPARK-5640] Synchronize ScalaReflection where necessary + Tobias Schlatter + 2015-02-06 12:15:02 -0800 + Commit: 779e28b, github.com/apache/spark/pull/4431 + + [SPARK-5650][SQL] Support optional 'FROM' clause + Liang-Chi Hsieh + 2015-02-06 12:13:44 -0800 + Commit: 921121d, github.com/apache/spark/pull/4426 + + [SPARK-5628] Add version option to spark-ec2 + Nicholas Chammas + 2015-02-06 12:08:22 -0800 + Commit: ab0ffde, github.com/apache/spark/pull/4414 + + [SPARK-2945][YARN][Doc]add doc for spark.executor.instances + WangTaoTheTonic + 2015-02-06 11:57:02 -0800 + Commit: 540f474, github.com/apache/spark/pull/4350 + + [SPARK-4361][Doc] Add more docs for Hadoop Configuration + zsxwing + 2015-02-06 11:50:20 -0800 + Commit: 528dd34, github.com/apache/spark/pull/3225 + + [HOTFIX] Fix test build break in ExecutorAllocationManagerSuite. + Josh Rosen + 2015-02-06 11:47:32 -0800 + Commit: 9e828f4 + + [SPARK-5652][Mllib] Use broadcasted weights in LogisticRegressionModel + Liang-Chi Hsieh + 2015-02-06 11:22:11 -0800 + Commit: 6fda4c1, github.com/apache/spark/pull/4429 + + [SPARK-5555] Enable UISeleniumSuite tests + Josh Rosen + 2015-02-06 11:14:58 -0800 + Commit: 93fee7b, github.com/apache/spark/pull/4334 + + SPARK-2450 Adds executor log links to Web UI + Kostas Sakellis , Josh Rosen + 2015-02-06 11:13:00 -0800 + Commit: e74dd04, github.com/apache/spark/pull/3486 + + [SPARK-5618][Spark Core][Minor] Optimise utility code. 
+ Makoto Fukuhara + 2015-02-06 11:11:38 -0800 + Commit: 3feb798, github.com/apache/spark/pull/4396 + + [SPARK-5593][Core]Replace BlockManagerListener with ExecutorListener in ExecutorAllocationListener + lianhuiwang + 2015-02-06 11:09:37 -0800 + Commit: 9387dc1, github.com/apache/spark/pull/4369 + + [SPARK-4877] Allow user first classes to extend classes in the parent. + Stephen Haberman + 2015-02-06 11:03:56 -0800 + Commit: 52386cf, github.com/apache/spark/pull/3725 + + [SPARK-5396] Syntax error in spark scripts on windows. + Masayoshi TSUZUKI + 2015-02-06 10:58:26 -0800 + Commit: 2dc94cd, github.com/apache/spark/pull/4428 + + [SPARK-5636] Ramp up faster in dynamic allocation + Andrew Or + 2015-02-06 10:54:23 -0800 + Commit: 0a90305, github.com/apache/spark/pull/4409 + + SPARK-4337. [YARN] Add ability to cancel pending requests + Sandy Ryza + 2015-02-06 10:53:16 -0800 + Commit: 1568391, github.com/apache/spark/pull/4141 + + [SPARK-5416] init Executor.threadPool before ExecutorSource + Ryan Williams + 2015-02-06 12:22:25 +0000 + Commit: f9bc4ef, github.com/apache/spark/pull/4212 + + [Build] Set all Debian package permissions to 755 + Nicholas Chammas + 2015-02-06 11:38:39 +0000 + Commit: 3638216, github.com/apache/spark/pull/4277 + + Update ec2-scripts.md + Miguel Peralvo + 2015-02-06 11:04:48 +0000 + Commit: f6613fc, github.com/apache/spark/pull/4300 + + [SPARK-5470][Core]use defaultClassLoader to load classes in KryoSerializer + lianhuiwang + 2015-02-06 11:00:35 +0000 + Commit: 8007a4f, github.com/apache/spark/pull/4258 + + [SPARK-5653][YARN] In ApplicationMaster rename isDriver to isClusterMode + lianhuiwang + 2015-02-06 10:48:31 -0800 + Commit: 4ff8855, github.com/apache/spark/pull/4430 + + [SPARK-5582] [history] Ignore empty log directories. + Marcelo Vanzin + 2015-02-06 10:07:20 +0000 + Commit: faccdcb, github.com/apache/spark/pull/4352 + + [SPARK-5157][YARN] Configure more JVM options properly when we use ConcMarkSweepGC for AM. + Kousuke Saruta + 2015-02-06 09:39:12 +0000 + Commit: 25d8044, github.com/apache/spark/pull/3956 + + [Minor] Remove permission for execution from spark-shell.cmd + Kousuke Saruta + 2015-02-06 09:33:36 +0000 + Commit: 7c54681, github.com/apache/spark/pull/3983 + + [SPARK-5380][GraphX] Solve an ArrayIndexOutOfBoundsException when build graph with a file format error + Leolh + 2015-02-06 09:01:53 +0000 + Commit: ffdb2e9, github.com/apache/spark/pull/4176 + + [SPARK-5013] [MLlib] Added documentation and sample data file for GaussianMixture + Travis Galoppo + 2015-02-06 10:26:51 -0800 + Commit: f408db6, github.com/apache/spark/pull/4401 + + [SPARK-4789] [SPARK-4942] [SPARK-5031] [mllib] Standardize ML Prediction APIs + Joseph K. Bradley + 2015-02-05 23:43:47 -0800 + Commit: 45b95e7, github.com/apache/spark/pull/3637 + + [SPARK-5604][MLLIB] remove checkpointDir from trees + Xiangrui Meng + 2015-02-05 23:32:09 -0800 + Commit: c35a11e, github.com/apache/spark/pull/4407 + + [SPARK-5639][SQL] Support DataFrame.renameColumn. + Reynold Xin + 2015-02-05 23:02:40 -0800 + Commit: 0639d3e, github.com/apache/spark/pull/4410 + + Revert "SPARK-5607: Update to Kryo 2.24.0 to avoid including objenesis 1.2." + Patrick Wendell + 2015-02-05 18:37:55 -0800 + Commit: 6d31531 + + SPARK-5557: Explicitly include servlet API in dependencies. 
+ Patrick Wendell + 2015-02-05 18:14:54 -0800 + Commit: 34131fd, github.com/apache/spark/pull/4411 + + [HOTFIX] [SQL] Disables Metastore Parquet table conversion for "SQLQuerySuite.CTAS with serde" + Cheng Lian + 2015-02-05 18:09:18 -0800 + Commit: ce6d8bb, github.com/apache/spark/pull/4413 + + [SPARK-5638][SQL] Add a config flag to disable eager analysis of DataFrames + Reynold Xin + 2015-02-05 18:07:10 -0800 + Commit: 4fd67e4, github.com/apache/spark/pull/4408 + + [SPARK-5620][DOC] group methods in generated unidoc + Xiangrui Meng + 2015-02-05 16:26:51 -0800 + Commit: e2be79d, github.com/apache/spark/pull/4404 + + [SPARK-5182] [SPARK-5528] [SPARK-5509] [SPARK-3575] [SQL] Parquet data source improvements + Cheng Lian + 2015-02-05 15:29:56 -0800 + Commit: 50c48eb, github.com/apache/spark/pull/4308 + + [SPARK-5604[MLLIB] remove checkpointDir from LDA + Xiangrui Meng + 2015-02-05 15:07:33 -0800 + Commit: 59798cb, github.com/apache/spark/pull/4390 + + [SPARK-5460][MLlib] Wrapped `Try` around `deleteAllCheckpoints` - RandomForest. + x1- + 2015-02-05 15:02:04 -0800 + Commit: 44768f5, github.com/apache/spark/pull/4347 + + [SPARK-5135][SQL] Add support for describe table to DDL in SQLContext + OopsOutOfMemory + 2015-02-05 13:07:48 -0800 + Commit: 55cebcf, github.com/apache/spark/pull/4227 + + [SPARK-5617][SQL] fix test failure of SQLQuerySuite + wangfei + 2015-02-05 12:44:12 -0800 + Commit: 785a2e3, github.com/apache/spark/pull/4395 + + [Branch-1.3] [DOC] doc fix for date + Daoyuan Wang + 2015-02-05 12:42:27 -0800 + Commit: 17ef7f9, github.com/apache/spark/pull/4400 + + [SPARK-5474][Build]curl should support URL redirection in build/mvn + GuoQiang Li + 2015-02-05 12:03:13 -0800 + Commit: d1066e9, github.com/apache/spark/pull/4263 + + [HOTFIX] MLlib build break. + Reynold Xin + 2015-02-05 00:42:50 -0800 + Commit: c83d118 + + SPARK-5548: Fixed a race condition in AkkaUtilsSuite + Jacek Lewandowski + 2015-02-05 12:00:04 -0800 + Commit: fba2dc6, github.com/apache/spark/pull/4343 + + [SPARK-5608] Improve SEO of Spark documentation pages + Matei Zaharia + 2015-02-05 11:12:50 -0800 + Commit: de112a2, github.com/apache/spark/pull/4381 + + SPARK-4687. Add a recursive option to the addFile API + Sandy Ryza + 2015-02-05 10:15:55 -0800 + Commit: c22ccc0, github.com/apache/spark/pull/3670 + + [MLlib] Minor: UDF style update. + Reynold Xin + 2015-02-04 23:57:53 -0800 + Commit: 4074674, github.com/apache/spark/pull/4388 + + [SPARK-5612][SQL] Move DataFrame implicit functions into SQLContext.implicits. + Reynold Xin + 2015-02-04 23:44:34 -0800 + Commit: 0040b61, github.com/apache/spark/pull/4386 + + [SPARK-5606][SQL] Support plus sign in HiveContext + q00251598 + 2015-02-04 23:16:01 -0800 + Commit: bf43781, github.com/apache/spark/pull/4378 + + [SPARK-5599] Check MLlib public APIs for 1.3 + Xiangrui Meng + 2015-02-04 23:03:47 -0800 + Commit: abc184e, github.com/apache/spark/pull/4377 + + [SPARK-5596] [mllib] ML model import/export for GLMs, NaiveBayes + Joseph K. Bradley + 2015-02-04 22:46:48 -0800 + Commit: 885bcbb, github.com/apache/spark/pull/4233 + + SPARK-5607: Update to Kryo 2.24.0 to avoid including objenesis 1.2. 
+ Patrick Wendell + 2015-02-04 22:39:44 -0800 + Commit: 59fb5c7, github.com/apache/spark/pull/4383 + + [SPARK-5602][SQL] Better support for creating DataFrame from local data collection + Reynold Xin + 2015-02-04 19:53:57 -0800 + Commit: b8f9c00, github.com/apache/spark/pull/4372 + + [SPARK-5538][SQL] Fix flaky CachedTableSuite + Reynold Xin + 2015-02-04 19:52:41 -0800 + Commit: 1901b19, github.com/apache/spark/pull/4379 + + [SQL][DataFrame] Minor cleanup. + Reynold Xin + 2015-02-04 19:51:48 -0800 + Commit: f05bfa6, github.com/apache/spark/pull/4374 + + [SPARK-4520] [SQL] This pr fixes the ArrayIndexOutOfBoundsException as r... + Sadhan Sood + 2015-02-04 19:18:06 -0800 + Commit: aa6f4ca, github.com/apache/spark/pull/4148 + + [SPARK-5605][SQL][DF] Allow using String to specify colum name in DSL aggregate functions + Reynold Xin + 2015-02-04 18:35:51 -0800 + Commit: 478ee3f, github.com/apache/spark/pull/4376 + + [SPARK-5411] Allow SparkListeners to be specified in SparkConf and loaded when creating SparkContext + Josh Rosen + 2015-02-04 17:18:03 -0800 + Commit: 47e4d57, github.com/apache/spark/pull/4111 + + [SPARK-5577] Python udf for DataFrame + Davies Liu + 2015-02-04 15:55:09 -0800 + Commit: dc9ead9, github.com/apache/spark/pull/4351 + + [SPARK-5118][SQL] Fix: create table test stored as parquet as select .. + guowei2 + 2015-02-04 15:26:10 -0800 + Commit: 06da868, github.com/apache/spark/pull/3921 + + [SQL] Use HiveContext's sessionState in HiveMetastoreCatalog.hiveDefaultTableFilePath + Yin Huai + 2015-02-04 15:22:40 -0800 + Commit: cb4c3e5, github.com/apache/spark/pull/4355 + + [SQL] Correct the default size of TimestampType and expose NumericType + Yin Huai + 2015-02-04 15:14:49 -0800 + Commit: 513bb2c, github.com/apache/spark/pull/4314 + + [SQL][Hiveconsole] Bring hive console code up to date and update README.md + OopsOutOfMemory , Sheng, Li + 2015-02-04 15:13:54 -0800 + Commit: 2cdcfe3, github.com/apache/spark/pull/4330 + + [SPARK-5367][SQL] Support star expression in udfs + wangfei , scwf + 2015-02-04 15:12:07 -0800 + Commit: 8b803f6, github.com/apache/spark/pull/4353 + + [SPARK-5426][SQL] Add SparkSQL Java API helper methods. + kul + 2015-02-04 15:08:37 -0800 + Commit: 38ab92e, github.com/apache/spark/pull/4243 + + [SPARK-5587][SQL] Support change database owner + wangfei + 2015-02-04 14:35:12 -0800 + Commit: 7920791, github.com/apache/spark/pull/4357 + + [SPARK-5591][SQL] Fix NoSuchObjectException for CTAS + wangfei + 2015-02-04 14:33:07 -0800 + Commit: c79dd1e, github.com/apache/spark/pull/4365 + + [SPARK-4939] move to next locality when no pending tasks + Davies Liu + 2015-02-04 14:22:07 -0800 + Commit: f9bb3cb, github.com/apache/spark/pull/3779 + + [SPARK-4707][STREAMING] Reliable Kafka Receiver can lose data if the blo... 
+ Hari Shreedharan + 2015-02-04 14:20:44 -0800 + Commit: 14c9f32, github.com/apache/spark/pull/3655 + + [SPARK-4964] [Streaming] Exactly-once semantics for Kafka + cody koeninger + 2015-02-04 12:06:34 -0800 + Commit: a119cae, github.com/apache/spark/pull/3798 + + [SPARK-5588] [SQL] support select/filter by SQL expression + Davies Liu + 2015-02-04 11:34:46 -0800 + Commit: 950a0d3, github.com/apache/spark/pull/4359 + + [SPARK-5585] Flaky test in MLlib python + Davies Liu + 2015-02-04 08:54:20 -0800 + Commit: 84c6273, github.com/apache/spark/pull/4358 + + [SPARK-5574] use given name prefix in dir + Imran Rashid + 2015-02-04 01:02:20 -0800 + Commit: 5d9278a, github.com/apache/spark/pull/4344 + + [Minor] Fix incorrect warning log + Liang-Chi Hsieh + 2015-02-04 00:52:41 -0800 + Commit: 316a4bb, github.com/apache/spark/pull/4360 + + [SPARK-5379][Streaming] Add awaitTerminationOrTimeout + zsxwing + 2015-02-04 00:40:28 -0800 + Commit: 4d3dbfd, github.com/apache/spark/pull/4171 + + [SPARK-5341] Use maven coordinates as dependencies in spark-shell and spark-submit + Burak Yavuz + 2015-02-03 22:39:17 -0800 + Commit: 3b7acd2, github.com/apache/spark/pull/4215 + + [SPARK-4939] revive offers periodically in LocalBackend + Davies Liu + 2015-02-03 22:30:23 -0800 + Commit: e196da8, github.com/apache/spark/pull/4147 + + [SPARK-4969][STREAMING][PYTHON] Add binaryRecords to streaming + freeman + 2015-02-03 22:24:30 -0800 + Commit: 9a33f89, github.com/apache/spark/pull/3803 + + [SPARK-5579][SQL][DataFrame] Support for project/filter using SQL expressions + Reynold Xin + 2015-02-03 22:15:35 -0800 + Commit: cb7f783, github.com/apache/spark/pull/4348 + + [FIX][MLLIB] fix seed handling in Python GMM + Xiangrui Meng + 2015-02-03 20:39:11 -0800 + Commit: 679228b, github.com/apache/spark/pull/4349 + + [SPARK-4795][Core] Redesign the "primitive type => Writable" implicit APIs to make them be activated automatically + zsxwing + 2015-02-03 20:17:12 -0800 + Commit: 5c63e05, github.com/apache/spark/pull/3642 + + [SPARK-5578][SQL][DataFrame] Provide a convenient way for Scala users to use UDFs + Reynold Xin + 2015-02-03 20:07:46 -0800 + Commit: b22d5b5, github.com/apache/spark/pull/4345 + + [SPARK-5520][MLlib] Make FP-Growth implementation take generic item types (WIP) + Jacky Li , Jacky Li , Xiangrui Meng + 2015-02-03 17:02:42 -0800 + Commit: 298ef5b, github.com/apache/spark/pull/4340 + + [SPARK-5554] [SQL] [PySpark] add more tests for DataFrame Python API + Davies Liu + 2015-02-03 16:01:56 -0800 + Commit: 4640623, github.com/apache/spark/pull/4331 + + [STREAMING] SPARK-4986 Wait for receivers to deregister and receiver job to terminate + Jesper Lundgren + 2015-02-03 14:53:39 -0800 + Commit: 092d4ba, github.com/apache/spark/pull/4338 + + [SPARK-5153][Streaming][Test] Increased timeout to deal with flaky KafkaStreamSuite + Tathagata Das + 2015-02-03 13:46:02 -0800 + Commit: d644bd9, github.com/apache/spark/pull/4342 + + [SPARK-4508] [SQL] build native date type to conform behavior to Hive + Daoyuan Wang + 2015-02-03 12:21:45 -0800 + Commit: 6e244cf, github.com/apache/spark/pull/4325 + + [SPARK-5383][SQL] Support alias for udtfs + wangfei , scwf , Fei Wang + 2015-02-03 12:16:31 -0800 + Commit: 5dbeb21, github.com/apache/spark/pull/4186 + + [SPARK-5550] [SQL] Support the case insensitive for UDF + Cheng Hao + 2015-02-03 12:12:26 -0800 + Commit: 654c992, github.com/apache/spark/pull/4326
+ + [SPARK-4987] [SQL] parquet timestamp type support + Daoyuan Wang + 2015-02-03 12:06:06 -0800 + Commit: 67d5220, github.com/apache/spark/pull/3820 + + [SQL] DataFrame API update + Reynold Xin + 2015-02-03 10:34:56 -0800 + Commit: 4204a12, github.com/apache/spark/pull/4332 + + Minor: Fix TaskContext deprecated annotations. + Reynold Xin + 2015-02-03 10:34:16 -0800 + Commit: f7948f3, github.com/apache/spark/pull/4333 + + [SPARK-5549] Define TaskContext interface in Scala. + Reynold Xin + 2015-02-03 00:46:04 -0800 + Commit: bebf4c4, github.com/apache/spark/pull/4324 + + [SPARK-5551][SQL] Create type alias for SchemaRDD for source backward compatibility + Reynold Xin + 2015-02-03 00:29:23 -0800 + Commit: 523a935, github.com/apache/spark/pull/4327 + + [SQL][DataFrame] Remove DataFrameApi, ExpressionApi, and GroupedDataFrameApi + Reynold Xin + 2015-02-03 00:29:04 -0800 + Commit: 37df330, github.com/apache/spark/pull/4328 + + [minor] update streaming linear algorithms + Xiangrui Meng + 2015-02-03 00:14:43 -0800 + Commit: 659329f, github.com/apache/spark/pull/4329 + + [SPARK-1405] [mllib] Latent Dirichlet Allocation (LDA) using EM + Xiangrui Meng + 2015-02-02 23:57:35 -0800 + Commit: 980764f, github.com/apache/spark/pull/2388 + + [SPARK-5536] replace old ALS implementation by the new one + Xiangrui Meng + 2015-02-02 23:49:09 -0800 + Commit: 0cc7b88, github.com/apache/spark/pull/4321 + + [SPARK-5414] Add SparkFirehoseListener class for consuming all SparkListener events + Josh Rosen + 2015-02-02 23:35:07 -0800 + Commit: b8ebebe, github.com/apache/spark/pull/4210 + + [SPARK-5501][SPARK-5420][SQL] Write support for the data source API + Yin Huai + 2015-02-02 23:30:44 -0800 + Commit: 13531dd, github.com/apache/spark/pull/4294 + + [SPARK-5012][MLLib][PySpark]Python API for Gaussian Mixture Model + FlytxtRnD + 2015-02-02 23:04:55 -0800 + Commit: 50a1a87, github.com/apache/spark/pull/4059 + + [SPARK-3778] newAPIHadoopRDD doesn't properly pass credentials for secure hdfs + Thomas Graves + 2015-02-02 22:45:55 -0800 + Commit: c31c36c, github.com/apache/spark/pull/4292 + + [SPARK-4979][MLLIB] Streaming logisitic regression + freeman + 2015-02-02 22:42:15 -0800 + Commit: eb0da6c, github.com/apache/spark/pull/4306 + + [SPARK-5219][Core] Add locks to avoid scheduling race conditions + zsxwing + 2015-02-02 21:42:18 -0800 + Commit: c306555, github.com/apache/spark/pull/4019 + + [Doc] Minor: Fixes several formatting issues + Cheng Lian + 2015-02-02 21:14:21 -0800 + Commit: 60f67e7, github.com/apache/spark/pull/4316 + + SPARK-3996: Add jetty servlet and continuations. + Patrick Wendell + 2015-02-02 21:01:36 -0800 + Commit: 7930d2b, github.com/apache/spark/pull/4323 + + SPARK-5542: Decouple publishing, packaging, and tagging in release script + Patrick Wendell , Patrick Wendell + 2015-02-02 21:00:30 -0800 + Commit: 0ef38f5, github.com/apache/spark/pull/4319 + + [SPARK-5543][WebUI] Remove unused import JsonUtil from from JsonProtocol + nemccarthy + 2015-02-02 20:03:13 -0800 + Commit: cb39f12, github.com/apache/spark/pull/4320 + + [SPARK-5472][SQL] A JDBC data source for Spark SQL.
+ Tor Myklebust + 2015-02-02 19:50:14 -0800 + Commit: 8f471a6, github.com/apache/spark/pull/4261 + + [SPARK-5512][Mllib] Run the PIC algorithm with initial vector suggected by the PIC paper + Liang-Chi Hsieh + 2015-02-02 19:34:25 -0800 + Commit: 1bcd465, github.com/apache/spark/pull/4301 + + [SPARK-5154] [PySpark] [Streaming] Kafka streaming support in Python + Davies Liu , Tathagata Das + 2015-02-02 19:16:27 -0800 + Commit: 0561c45, github.com/apache/spark/pull/3715 + + [SQL] Improve DataFrame API error reporting + Reynold Xin , Davies Liu + 2015-02-02 19:01:47 -0800 + Commit: 554403f, github.com/apache/spark/pull/4296 + + Revert "[SPARK-4508] [SQL] build native date type to conform behavior to Hive" + Patrick Wendell + 2015-02-02 17:52:17 -0800 + Commit: eccb9fb + + Spark 3883: SSL support for HttpServer and Akka + Jacek Lewandowski , Jacek Lewandowski + 2015-02-02 17:18:54 -0800 + Commit: cfea300, github.com/apache/spark/pull/3571 + + [SPARK-5540] hide ALS.solveLeastSquares + Xiangrui Meng + 2015-02-02 17:10:01 -0800 + Commit: ef65cf0, github.com/apache/spark/pull/4318 + + [SPARK-5534] [graphx] Graph getStorageLevel fix + Joseph K. Bradley + 2015-02-02 17:02:29 -0800 + Commit: f133dec, github.com/apache/spark/pull/4317 + + [SPARK-5514] DataFrame.collect should call executeCollect + Reynold Xin + 2015-02-02 16:55:36 -0800 + Commit: 8aa3cff, github.com/apache/spark/pull/4313 + + [SPARK-5195][sql]Update HiveMetastoreCatalog.scala(override the MetastoreRelation's sameresult method only compare databasename and table name) + seayi <405078363@qq.com>, Michael Armbrust + 2015-02-02 16:06:52 -0800 + Commit: dca6faa, github.com/apache/spark/pull/3898 + + [SPARK-2309][MLlib] Multinomial Logistic Regression + DB Tsai + 2015-02-02 15:59:15 -0800 + Commit: b1aa8fe, github.com/apache/spark/pull/3833 + + [SPARK-5513][MLLIB] Add nonnegative option to ml's ALS + Xiangrui Meng + 2015-02-02 15:55:44 -0800 + Commit: 46d50f1, github.com/apache/spark/pull/4302 + + [SPARK-4508] [SQL] build native date type to conform behavior to Hive + Daoyuan Wang + 2015-02-02 15:49:22 -0800 + Commit: 1646f89, github.com/apache/spark/pull/3732 + + SPARK-5500. Document that feeding hadoopFile into a shuffle operation wi... + Sandy Ryza + 2015-02-02 14:52:46 -0800 + Commit: 8309349, github.com/apache/spark/pull/4293 + + [SPARK-5461] [graphx] Add isCheckpointed, getCheckpointedFiles methods to Graph + Joseph K. Bradley + 2015-02-02 14:34:48 -0800 + Commit: 842d000, github.com/apache/spark/pull/4253 + + SPARK-5425: Use synchronised methods in system properties to create SparkConf + Jacek Lewandowski + 2015-02-02 14:07:19 -0800 + Commit: 5a55261, github.com/apache/spark/pull/4222 + + Disabling Utils.chmod700 for Windows + Martin Weindel , mweindel + 2015-02-02 13:46:18 -0800 + Commit: bff65b5, github.com/apache/spark/pull/4299 + + Make sure only owner can read / write to directories created for the job. + Josh Rosen + 2015-01-21 14:38:14 -0800 + Commit: 52f5754 + + [HOTFIX] Add jetty references to build for YARN module. + Patrick Wendell + 2015-02-02 14:00:14 -0800 + Commit: 2321dd1 + + [SPARK-4631][streaming][FIX] Wait for a receiver to start before publishing test data. 
+ Iulian Dragos + 2015-02-02 14:00:33 -0800 + Commit: e908322, github.com/apache/spark/pull/4270 + + [SPARK-5212][SQL] Add support of schema-less, custom field delimiter and SerDe for HiveQL transform + Liang-Chi Hsieh + 2015-02-02 13:53:55 -0800 + Commit: 683e938, github.com/apache/spark/pull/4014 + + [SPARK-5530] Add executor container to executorIdToContainer + Xutingjun <1039320815@qq.com> + 2015-02-02 12:37:51 -0800 + Commit: 62a93a1, github.com/apache/spark/pull/4309 + + [Docs] Fix Building Spark link text + Nicholas Chammas + 2015-02-02 12:33:49 -0800 + Commit: 3f941b6, github.com/apache/spark/pull/4312 + + [SPARK-5173]support python application running on yarn cluster mode + lianhuiwang , Wang Lianhui + 2015-02-02 12:32:28 -0800 + Commit: f5e6375, github.com/apache/spark/pull/3976 + + SPARK-4585. Spark dynamic executor allocation should use minExecutors as... + Sandy Ryza + 2015-02-02 12:27:08 -0800 + Commit: b2047b5, github.com/apache/spark/pull/4051 + + [MLLIB] SPARK-5491 (ex SPARK-1473): Chi-square feature selection + Alexander Ulanov + 2015-02-02 12:13:05 -0800 + Commit: c081b21, github.com/apache/spark/pull/1484 + + SPARK-5492. Thread statistics can break with older Hadoop versions + Sandy Ryza + 2015-02-02 00:54:06 -0800 + Commit: 6f34131, github.com/apache/spark/pull/4305 + + [SPARK-5478][UI][Minor] Add missing right parentheses + jerryshao + 2015-02-01 23:56:13 -0800 + Commit: 63dfe21, github.com/apache/spark/pull/4267 + + [SPARK-5353] Log failures in REPL class loading + Tobias Schlatter + 2015-02-01 21:43:49 -0800 + Commit: 9f0a6e1, github.com/apache/spark/pull/4130 + + [SPARK-3996]: Shade Jetty in Spark deliverables + Patrick Wendell + 2015-02-01 21:13:57 -0800 + Commit: a15f6e3, github.com/apache/spark/pull/4285 + + [SPARK-4001][MLlib] adding parallel FP-Growth algorithm for frequent pattern mining in MLlib + Jacky Li , Jacky Li , Xiangrui Meng + 2015-02-01 20:07:25 -0800 + Commit: 859f724, github.com/apache/spark/pull/2847 + + [Spark-5406][MLlib] LocalLAPACK mode in RowMatrix.computeSVD should have much smaller upper bound + Yuhao Yang + 2015-02-01 19:40:26 -0800 + Commit: d85cd4e, github.com/apache/spark/pull/4200 + + [SPARK-5465] [SQL] Fixes filter push-down for Parquet data source + Cheng Lian + 2015-02-01 18:52:39 -0800 + Commit: ec10032, github.com/apache/spark/pull/4255 + + [SPARK-5262] [SPARK-5244] [SQL] add coalesce in SQLParser and widen types for parameters of coalesce + Daoyuan Wang + 2015-02-01 18:51:38 -0800 + Commit: 8cf4a1f, github.com/apache/spark/pull/4057 + + [SPARK-5196][SQL] Support `comment` in Create Table Field DDL + OopsOutOfMemory + 2015-02-01 18:41:49 -0800 + Commit: 1b56f1d, github.com/apache/spark/pull/3999 + + [SPARK-1825] Make Windows Spark client work fine with Linux YARN cluster + Masayoshi TSUZUKI + 2015-02-01 18:26:28 -0800 + Commit: 7712ed5, github.com/apache/spark/pull/3943 + + [SPARK-5176] The thrift server does not support cluster mode + Tom Panning + 2015-02-01 17:57:31 -0800 + Commit: 1ca0a10, github.com/apache/spark/pull/4137 + + [SPARK-5155] Build fails with spark-ganglia-lgpl profile + Kousuke Saruta + 2015-02-01 17:53:56 -0800 + Commit: c80194b, github.com/apache/spark/pull/4303 + + [Minor][SQL] Little refactor DataFrame related codes + Liang-Chi Hsieh + 2015-02-01 17:52:18 -0800 + Commit: ef89b82, github.com/apache/spark/pull/4298 + + 
[SPARK-4859][Core][Streaming] Refactor LiveListenerBus and StreamingListenerBus + zsxwing + 2015-02-01 17:47:51 -0800 + Commit: 883bc88, github.com/apache/spark/pull/4006 + + [SPARK-5424][MLLIB] make the new ALS impl take generic ID types + Xiangrui Meng + 2015-02-01 14:13:31 -0800 + Commit: 4a17122, github.com/apache/spark/pull/4281 + + [SPARK-5207] [MLLIB] StandardScalerModel mean and variance re-use + Octavian Geagla + 2015-02-01 09:21:14 -0800 + Commit: bdb0680, github.com/apache/spark/pull/4140 + + [SPARK-5422] Add support for sending Graphite metrics via UDP + Ryan Williams + 2015-01-31 23:41:05 -0800 + Commit: 80bd715, github.com/apache/spark/pull/4218 + + SPARK-3359 [CORE] [DOCS] `sbt/sbt unidoc` doesn't work with Java 8 + Sean Owen + 2015-01-31 10:40:42 -0800 + Commit: c84d5a1, github.com/apache/spark/pull/4193 + + [SPARK-3975] Added support for BlockMatrix addition and multiplication + Burak Yavuz , Burak Yavuz , Burak Yavuz , Burak Yavuz , Burak Yavuz + 2015-01-31 00:47:30 -0800 + Commit: ef8974b, github.com/apache/spark/pull/4274 + + [MLLIB][SPARK-3278] Monotone (Isotonic) regression using parallel pool adjacent violators algorithm + martinzapletal , Xiangrui Meng , Martin Zapletal + 2015-01-31 00:46:02 -0800 + Commit: 34250a6, github.com/apache/spark/pull/3519 + + [SPARK-5307] Add a config option for SerializationDebugger. + Reynold Xin + 2015-01-31 00:06:36 -0800 + Commit: 6364083, github.com/apache/spark/pull/4297 + + [SQL] remove redundant field "childOutput" from execution.Aggregate, use child.output instead + kai + 2015-01-30 23:19:10 -0800 + Commit: f54c9f6, github.com/apache/spark/pull/4291 + + [SPARK-5307] SerializationDebugger + Reynold Xin + 2015-01-30 22:34:10 -0800 + Commit: 740a568, github.com/apache/spark/pull/4098 + + [SPARK-5504] [sql] convertToCatalyst should support nested arrays + Joseph K. Bradley + 2015-01-30 15:40:14 -0800 + Commit: e643de4, github.com/apache/spark/pull/4295 + + SPARK-5400 [MLlib] Changed name of GaussianMixtureEM to GaussianMixture + Travis Galoppo + 2015-01-30 15:32:25 -0800 + Commit: 9869773, github.com/apache/spark/pull/4290 + + [SPARK-4259][MLlib]: Add Power Iteration Clustering Algorithm with Gaussian Similarity Function + sboeschhuawei , Fan Jiang , Jiang Fan , Stephen Boesch , Xiangrui Meng + 2015-01-30 14:09:49 -0800 + Commit: f377431, github.com/apache/spark/pull/4254 + + [SPARK-5486] Added validate method to BlockMatrix + Burak Yavuz + 2015-01-30 13:59:10 -0800 + Commit: 6ee8338, github.com/apache/spark/pull/4279 + + [SPARK-5496][MLLIB] Allow both classification and Classification in Algo for trees. + Xiangrui Meng + 2015-01-30 10:08:07 -0800 + Commit: 0a95085, github.com/apache/spark/pull/4287 + + [MLLIB] SPARK-4846: throw a RuntimeException and give users hints to increase the minCount + Joseph J.C. Tang + 2015-01-30 10:07:26 -0800 + Commit: 54d9575, github.com/apache/spark/pull/4247 + + SPARK-5393. Flood of util.RackResolver log messages after SPARK-1714 + Sandy Ryza + 2015-01-30 11:31:54 -0600 + Commit: 254eaa4, github.com/apache/spark/pull/4192 + + [SPARK-5457][SQL] Add missing DSL for ApproxCountDistinct. 
+ Takuya UESHIN + 2015-01-30 01:21:35 -0800 + Commit: 6f21dce, github.com/apache/spark/pull/4250 + + [SPARK-5094][MLlib] Add Python API for Gradient Boosted Trees + Kazuki Taniguchi + 2015-01-30 00:39:44 -0800 + Commit: bc1fc9b, github.com/apache/spark/pull/3951 + + [SPARK-5322] Added transpose functionality to BlockMatrix + Burak Yavuz + 2015-01-29 21:26:29 -0800 + Commit: dd4d84c, github.com/apache/spark/pull/4275 + + [SQL] Support df("*") to select all columns in a data frame. + Reynold Xin + 2015-01-29 19:09:08 -0800 + Commit: 80def9d, github.com/apache/spark/pull/4283 + + [SPARK-5462] [SQL] Use analyzed query plan in DataFrame.apply() + Josh Rosen + 2015-01-29 18:23:05 -0800 + Commit: 22271f9, github.com/apache/spark/pull/4282 + + [SPARK-5395] [PySpark] fix python process leak while coalesce() + Davies Liu + 2015-01-29 17:28:37 -0800 + Commit: 5c746ee, github.com/apache/spark/pull/4238 + + [SQL] DataFrame API improvements + Reynold Xin + 2015-01-29 17:24:00 -0800 + Commit: ce9c43b, github.com/apache/spark/pull/4280 + + Revert "[WIP] [SPARK-3996]: Shade Jetty in Spark deliverables" + Patrick Wendell + 2015-01-29 17:14:27 -0800 + Commit: d2071e8 + + remove 'return' + Yoshihiro Shimizu + 2015-01-29 16:55:00 -0800 + Commit: 5338772, github.com/apache/spark/pull/4268 + + [WIP] [SPARK-3996]: Shade Jetty in Spark deliverables + Patrick Wendell + 2015-01-29 16:31:19 -0800 + Commit: f240fe3, github.com/apache/spark/pull/4252 + + [SPARK-5464] Fix help() for Python DataFrame instances + Josh Rosen + 2015-01-29 16:23:20 -0800 + Commit: 0bb15f2, github.com/apache/spark/pull/4278 + + [SPARK-4296][SQL] Trims aliases when resolving and checking aggregate expressions + Yin Huai , Cheng Lian + 2015-01-29 15:49:34 -0800 + Commit: c00d517, github.com/apache/spark/pull/4010 + + [SPARK-5373][SQL] Literal in agg grouping expressions leads to incorrect result + wangfei + 2015-01-29 15:47:13 -0800 + Commit: c1b3eeb, github.com/apache/spark/pull/4169 + + [SPARK-5367][SQL] Support star expression in udf + wangfei , scwf + 2015-01-29 15:44:53 -0800 + Commit: fbaf9e0, github.com/apache/spark/pull/4163 + + [SPARK-4786][SQL]: Parquet filter pushdown for castable types + Yash Datta + 2015-01-29 15:42:23 -0800 + Commit: de221ea, github.com/apache/spark/pull/4156 + + [SPARK-5309][SQL] Add support for dictionaries in PrimitiveConverter for Strin... + Michael Davies + 2015-01-29 15:40:59 -0800 + Commit: 940f375, github.com/apache/spark/pull/4187 + + [SPARK-5429][SQL] Use javaXML plan serialization for Hive golden answers on Hive 0.13.1 + Liang-Chi Hsieh + 2015-01-29 15:28:22 -0800 + Commit: bce0ba1, github.com/apache/spark/pull/4223 + + [SPARK-5445][SQL] Consolidate Java and Scala DSL static methods. + Reynold Xin + 2015-01-29 15:13:09 -0800 + Commit: 7156322, github.com/apache/spark/pull/4276 + + [SPARK-5466] Add explicit guava dependencies where needed. + Marcelo Vanzin + 2015-01-29 13:00:45 -0800 + Commit: f9e5694, github.com/apache/spark/pull/4272 + + [SPARK-5477] refactor stat.py + Xiangrui Meng + 2015-01-29 10:11:44 -0800 + Commit: a3dc618, github.com/apache/spark/pull/4266 + + [SQL] Various DataFrame DSL update. 
+ Reynold Xin + 2015-01-29 00:01:10 -0800 + Commit: 5ad78f6, github.com/apache/spark/pull/4260 + + [SPARK-3977] Conversion methods for BlockMatrix to other Distributed Matrices + Burak Yavuz + 2015-01-28 23:42:07 -0800 + Commit: a63be1a, github.com/apache/spark/pull/4256 + + [SPARK-5445][SQL] Made DataFrame dsl usable in Java + Reynold Xin + 2015-01-28 19:10:32 -0800 + Commit: 5b9760d, github.com/apache/spark/pull/4241 + + [SPARK-5430] move treeReduce and treeAggregate from mllib to core + Xiangrui Meng + 2015-01-28 17:26:03 -0800 + Commit: 4ee79c7, github.com/apache/spark/pull/4228 + + [SPARK-4586][MLLIB] Python API for ML pipeline and parameters + Xiangrui Meng , Davies Liu + 2015-01-28 17:14:23 -0800 + Commit: e80dc1c, github.com/apache/spark/pull/4151 + + [SPARK-5441][pyspark] Make SerDeUtil PairRDD to Python conversions more robust + Michael Nazario + 2015-01-28 13:55:01 -0800 + Commit: e023112, github.com/apache/spark/pull/4236 + + [SPARK-4387][PySpark] Refactoring python profiling code to make it extensible + Yandu Oppacher , Davies Liu + 2015-01-28 13:48:06 -0800 + Commit: 3bead67, github.com/apache/spark/pull/3255. + + [SPARK-5417] Remove redundant executor-id set() call + Ryan Williams + 2015-01-28 13:04:52 -0800 + Commit: a731314, github.com/apache/spark/pull/4213 + + [SPARK-5434] [EC2] Preserve spaces in EC2 path + Nicholas Chammas + 2015-01-28 12:56:03 -0800 + Commit: d44ee43, github.com/apache/spark/pull/4224 + + [SPARK-5437] Fix DriverSuite and SparkSubmitSuite timeout issues + Andrew Or + 2015-01-28 12:52:31 -0800 + Commit: 84b6ecd, github.com/apache/spark/pull/4230 + + [SPARK-4955]With executor dynamic scaling enabled,executor shoude be added or killed in yarn-cluster mode. + lianhuiwang + 2015-01-28 12:50:57 -0800 + Commit: 81f8f34, github.com/apache/spark/pull/3962 + + [SPARK-5440][pyspark] Add toLocalIterator to pyspark rdd + Michael Nazario + 2015-01-28 12:47:12 -0800 + Commit: 456c11f, github.com/apache/spark/pull/4237 + + SPARK-1934 [CORE] "this" reference escape to "selectorThread" during construction in ConnectionManager + Sean Owen + 2015-01-28 12:44:35 -0800 + Commit: 9b18009, github.com/apache/spark/pull/4225 + + [SPARK-5188][BUILD] make-distribution.sh should support curl, not only wget to get Tachyon + Kousuke Saruta + 2015-01-28 12:43:22 -0800 + Commit: e902dc4, github.com/apache/spark/pull/3988 + + SPARK-5458. Refer to aggregateByKey instead of combineByKey in docs + Sandy Ryza + 2015-01-28 12:41:23 -0800 + Commit: 406f6d3, github.com/apache/spark/pull/4251 + + [SPARK-5447][SQL] Replaced reference to SchemaRDD with DataFrame. + Reynold Xin + 2015-01-28 12:10:01 -0800 + Commit: c8e934e, github.com/apache/spark/pull/4242 + + [SPARK-5361]Multiple Java RDD <-> Python RDD conversions not working correctly + Winston Chen + 2015-01-28 11:08:44 -0800 + Commit: 453d799, github.com/apache/spark/pull/4146 + + [SPARK-5291][CORE] Add timestamp and reason why an executor is removed to SparkListenerExecutorAdded and SparkListenerExecutorRemoved + Kousuke Saruta + 2015-01-28 11:02:51 -0800 + Commit: 0b35fcd, github.com/apache/spark/pull/4082 + + [SPARK-3974][MLlib] Distributed Block Matrix Abstractions + Burak Yavuz , Xiangrui Meng , Burak Yavuz , Burak Yavuz , Burak Yavuz + 2015-01-28 10:06:37 -0800 + Commit: eeb53bf, github.com/apache/spark/pull/3200 + + MAINTENANCE: Automated closing of pull requests. 
+ Patrick Wendell + 2015-01-28 02:15:14 -0800 + Commit: 622ff09, github.com/apache/spark/pull/1480 + + [SPARK-5415] bump sbt to version to 0.13.7 + Ryan Williams + 2015-01-28 02:13:06 -0800 + Commit: 661d3f9, github.com/apache/spark/pull/4211 + + [SPARK-4809] Rework Guava library shading. + Marcelo Vanzin + 2015-01-28 00:29:29 -0800 + Commit: 37a5e27, github.com/apache/spark/pull/3658 + + [SPARK-5097][SQL] Test cases for DataFrame expressions. + Reynold Xin + 2015-01-27 18:10:49 -0800 + Commit: d743732, github.com/apache/spark/pull/4235 + + [SPARK-5097][SQL] DataFrame + Reynold Xin , Davies Liu + 2015-01-27 16:08:24 -0800 + Commit: 119f45d, github.com/apache/spark/pull/4173 + + SPARK-5199. FS read metrics should support CombineFileSplits and track bytes from all FSs + Sandy Ryza + 2015-01-27 15:42:55 -0800 + Commit: b1b35ca, github.com/apache/spark/pull/4050 + + [MLlib] fix python example of ALS in guide + Davies Liu + 2015-01-27 15:33:01 -0800 + Commit: fdaad4e, github.com/apache/spark/pull/4226 + + SPARK-5308 [BUILD] MD5 / SHA1 hash format doesn't match standard Maven output + Sean Owen + 2015-01-27 10:22:50 -0800 + Commit: ff356e2, github.com/apache/spark/pull/4161 + + [SPARK-5321] Support for transposing local matrices + Burak Yavuz + 2015-01-27 01:46:17 -0800 + Commit: 9142674, github.com/apache/spark/pull/4109 + + [SPARK-5419][Mllib] Fix the logic in Vectors.sqdist + Liang-Chi Hsieh + 2015-01-27 01:29:14 -0800 + Commit: 7b0ed79, github.com/apache/spark/pull/4217 + + [SPARK-3726] [MLlib] Allow sampling_rate not equal to 1.0 in RandomForests + MechCoder + 2015-01-26 19:46:17 -0800 + Commit: d6894b1, github.com/apache/spark/pull/4073 + + [SPARK-5119] java.lang.ArrayIndexOutOfBoundsException on trying to train... + lewuathe + 2015-01-26 18:03:21 -0800 + Commit: f2ba5c6, github.com/apache/spark/pull/3975 + + [SPARK-5052] Add common/base classes to fix guava methods signatures. + Elmer Garduno + 2015-01-26 17:40:48 -0800 + Commit: 661e0fc, github.com/apache/spark/pull/3874 + + SPARK-960 [CORE] [TEST] JobCancellationSuite "two jobs sharing the same stage" is broken + Sean Owen + 2015-01-26 14:32:27 -0800 + Commit: 0497ea5, github.com/apache/spark/pull/4180 + + Fix command spaces issue in make-distribution.sh + David Y. Ross + 2015-01-26 14:26:10 -0800 + Commit: b38034e, github.com/apache/spark/pull/4126 + + SPARK-4147 [CORE] Reduce log4j dependency + Sean Owen + 2015-01-26 14:23:42 -0800 + Commit: 54e7b45, github.com/apache/spark/pull/4190 + + [SPARK-5339][BUILD] build/mvn doesn't work because of invalid URL for maven's tgz. 
+ Kousuke Saruta + 2015-01-26 13:07:49 -0800 + Commit: c094c73, github.com/apache/spark/pull/4124 + + [SPARK-5355] use j.u.c.ConcurrentHashMap instead of TrieMap + Davies Liu + 2015-01-26 12:51:32 -0800 + Commit: 1420931, github.com/apache/spark/pull/4208 + + [SPARK-5384][mllib] Vectors.sqdist returns inconsistent results for sparse/dense vectors when the vectors have different lengths + Yuhao Yang + 2015-01-25 22:18:09 -0800 + Commit: 8125168, github.com/apache/spark/pull/4183 + + [SPARK-5268] don't stop CoarseGrainedExecutorBackend for irrelevant DisassociatedEvent + CodingCat + 2015-01-25 19:28:53 -0800 + Commit: 8df9435, github.com/apache/spark/pull/4063 + + SPARK-4430 [STREAMING] [TEST] Apache RAT Checks fail spuriously on test files + Sean Owen + 2015-01-25 19:16:44 -0800 + Commit: 0528b85, github.com/apache/spark/pull/4189 + + [SPARK-5326] Show fetch wait time as optional metric in the UI + Kay Ousterhout + 2015-01-25 16:48:26 -0800 + Commit: fc2168f, github.com/apache/spark/pull/4110 + + [SPARK-5344][WebUI] HistoryServer cannot recognize that inprogress file was renamed to completed file + Kousuke Saruta + 2015-01-25 15:34:20 -0800 + Commit: 8f5c827, github.com/apache/spark/pull/4132 + + SPARK-4506 [DOCS] Addendum: Update more docs to reflect that standalone works in cluster mode + Sean Owen + 2015-01-25 15:25:05 -0800 + Commit: 9f64357, github.com/apache/spark/pull/4160 + + SPARK-5382: Use SPARK_CONF_DIR in spark-class if it is defined + Jacek Lewandowski + 2015-01-25 15:15:09 -0800 + Commit: 1c30afd, github.com/apache/spark/pull/4179 + + SPARK-3782 [CORE] Direct use of log4j in AkkaUtils interferes with certain logging configurations + Sean Owen + 2015-01-25 15:11:57 -0800 + Commit: 383425a, github.com/apache/spark/pull/4184 + + SPARK-3852 [DOCS] Document spark.driver.extra* configs + Sean Owen + 2015-01-25 15:08:05 -0800 + Commit: c586b45, github.com/apache/spark/pull/4185 + + [SPARK-5402] log executor ID at executor-construction time + Ryan Williams + 2015-01-25 14:20:02 -0800 + Commit: aea2548, github.com/apache/spark/pull/4195 + + [SPARK-5401] set executor ID before creating MetricsSystem + Ryan Williams + 2015-01-25 14:17:59 -0800 + Commit: 2d9887b, github.com/apache/spark/pull/4194 + + Add comment about defaultMinPartitions + Idan Zalzberg + 2015-01-25 11:28:05 -0800 + Commit: 412a58e, github.com/apache/spark/pull/4102 + + Closes #4157 + Reynold Xin + 2015-01-25 00:24:59 -0800 + Commit: d22ca1e + + [SPARK-5214][Test] Add a test to demonstrate EventLoop can be stopped in the event thread + zsxwing + 2015-01-24 11:00:35 -0800 + Commit: 0d1e67e, github.com/apache/spark/pull/4174 + + [SPARK-5058] Part 2. Typos and broken URL + Jongyoul Lee + 2015-01-23 23:34:11 -0800 + Commit: 09e09c5, github.com/apache/spark/pull/4172 + + [SPARK-5351][GraphX] Do not use Partitioner.defaultPartitioner as a partitioner of EdgeRDDImp... 
+ Takeshi Yamamuro + 2015-01-23 19:25:15 -0800 + Commit: e224dbb, github.com/apache/spark/pull/4136 + + [SPARK-5063] More helpful error messages for several invalid operations + Josh Rosen + 2015-01-23 17:53:15 -0800 + Commit: cef1f09, github.com/apache/spark/pull/3884 + + [SPARK-3541][MLLIB] New ALS implementation with improved storage + Xiangrui Meng + 2015-01-22 22:09:13 -0800 + Commit: ea74365, github.com/apache/spark/pull/3720 + + [SPARK-5315][Streaming] Fix reduceByWindow Java API not work bug + jerryshao + 2015-01-22 22:04:21 -0800 + Commit: e0f7fb7, github.com/apache/spark/pull/4104 + + [SPARK-5233][Streaming] Fix error replaying of WAL introduced bug + jerryshao + 2015-01-22 21:58:53 -0800 + Commit: 3c3fa63, github.com/apache/spark/pull/4032 + + SPARK-5370. [YARN] Remove some unnecessary synchronization in YarnAlloca... + Sandy Ryza + 2015-01-22 13:49:35 -0600 + Commit: 820ce03, github.com/apache/spark/pull/4164 + + [SPARK-5365][MLlib] Refactor KMeans to reduce redundant data + Liang-Chi Hsieh + 2015-01-22 08:16:35 -0800 + Commit: 246111d, github.com/apache/spark/pull/4159 + + [SPARK-5147][Streaming] Delete the received data WAL log periodically + Tathagata Das , jerryshao + 2015-01-21 23:41:44 -0800 + Commit: 3027f06, github.com/apache/spark/pull/4149 + + [SPARK-5317]Set BoostingStrategy.defaultParams With Enumeration Algo.Classification or Algo.Regression + Basin + 2015-01-21 23:06:34 -0800 + Commit: fcb3e18, github.com/apache/spark/pull/4103 + + [SPARK-3424][MLLIB] cache point distances during k-means|| init + Xiangrui Meng + 2015-01-21 21:20:31 -0800 + Commit: ca7910d, github.com/apache/spark/pull/4144 + + [SPARK-5202] [SQL] Add hql variable substitution support + Cheng Hao + 2015-01-21 17:34:18 -0800 + Commit: 27bccc5, github.com/apache/spark/pull/4003 + + [SPARK-5355] make SparkConf thread-safe + Davies Liu + 2015-01-21 16:51:42 -0800 + Commit: 9bad062, github.com/apache/spark/pull/4143 + + [SPARK-4984][CORE][WEBUI] Adding a pop-up containing the full job description when it is very long + wangfei + 2015-01-21 15:27:42 -0800 + Commit: 3be2a88, github.com/apache/spark/pull/3819 + + [SQL] [Minor] Remove deprecated parquet tests + Cheng Lian + 2015-01-21 14:38:10 -0800 + Commit: ba19689, github.com/apache/spark/pull/4116 + + Revert "[SPARK-5244] [SQL] add coalesce() in sql parser" + Josh Rosen + 2015-01-21 14:27:43 -0800 + Commit: b328ac6 + + [SPARK-5009] [SQL] Long keyword support in SQL Parsers + Cheng Hao + 2015-01-21 13:05:56 -0800 + Commit: 8361078, github.com/apache/spark/pull/3926 + + [SPARK-5244] [SQL] add coalesce() in sql parser + Daoyuan Wang + 2015-01-21 12:59:41 -0800 + Commit: 812d367, github.com/apache/spark/pull/4040 + + [SPARK-5064][GraphX] Add numEdges upperbound validation for R-MAT graph generator to prevent infinite loop + Kenji Kikushima + 2015-01-21 12:34:00 -0800 + Commit: 3ee3ab5, github.com/apache/spark/pull/3950 + + [SPARK-4749] [mllib]: Allow initializing KMeans clusters using a seed + nate.crosswhite , nxwhite-str , Xiangrui Meng + 2015-01-21 10:32:10 -0800 + Commit: 7450a99, github.com/apache/spark/pull/3610 + + [MLlib] [SPARK-5301] Missing conversions and operations on IndexedRowMatrix and CoordinateMatrix + Reza Zadeh + 2015-01-21 09:48:38 -0800 + Commit: aa1e22b, github.com/apache/spark/pull/4089 + + SPARK-1714. Take advantage of AMRMClient APIs to simplify logic in YarnA... 
+ Sandy Ryza + 2015-01-21 10:31:54 -0600 + Commit: 2eeada3, github.com/apache/spark/pull/3765 + + [SPARK-5336][YARN]spark.executor.cores must not be less than spark.task.cpus + WangTao , WangTaoTheTonic + 2015-01-21 09:42:30 -0600 + Commit: 8c06a5f, github.com/apache/spark/pull/4123 + + [SPARK-5297][Streaming] Fix Java file stream type erasure problem + jerryshao + 2015-01-20 23:37:47 -0800 + Commit: 424d8c6, github.com/apache/spark/pull/4101 + + [HOTFIX] Update pom.xml to pull MapR's Hadoop version 2.4.1. + Kannan Rajah + 2015-01-20 23:34:04 -0800 + Commit: ec5b0f2, github.com/apache/spark/pull/4108 + + [SPARK-5275] [Streaming] include python source code + Davies Liu + 2015-01-20 22:44:58 -0800 + Commit: bad6c57, github.com/apache/spark/pull/4128 + + [SPARK-5294][WebUI] Hide tables in AllStagePages for "Active Stages, Completed Stages and Failed Stages" when they are empty + Kousuke Saruta + 2015-01-20 16:40:46 -0800 + Commit: 9a151ce, github.com/apache/spark/pull/4083 + + [SPARK-5186] [MLLIB] Vector.equals and Vector.hashCode are very inefficient + Yuhao Yang , Yuhao Yang + 2015-01-20 15:20:20 -0800 + Commit: 2f82c84, github.com/apache/spark/pull/3997 + + [SPARK-5323][SQL] Remove Row's Seq inheritance. + Reynold Xin + 2015-01-20 15:16:14 -0800 + Commit: d181c2a, github.com/apache/spark/pull/4115 + + [SPARK-5287][SQL] Add defaultSizeOf to every data type. + Yin Huai + 2015-01-20 13:26:36 -0800 + Commit: bc20a52, github.com/apache/spark/pull/4081 + + SPARK-5019 [MLlib] - GaussianMixtureModel exposes instances of MultivariateGauss... + Travis Galoppo + 2015-01-20 12:58:11 -0800 + Commit: 23e2554, github.com/apache/spark/pull/4088 + + [SPARK-5329][WebUI] UIWorkloadGenerator should stop SparkContext. + Kousuke Saruta + 2015-01-20 12:40:55 -0800 + Commit: 769aced, github.com/apache/spark/pull/4112 + + SPARK-4660: Use correct class loader in JavaSerializer (copy of PR #3840... + Jacek Lewandowski + 2015-01-20 12:38:01 -0800 + Commit: c93a57f, github.com/apache/spark/pull/4113 + + [SQL][Minor] Refactors deeply nested FP style code in BooleanSimplification + Cheng Lian + 2015-01-20 11:20:14 -0800 + Commit: 8140802, github.com/apache/spark/pull/4091 + + [SPARK-5333][Mesos] MesosTaskLaunchData occurs BufferUnderflowException + Jongyoul Lee + 2015-01-20 10:17:29 -0800 + Commit: 9d9294a, github.com/apache/spark/pull/4119 + + [SPARK-4803] [streaming] Remove duplicate RegisterReceiver message + Ilayaperumal Gopinathan + 2015-01-20 01:41:10 -0800 + Commit: 4afad9c, github.com/apache/spark/pull/3648 + + [SQL][minor] Add a log4j file for catalyst test. 
+ Reynold Xin + 2015-01-20 00:55:25 -0800 + Commit: debc031, github.com/apache/spark/pull/4117 + + SPARK-5270 [CORE] Provide isEmpty() function in RDD API + Sean Owen + 2015-01-19 22:50:44 -0800 + Commit: 306ff18, github.com/apache/spark/pull/4074 + + [SPARK-5214][Core] Add EventLoop and change DAGScheduler to an EventLoop + zsxwing + 2015-01-19 18:15:51 -0800 + Commit: e69fb8c, github.com/apache/spark/pull/4016 + + [SPARK-4504][Examples] fix run-example failure if multiple assembly jars exist + Venkata Ramana Gollamudi + 2015-01-19 11:58:16 -0800 + Commit: 74de94e, github.com/apache/spark/pull/3377 + + [SPARK-5286][SQL] Fail to drop an invalid table when using the data source API + Yin Huai + 2015-01-19 10:45:29 -0800 + Commit: 2604bc3, github.com/apache/spark/pull/4076 + + [SPARK-5284][SQL] Insert into Hive throws NPE when a inner complex type field has a null value + Yin Huai + 2015-01-19 10:44:12 -0800 + Commit: cd5da42, github.com/apache/spark/pull/4077 + + [SPARK-5282][mllib]: RowMatrix easily gets int overflow in the memory size warning + Yuhao Yang + 2015-01-19 10:10:15 -0800 + Commit: 4432568, github.com/apache/spark/pull/4069 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-01-19 02:05:24 -0800 + Commit: 1ac1c1d, github.com/apache/spark/pull/3584 + + [SPARK-5088] Use spark-class for running executors directly + Jongyoul Lee + 2015-01-19 02:01:56 -0800 + Commit: 4a4f9cc, github.com/apache/spark/pull/3897 + + [SPARK-3288] All fields in TaskMetrics should be private and use getters/setters + Ilya Ganelin + 2015-01-19 01:32:22 -0800 + Commit: 3453d57, github.com/apache/spark/pull/4020 + + SPARK-5217 Spark UI should report pending stages during job execution on AllStagesPage. + Prashant Sharma + 2015-01-19 01:28:42 -0800 + Commit: 851b6a9, github.com/apache/spark/pull/4043 + + [SQL] fix typo in class description + Jacky Li + 2015-01-18 23:59:08 -0800 + Commit: 7dbf1fd, github.com/apache/spark/pull/4100 + + [SQL][minor] Put DataTypes.java in java dir. + Reynold Xin + 2015-01-18 16:35:40 -0800 + Commit: 1955645, github.com/apache/spark/pull/4097 + + [SQL][Minor] Update sql doc according to data type APIs changes + scwf + 2015-01-18 11:03:13 -0800 + Commit: 1a200a3, github.com/apache/spark/pull/4095 + + [SPARK-5279][SQL] Use java.math.BigDecimal as the exposed Decimal type. + Reynold Xin + 2015-01-18 11:01:42 -0800 + Commit: 1727e08, github.com/apache/spark/pull/4092 + + [HOTFIX]: Minor clean up regarding skipped artifacts in build files. + Patrick Wendell + 2015-01-17 23:15:12 -0800 + Commit: ad16da1, github.com/apache/spark/pull/4080 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-01-17 20:39:54 -0800 + Commit: e12b5b6, github.com/apache/spark/pull/681 + + [SQL][Minor] Added comments and examples to explain BooleanSimplification + Reynold Xin + 2015-01-17 17:35:53 -0800 + Commit: e7884bc, github.com/apache/spark/pull/4090 + + [SPARK-5096] Use sbt tasks instead of vals to get hadoop version + Michael Armbrust + 2015-01-17 17:03:07 -0800 + Commit: 6999910, github.com/apache/spark/pull/3905 + + [SPARK-4937][SQL] Comment for the newly optimization rules in `BooleanSimplification` + scwf + 2015-01-17 15:51:24 -0800 + Commit: c1f3c27, github.com/apache/spark/pull/4086 + + [SQL][minor] Improved Row documentation. 
+ Reynold Xin + 2015-01-17 00:11:08 -0800 + Commit: f3bfc76, github.com/apache/spark/pull/4085 + + [SPARK-5193][SQL] Remove Spark SQL Java-specific API. + Reynold Xin + 2015-01-16 21:09:06 -0800 + Commit: 61b427d, github.com/apache/spark/pull/4065 + + [SPARK-4937][SQL] Adding optimization to simplify the And, Or condition in spark sql + scwf , wangfei + 2015-01-16 14:01:22 -0800 + Commit: ee1c1f3, github.com/apache/spark/pull/3778 + + [SPARK-733] Add documentation on use of accumulators in lazy transformation + Ilya Ganelin + 2015-01-16 13:25:17 -0800 + Commit: fd3a8a1, github.com/apache/spark/pull/4022 + + [SPARK-4923][REPL] Add Developer API to REPL to allow re-publishing the REPL jar + Chip Senkbeil , Chip Senkbeil + 2015-01-16 12:56:40 -0800 + Commit: d05c9ee, github.com/apache/spark/pull/4034 + + [WebUI] Fix collapse of WebUI layout + Kousuke Saruta + 2015-01-16 12:19:08 -0800 + Commit: ecf943d, github.com/apache/spark/pull/3995 + + [SPARK-5231][WebUI] History Server shows wrong job submission time. + Kousuke Saruta + 2015-01-16 10:05:11 -0800 + Commit: e8422c5, github.com/apache/spark/pull/4029 + + [DOCS] Fix typo in return type of cogroup + Sean Owen + 2015-01-16 09:28:44 -0800 + Commit: f6b852a, github.com/apache/spark/pull/4072 + + [SPARK-5201][CORE] deal with int overflow in the ParallelCollectionRDD.slice method + Ye Xianjin + 2015-01-16 09:20:53 -0800 + Commit: e200ac8, github.com/apache/spark/pull/4002 + + [SPARK-1507][YARN]specify # cores for ApplicationMaster + WangTaoTheTonic , WangTao + 2015-01-16 09:16:56 -0800 + Commit: 2be82b1, github.com/apache/spark/pull/4018 + + [SPARK-4092] [CORE] Fix InputMetrics for coalesce'd Rdds + Kostas Sakellis + 2015-01-15 18:48:39 -0800 + Commit: a79a9f9, github.com/apache/spark/pull/3120 + + [SPARK-4857] [CORE] Adds Executor membership events to SparkListener + Kostas Sakellis + 2015-01-15 17:53:42 -0800 + Commit: 96c2c71, github.com/apache/spark/pull/3711 + + [Minor] Fix tiny typo in BlockManager + Kousuke Saruta + 2015-01-15 17:07:44 -0800 + Commit: 65858ba, github.com/apache/spark/pull/4046 + + [SPARK-5274][SQL] Reconcile Java and Scala UDFRegistration. 
+ Reynold Xin + 2015-01-15 16:15:12 -0800 + Commit: 1881431, github.com/apache/spark/pull/4056 + + [SPARK-5224] [PySpark] improve performance of parallelize list/ndarray + Davies Liu + 2015-01-15 11:40:41 -0800 + Commit: 3c8650c, github.com/apache/spark/pull/4024 + + [SPARK-5193][SQL] Tighten up HiveContext API + Reynold Xin + 2015-01-14 20:31:02 -0800 + Commit: 4b325c7, github.com/apache/spark/pull/4054 + + [SPARK-5254][MLLIB] remove developers section from spark.ml guide + Xiangrui Meng + 2015-01-14 18:54:17 -0800 + Commit: 6abc45e, github.com/apache/spark/pull/4053 + + [SPARK-5193][SQL] Tighten up SQLContext API + Reynold Xin + 2015-01-14 18:36:15 -0800 + Commit: cfa397c, github.com/apache/spark/pull/4049 + + [SPARK-5254][MLLIB] Update the user guide to position spark.ml better + Xiangrui Meng + 2015-01-14 17:50:33 -0800 + Commit: 13d2406, github.com/apache/spark/pull/4052 + + [SPARK-5234][ml]examples for ml don't have sparkContext.stop + Yuhao Yang + 2015-01-14 11:53:43 -0800 + Commit: 76389c5, github.com/apache/spark/pull/4044 + + [SPARK-5235] Make SQLConf Serializable + Alex Baretta + 2015-01-14 11:51:55 -0800 + Commit: 2fd7f72, github.com/apache/spark/pull/4031 + + [SPARK-4014] Add TaskContext.attemptNumber and deprecate TaskContext.attemptId + Josh Rosen + 2015-01-14 11:45:40 -0800 + Commit: 259936b, github.com/apache/spark/pull/3849 + + [SPARK-5228][WebUI] Hide tables for "Active Jobs/Completed Jobs/Failed Jobs" when they are empty + Kousuke Saruta + 2015-01-14 11:10:29 -0800 + Commit: 9d4449c, github.com/apache/spark/pull/4028 + + [SPARK-2909] [MLlib] [PySpark] SparseVector in pyspark now supports indexing + MechCoder + 2015-01-14 11:03:11 -0800 + Commit: 5840f54, github.com/apache/spark/pull/4025 + + [SQL] some comments fix for GROUPING SETS + Daoyuan Wang + 2015-01-14 09:50:01 -0800 + Commit: 38bdc99, github.com/apache/spark/pull/4000 + + [SPARK-5211][SQL]Restore HiveMetastoreTypes.toDataType + Yin Huai + 2015-01-14 09:47:30 -0800 + Commit: 81f72a0, github.com/apache/spark/pull/4026 + + [SPARK-5248] [SQL] move sql.types.decimal.Decimal to sql.types.Decimal + Daoyuan Wang + 2015-01-14 09:36:59 -0800 + Commit: a3f7421, github.com/apache/spark/pull/4041 + + [SPARK-5167][SQL] Move Row into sql package and make it usable for Java. + Reynold Xin + 2015-01-14 00:38:55 -0800 + Commit: d5eeb35, github.com/apache/spark/pull/4030 + + [SPARK-5123][SQL] Reconcile Java/Scala API for data types. + Reynold Xin + 2015-01-13 17:16:41 -0800 + Commit: f996909, github.com/apache/spark/pull/3958 + + [SPARK-5168] Make SQLConf a field rather than mixin in SQLContext + Reynold Xin + 2015-01-13 13:30:35 -0800 + Commit: 14e3f11, github.com/apache/spark/pull/3965 + + [SPARK-4912][SQL] Persistent tables for the Spark SQL data sources api + Yin Huai , Michael Armbrust + 2015-01-13 13:01:27 -0800 + Commit: 6463e0b, github.com/apache/spark/pull/3960 + + [SPARK-5223] [MLlib] [PySpark] fix MapConverter and ListConverter in MLlib + Davies Liu + 2015-01-13 12:50:31 -0800 + Commit: 8ead999, github.com/apache/spark/pull/4023 + + [SPARK-5131][Streaming][DOC]: There is a discrepancy in WAL implementation and configuration doc. 
+ uncleGen + 2015-01-13 10:07:19 -0800 + Commit: 39e333e, github.com/apache/spark/pull/3930 + + [SPARK-4697][YARN]System properties should override environment variables + WangTaoTheTonic , WangTao + 2015-01-13 09:43:48 -0800 + Commit: 9dea64e, github.com/apache/spark/pull/3557 + + [SPARK-5006][Deploy]spark.port.maxRetries doesn't work + WangTaoTheTonic , WangTao + 2015-01-13 09:28:21 -0800 + Commit: f7741a9, github.com/apache/spark/pull/3841 + + [SPARK-5138][SQL] Ensure schema can be inferred from a namedtuple + Gabe Mulley + 2015-01-12 21:44:51 -0800 + Commit: 1e42e96, github.com/apache/spark/pull/3978 + + [SPARK-5049][SQL] Fix ordering of partition columns in ParquetTableScan + Michael Armbrust + 2015-01-12 15:19:09 -0800 + Commit: 5d9fa55, github.com/apache/spark/pull/3990 + + [SPARK-4999][Streaming] Change storeInBlockManager to false by default + jerryshao + 2015-01-12 13:14:44 -0800 + Commit: 3aed305, github.com/apache/spark/pull/3906 + + SPARK-5172 [BUILD] spark-examples-***.jar shades a wrong Hadoop distribution + Sean Owen + 2015-01-12 12:15:34 -0800 + Commit: aff49a3, github.com/apache/spark/pull/3992 + + [SPARK-5078] Optionally read from SPARK_LOCAL_HOSTNAME + Michael Armbrust + 2015-01-12 11:57:59 -0800 + Commit: a3978f3, github.com/apache/spark/pull/3893 + + SPARK-4159 [BUILD] Addendum: improve running of single test after enabling Java tests + Sean Owen + 2015-01-12 11:00:56 -0800 + Commit: 13e610b, github.com/apache/spark/pull/3993 + + [SPARK-5102][Core]subclass of MapStatus needs to be registered with Kryo + lianhuiwang + 2015-01-12 10:57:12 -0800 + Commit: ef9224e, github.com/apache/spark/pull/4007 + + [SPARK-5200] Disable web UI in Hive ThriftServer tests + Josh Rosen + 2015-01-12 10:47:12 -0800 + Commit: 82fd38d, github.com/apache/spark/pull/3998 + + SPARK-5018 [MLlib] [WIP] Make MultivariateGaussian public + Travis Galoppo + 2015-01-11 21:31:16 -0800 + Commit: 2130de9, github.com/apache/spark/pull/3923 + + [SPARK-4033][Examples]Input of the SparkPi too big causes the emption exception + huangzhaowei + 2015-01-11 16:32:47 -0800 + Commit: f38ef65, github.com/apache/spark/pull/2874 + + [SPARK-4951][Core] Fix the issue that a busy executor may be killed + zsxwing + 2015-01-11 16:23:28 -0800 + Commit: 6942b97, github.com/apache/spark/pull/3783 + + [SPARK-5073] spark.storage.memoryMapThreshold have two default value + lewuathe + 2015-01-11 13:50:42 -0800 + Commit: 1656aae, github.com/apache/spark/pull/3900 + + [SPARK-5032] [graphx] Remove GraphX MIMA exclude for 1.3 + Joseph K. Bradley + 2015-01-10 17:25:39 -0800 + Commit: 3313260, github.com/apache/spark/pull/3856 + + [SPARK-5029][SQL] Enable from follow multiple brackets + scwf + 2015-01-10 17:07:34 -0800 + Commit: d22a31f, github.com/apache/spark/pull/3853 + + [SPARK-4871][SQL] Show sql statement in spark ui when run sql with spark-sql + wangfei + 2015-01-10 17:04:56 -0800 + Commit: 92d9a70, github.com/apache/spark/pull/3718 + + [Minor]Resolve sbt warnings during build (MQTTStreamSuite.scala). + GuoQiang Li + 2015-01-10 15:38:43 -0800 + Commit: 8a29dc7, github.com/apache/spark/pull/3989 + + [SPARK-5181] do not print writing WAL log when WAL is disabled + CodingCat + 2015-01-10 15:35:41 -0800 + Commit: f0d558b, github.com/apache/spark/pull/3985
+ + [SPARK-4692] [SQL] Support ! boolean logic operator like NOT + YanTangZhai , Michael Armbrust + 2015-01-10 15:05:23 -0800 + Commit: 0ca51cc, github.com/apache/spark/pull/3555 + + [SPARK-5187][SQL] Fix caching of tables with HiveUDFs in the WHERE clause + Michael Armbrust + 2015-01-10 14:25:45 -0800 + Commit: 3684fd2, github.com/apache/spark/pull/3987 + + SPARK-4963 [SQL] Add copy to SQL's Sample operator + Yanbo Liang + 2015-01-10 14:16:37 -0800 + Commit: 77106df, github.com/apache/spark/pull/3827 + + [SPARK-4861][SQL] Refactory command in spark sql + scwf + 2015-01-10 14:08:04 -0800 + Commit: b3e86dc, github.com/apache/spark/pull/3948 + + [SPARK-4574][SQL] Adding support for defining schema in foreign DDL commands. + scwf , Yin Huai , Fei Wang , wangfei + 2015-01-10 13:53:21 -0800 + Commit: 693a323, github.com/apache/spark/pull/3431 + + [SPARK-4943][SQL] Allow table name having dot for db/catalog + Alex Liu + 2015-01-10 13:23:09 -0800 + Commit: 4b39fd1, github.com/apache/spark/pull/3941 + + [SPARK-4925][SQL] Publish Spark SQL hive-thriftserver maven artifact + Alex Liu + 2015-01-10 13:19:12 -0800 + Commit: 1e56eba, github.com/apache/spark/pull/3766 + + [SPARK-5141][SQL]CaseInsensitiveMap throws java.io.NotSerializableException + luogankun + 2015-01-09 20:38:41 -0800 + Commit: 545dfcb, github.com/apache/spark/pull/3944 + + [SPARK-4406] [MLib] FIX: Validate k in SVD + MechCoder + 2015-01-09 17:45:18 -0800 + Commit: 4554529, github.com/apache/spark/pull/3945 + + [SPARK-4990][Deploy]to find default properties file, search SPARK_CONF_DIR first + WangTaoTheTonic , WangTao + 2015-01-09 17:10:02 -0800 + Commit: 8782eb9, github.com/apache/spark/pull/3823 + + [Minor] Fix import order and other coding style + bilna , Bilna P + 2015-01-09 14:45:28 -0800 + Commit: 4e1f12d, github.com/apache/spark/pull/3966 + + [DOC] Fixed Mesos version in doc from 0.18.1 to 0.21.0 + Kousuke Saruta + 2015-01-09 14:40:45 -0800 + Commit: ae62872, github.com/apache/spark/pull/3982 + + [SPARK-4737] Task set manager properly handles serialization errors + mcheah + 2015-01-09 14:16:20 -0800 + Commit: e0f28e0, github.com/apache/spark/pull/3638 + + [SPARK-1953][YARN]yarn client mode Application Master memory size is same as driver memory... + WangTaoTheTonic + 2015-01-09 13:20:32 -0800 + Commit: e966452, github.com/apache/spark/pull/3607 + + [SPARK-5015] [mllib] Random seed for GMM + make test suite deterministic + Joseph K. Bradley + 2015-01-09 13:00:15 -0800 + Commit: 7e8e62a, github.com/apache/spark/pull/3981 + + [SPARK-3619] Upgrade to Mesos 0.21 to work around MESOS-1688 + Jongyoul Lee + 2015-01-09 10:47:08 -0800 + Commit: 454fe12, github.com/apache/spark/pull/3934 + + [SPARK-5145][Mllib] Add BLAS.dsyr and use it in GaussianMixtureEM + Liang-Chi Hsieh + 2015-01-09 10:27:33 -0800 + Commit: e9ca16e, github.com/apache/spark/pull/3949 + + [SPARK-1143] Separate pool tests into their own suite.
+ Kay Ousterhout + 2015-01-09 09:47:06 -0800 + Commit: b6aa557, github.com/apache/spark/pull/3967 + + HOTFIX: Minor improvements to make-distribution.sh + Patrick Wendell + 2015-01-09 09:40:18 -0800 + Commit: 1790b38, github.com/apache/spark/pull/3973 + + SPARK-5136 [DOCS] Improve documentation around setting up Spark IntelliJ project + Sean Owen + 2015-01-09 09:35:46 -0800 + Commit: 547df97, github.com/apache/spark/pull/3952 + + [Minor] Fix test RetryingBlockFetcherSuite after changed config name + Aaron Davidson + 2015-01-09 09:20:16 -0800 + Commit: b4034c3, github.com/apache/spark/pull/3972 + + [SPARK-5169][YARN]fetch the correct max attempts + WangTaoTheTonic + 2015-01-09 08:10:09 -0600 + Commit: f3da4bd, github.com/apache/spark/pull/3942 + + [SPARK-5122] Remove Shark from spark-ec2 + Nicholas Chammas + 2015-01-08 17:42:08 -0800 + Commit: 167a5ab, github.com/apache/spark/pull/3939 + + [SPARK-4048] Enhance and extend hadoop-provided profile. + Marcelo Vanzin + 2015-01-08 17:15:13 -0800 + Commit: 48cecf6, github.com/apache/spark/pull/2982 + + [SPARK-4891][PySpark][MLlib] Add gamma/log normal/exp dist sampling to P... + RJ Nowling + 2015-01-08 15:03:43 -0800 + Commit: c9c8b21, github.com/apache/spark/pull/3955 + + [SPARK-4973][CORE] Local directory in the driver of client-mode continues remaining even if application finished when external shuffle is enabled + Kousuke Saruta + 2015-01-08 13:43:09 -0800 + Commit: a00af6b, github.com/apache/spark/pull/3811 + + SPARK-5148 [MLlib] Make usersOut/productsOut storagelevel in ALS configurable + Fernando Otero (ZeoS) + 2015-01-08 12:42:54 -0800 + Commit: 72df5a3, github.com/apache/spark/pull/3953 + + Document that groupByKey will OOM for large keys + Eric Moyer + 2015-01-08 11:55:23 -0800 + Commit: 538f221, github.com/apache/spark/pull/3936 + + [SPARK-5130][Deploy]Take yarn-cluster as cluster mode in spark-submit + WangTaoTheTonic + 2015-01-08 11:45:42 -0800 + Commit: 0760787, github.com/apache/spark/pull/3929 + + [Minor] Fix the value represented by spark.executor.id for consistency. + Kousuke Saruta + 2015-01-08 11:35:56 -0800 + Commit: 0a59727, github.com/apache/spark/pull/3812 + + [SPARK-4989][CORE] avoid wrong eventlog conf cause cluster down in standalone mode + Zhang, Liye + 2015-01-08 10:40:26 -0800 + Commit: 06dc4b5, github.com/apache/spark/pull/3824 + + [SPARK-4917] Add a function to convert into a graph with canonical edges in GraphOps + Takeshi Yamamuro + 2015-01-08 09:55:12 -0800 + Commit: f825e19, github.com/apache/spark/pull/3760 + + SPARK-5087. [YARN] Merge yarn.Client and yarn.ClientBase + Sandy Ryza + 2015-01-08 09:25:43 -0800 + Commit: 8d45834, github.com/apache/spark/pull/3896 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2015-01-07 23:25:56 -0800 + Commit: c082385, github.com/apache/spark/pull/3880 + + [SPARK-5116][MLlib] Add extractor for SparseVector and DenseVector + Shuo Xiang + 2015-01-07 23:22:37 -0800 + Commit: c66a976, github.com/apache/spark/pull/3919 + + [SPARK-5126][Core] Verify Spark urls before creating Actors so that invalid urls can crash the process. 
+ zsxwing + 2015-01-07 23:01:30 -0800 + Commit: 2b729d2, github.com/apache/spark/pull/3927 + + [SPARK-5132][Core]Correct stage Attempt Id key in stageInfofromJson + hushan[胡珊] + 2015-01-07 12:09:12 -0800 + Commit: d345ebe, github.com/apache/spark/pull/3932 + + [SPARK-5128][MLLib] Add common used log1pExp API in MLUtils + DB Tsai + 2015-01-07 10:13:41 -0800 + Commit: 60e2d9e, github.com/apache/spark/pull/3915 + + [SPARK-2458] Make failed application log visible on History Server + Masayoshi TSUZUKI + 2015-01-07 07:32:16 -0800 + Commit: 6e74ede, github.com/apache/spark/pull/3467 + + [SPARK-2165][YARN]add support for setting maxAppAttempts in the ApplicationSubmissionContext + WangTaoTheTonic + 2015-01-07 08:14:39 -0600 + Commit: 8fdd489, github.com/apache/spark/pull/3878 + + [YARN][SPARK-4929] Bug fix: fix the yarn-client code to support HA + huangzhaowei + 2015-01-07 08:10:42 -0600 + Commit: 5fde661, github.com/apache/spark/pull/3771 + + [SPARK-5099][Mllib] Simplify logistic loss function + Liang-Chi Hsieh + 2015-01-06 21:23:31 -0800 + Commit: e21acc1, github.com/apache/spark/pull/3899 + + [SPARK-5050][Mllib] Add unit test for sqdist + Liang-Chi Hsieh + 2015-01-06 14:00:45 -0800 + Commit: bb38ebb, github.com/apache/spark/pull/3869 + + SPARK-5017 [MLlib] - Use SVD to compute determinant and inverse of covariance matrix + Travis Galoppo + 2015-01-06 13:57:42 -0800 + Commit: 4108e5f, github.com/apache/spark/pull/3871 + + SPARK-4159 [CORE] Maven build doesn't run JUnit test suites + Sean Owen + 2015-01-06 12:02:08 -0800 + Commit: 4cba6eb, github.com/apache/spark/pull/3651 + + [Minor] Fix comments for GraphX 2D partitioning strategy + kj-ki + 2015-01-06 09:49:37 -0800 + Commit: 5e3ec11, github.com/apache/spark/pull/3904 + + [SPARK-1600] Refactor FileInputStream tests to remove Thread.sleep() calls and SystemClock usage + Josh Rosen + 2015-01-06 00:31:19 -0800 + Commit: a6394bc, github.com/apache/spark/pull/3801 + + SPARK-4843 [YARN] Squash ExecutorRunnableUtil and ExecutorRunnable + Kostas Sakellis + 2015-01-05 23:26:33 -0800 + Commit: 451546a, github.com/apache/spark/pull/3696 + + [SPARK-5040][SQL] Support expressing unresolved attributes using $"attribute name" notation in SQL DSL. + Reynold Xin + 2015-01-05 15:34:22 -0800 + Commit: 04d55d8, github.com/apache/spark/pull/3862 + + [SPARK-5093] Set spark.network.timeout to 120s consistently. + Reynold Xin + 2015-01-05 15:19:53 -0800 + Commit: bbcba3a, github.com/apache/spark/pull/3903 + + [SPARK-5089][PYSPARK][MLLIB] Fix vector convert + freeman + 2015-01-05 13:10:59 -0800 + Commit: 6c6f325, github.com/apache/spark/pull/3902 + + [SPARK-4465] runAsSparkUser doesn't affect TaskRunner in Mesos environme... 
+ Jongyoul Lee + 2015-01-05 12:05:09 -0800 + Commit: 1c0e7ce, github.com/apache/spark/pull/3741 + + [SPARK-5057] Log message in failed askWithReply attempts + WangTao , WangTaoTheTonic + 2015-01-05 11:59:38 -0800 + Commit: ce39b34, github.com/apache/spark/pull/3875 + + [SPARK-4688] Have a single shared network timeout in Spark + Varun Saxena , varunsaxena + 2015-01-05 10:32:37 -0800 + Commit: d3f07fd, github.com/apache/spark/pull/3562 + + [SPARK-5074][Core] Fix a non-deterministic test failure + zsxwing + 2015-01-04 21:18:33 -0800 + Commit: 5c506ce, github.com/apache/spark/pull/3889 + + [SPARK-5083][Core] Fix a flaky test in TaskResultGetterSuite + zsxwing + 2015-01-04 21:09:21 -0800 + Commit: 27e7f5a, github.com/apache/spark/pull/3894 + + [SPARK-5069][Core] Fix the race condition of TaskSchedulerImpl.dagScheduler + zsxwing + 2015-01-04 21:06:04 -0800 + Commit: 6c726a3, github.com/apache/spark/pull/3887 + + [SPARK-5067][Core] Use '===' to compare well-defined case class + zsxwing + 2015-01-04 21:03:17 -0800 + Commit: 7239652, github.com/apache/spark/pull/3886 + + [SPARK-4835] Disable validateOutputSpecs for Spark Streaming jobs + Josh Rosen + 2015-01-04 20:26:18 -0800 + Commit: 939ba1f, github.com/apache/spark/pull/3832 + + [SPARK-4631] unit test for MQTT + bilna , Bilna P + 2015-01-04 19:37:48 -0800 + Commit: e767d7d, github.com/apache/spark/pull/3844 + + [SPARK-4787] Stop SparkContext if a DAGScheduler init error occurs + Dale + 2015-01-04 13:28:37 -0800 + Commit: 3fddc94, github.com/apache/spark/pull/3809 + + [SPARK-794][Core] Remove sleep() in ClusterScheduler.stop + Brennon York + 2015-01-04 12:40:39 -0800 + Commit: b96008d, github.com/apache/spark/pull/3851 + + [SPARK-5058] Updated broken links + sigmoidanalytics + 2015-01-03 19:46:08 -0800 + Commit: 342612b, github.com/apache/spark/pull/3877 + + Fixed typos in streaming-kafka-integration.md + Akhil Das + 2015-01-02 15:12:27 -0800 + Commit: cdccc26, github.com/apache/spark/pull/3876 + + [SPARK-3325][Streaming] Add a parameter to the method print in class DStream + Yadong Qi , q00251598 , Tathagata Das , wangfei + 2015-01-02 15:09:41 -0800 + Commit: bd88b71, github.com/apache/spark/pull/3865 + + [HOTFIX] Bind web UI to ephemeral port in DriverSuite + Josh Rosen + 2015-01-01 15:03:54 -0800 + Commit: 0128398, github.com/apache/spark/pull/3873 + + [SPARK-5038] Add explicit return type for implicit functions. + Reynold Xin + 2014-12-31 17:07:47 -0800 + Commit: 7749dd6, github.com/apache/spark/pull/3860 + + SPARK-2757 [BUILD] [STREAMING] Add Mima test for Spark Sink after 1.10 is released + Sean Owen + 2014-12-31 16:59:17 -0800 + Commit: 4bb1248, github.com/apache/spark/pull/3842 + + [SPARK-5035] [Streaming] ReceiverMessage trait should extend Serializable + Josh Rosen + 2014-12-31 16:02:47 -0800 + Commit: fe6efac, github.com/apache/spark/pull/3857 + + SPARK-5020 [MLlib] GaussianMixtureModel.predictMembership() should take an RDD only + Travis Galoppo + 2014-12-31 15:39:58 -0800 + Commit: c4f0b4f, github.com/apache/spark/pull/3854 + + [SPARK-5028][Streaming]Add total received and processed records metrics to Streaming UI + jerryshao + 2014-12-31 14:45:31 -0800 + Commit: fdc2aa4, github.com/apache/spark/pull/3852 + + [SPARK-4790][STREAMING] Fix ReceivedBlockTrackerSuite waits for old file... 
+ Hari Shreedharan + 2014-12-31 14:35:07 -0800 + Commit: 3610d3c, github.com/apache/spark/pull/3726 + + [SPARK-5038][SQL] Add explicit return type for implicit functions in Spark SQL + Reynold Xin + 2014-12-31 14:25:03 -0800 + Commit: c88a3d7, github.com/apache/spark/pull/3859 + + [HOTFIX] Disable Spark UI in SparkSubmitSuite tests + Josh Rosen + 2014-12-12 12:38:37 -0800 + Commit: e24d3a9 + + SPARK-4547 [MLLIB] OOM when making bins in BinaryClassificationMetrics + Sean Owen + 2014-12-31 13:37:04 -0800 + Commit: 3d194cc, github.com/apache/spark/pull/3702 + + [SPARK-4298][Core] - The spark-submit cannot read Main-Class from Manifest. + Brennon York + 2014-12-31 11:54:10 -0800 + Commit: 8e14c5e, github.com/apache/spark/pull/3561 + + [SPARK-4797] Replace breezeSquaredDistance + Liang-Chi Hsieh + 2014-12-31 11:50:53 -0800 + Commit: 06a9aa5, github.com/apache/spark/pull/3643 + + [SPARK-1010] Clean up uses of System.setProperty in unit tests + Josh Rosen + 2014-12-30 18:12:20 -0800 + Commit: 352ed6b, github.com/apache/spark/pull/3739 + + [SPARK-4998][MLlib]delete the "train" function + Liu Jiongzhou + 2014-12-30 15:55:56 -0800 + Commit: 035bac8, github.com/apache/spark/pull/3836 + + [SPARK-4813][Streaming] Fix the issue that ContextWaiter didn't handle 'spurious wakeup' + zsxwing + 2014-12-30 14:39:13 -0800 + Commit: 6a89782, github.com/apache/spark/pull/3661 + + [Spark-4995] Replace Vector.toBreeze.activeIterator with foreachActive + Jakub Dubovsky + 2014-12-30 14:19:07 -0800 + Commit: 0f31992, github.com/apache/spark/pull/3846 + + SPARK-3955 part 2 [CORE] [HOTFIX] Different versions between jackson-mapper-asl and jackson-core-asl + Sean Owen + 2014-12-30 14:00:57 -0800 + Commit: b239ea1, github.com/apache/spark/pull/3829 + + [SPARK-4570][SQL]add BroadcastLeftSemiJoinHash + wangxiaojing + 2014-12-30 13:54:12 -0800 + Commit: 07fa191, github.com/apache/spark/pull/3442 + + [SPARK-4935][SQL] When hive.cli.print.header configured, spark-sql aborted if passed in a invalid sql + wangfei , Fei Wang + 2014-12-30 13:44:30 -0800 + Commit: 8f29b7c, github.com/apache/spark/pull/3761 + + [SPARK-4386] Improve performance when writing Parquet files + Michael Davies + 2014-12-30 13:40:51 -0800 + Commit: 7425bec, github.com/apache/spark/pull/3843 + + [SPARK-4937][SQL] Normalizes conjunctions and disjunctions to eliminate common predicates + Cheng Lian + 2014-12-30 13:38:27 -0800 + Commit: 61a99f6, github.com/apache/spark/pull/3784 + + [SPARK-4928][SQL] Fix: Operator '>,<,>=,<=' with decimal between different precision report error + guowei2 + 2014-12-30 12:21:00 -0800 + Commit: a75dd83, github.com/apache/spark/pull/3767 + + [SPARK-4930][SQL][DOCS]Update SQL programming guide, CACHE TABLE is eager + luogankun + 2014-12-30 12:18:55 -0800 + Commit: 2deac74, github.com/apache/spark/pull/3773 + + [SPARK-4916][SQL][DOCS]Update SQL programming guide about cache section + luogankun + 2014-12-30 12:17:49 -0800 + Commit: f7a41a0, github.com/apache/spark/pull/3759 + + [SPARK-4493][SQL] Tests for IsNull / IsNotNull in the ParquetFilterSuite + Cheng Lian + 2014-12-30 12:16:45 -0800 + Commit: 19a8802, github.com/apache/spark/pull/3748 + + [Spark-4512] [SQL] Unresolved Attribute Exception in Sort By + Cheng Hao + 2014-12-30 12:11:44 -0800 + Commit: 53f0a00, github.com/apache/spark/pull/3386 + + [SPARK-5002][SQL] Using ascending by default when not specify 
order in order by + wangfei + 2014-12-30 12:07:24 -0800 + Commit: daac221, github.com/apache/spark/pull/3838 + + [SPARK-4904] [SQL] Remove the unnecessary code change in Generic UDF + Cheng Hao + 2014-12-30 11:47:08 -0800 + Commit: 63b84b7, github.com/apache/spark/pull/3745 + + [SPARK-4959] [SQL] Attributes are case sensitive when using a select query from a projection + Cheng Hao + 2014-12-30 11:33:47 -0800 + Commit: 5595eaa, github.com/apache/spark/pull/3796 + + [SPARK-4975][SQL] Fix HiveInspectorSuite test failure + scwf , Fei Wang + 2014-12-30 11:30:47 -0800 + Commit: 65357f1, github.com/apache/spark/pull/3814 + + [SQL] enable view test + Daoyuan Wang + 2014-12-30 11:29:13 -0800 + Commit: 94d60b7, github.com/apache/spark/pull/3826 + + [SPARK-4908][SQL] Prevent multiple concurrent hive native commands + Michael Armbrust + 2014-12-30 11:24:46 -0800 + Commit: 480bd1d, github.com/apache/spark/pull/3834 + + [SPARK-4882] Register PythonBroadcast with Kryo so that PySpark works with KryoSerializer + Josh Rosen + 2014-12-30 09:29:52 -0800 + Commit: efa80a53, github.com/apache/spark/pull/3831 + + [SPARK-4920][UI] add version on master and worker page for standalone mode + Zhang, Liye + 2014-12-30 09:19:47 -0800 + Commit: 9077e72, github.com/apache/spark/pull/3769 + + [SPARK-4972][MLlib] Updated the scala doc for lasso and ridge regression for the change of LeastSquaresGradient + DB Tsai + 2014-12-29 17:17:12 -0800 + Commit: 040d6f2, github.com/apache/spark/pull/3808 + + Added setMinCount to Word2Vec.scala + ganonp + 2014-12-29 15:31:19 -0800 + Commit: 343db39, github.com/apache/spark/pull/3693 + + SPARK-4156 [MLLIB] EM algorithm for GMMs + Travis Galoppo , Travis Galoppo , tgaloppo , FlytxtRnD + 2014-12-29 15:29:15 -0800 + Commit: 6cf6fdf, github.com/apache/spark/pull/3022 + + SPARK-4968: takeOrdered to skip reduce step in case mappers return no partitions + Yash Datta + 2014-12-29 13:49:45 -0800 + Commit: 9bc0df6, github.com/apache/spark/pull/3830 + + [SPARK-4409][MLlib] Additional Linear Algebra Utils + Burak Yavuz , Xiangrui Meng + 2014-12-29 13:24:26 -0800 + Commit: 02b55de, github.com/apache/spark/pull/3319 + + [Minor] Fix a typo of type parameter in JavaUtils.scala + Kousuke Saruta + 2014-12-29 12:05:08 -0800 + Commit: 8d72341, github.com/apache/spark/pull/3789 + + [SPARK-4946] [CORE] Using AkkaUtils.askWithReply in MapOutputTracker.askTracker to reduce the chance of the communicating problem + YanTangZhai , yantangzhai + 2014-12-29 11:30:54 -0800 + Commit: 815de54, github.com/apache/spark/pull/3785 + + Adde LICENSE Header to build/mvn, build/sbt and sbt/sbt + Kousuke Saruta + 2014-12-29 10:48:53 -0800 + Commit: 4cef05e, github.com/apache/spark/pull/3817 + + [SPARK-4982][DOC] `spark.ui.retainedJobs` description is wrong in Spark UI configuration guide + wangxiaojing + 2014-12-29 10:45:14 -0800 + Commit: 6645e52, github.com/apache/spark/pull/3818 + + [SPARK-4966][YARN]The MemoryOverhead value is setted not correctly + meiyoula <1039320815@qq.com> + 2014-12-29 08:20:30 -0600 + Commit: 14fa87b, github.com/apache/spark/pull/3797 + + [SPARK-4501][Core] - Create build/mvn to automatically download maven/zinc/scalac + Brennon York + 2014-12-27 13:25:18 -0800 + Commit: a3e51cc, github.com/apache/spark/pull/3707 + + [SPARK-4952][Core]Handle ConcurrentModificationExceptions in SparkEnv.environmentDetails + GuoQiang Li + 2014-12-26 
23:31:29 -0800 + Commit: 080ceb7, github.com/apache/spark/pull/3788 + + [SPARK-4954][Core] add spark version infomation in log for standalone mode + Zhang, Liye + 2014-12-26 23:23:13 -0800 + Commit: 786808a, github.com/apache/spark/pull/3790 + + [SPARK-3955] Different versions between jackson-mapper-asl and jackson-c... + Jongyoul Lee + 2014-12-26 22:59:34 -0800 + Commit: 2483c1e, github.com/apache/spark/pull/3716 + + HOTFIX: Slight tweak on previous commit. + Patrick Wendell + 2014-12-26 22:55:04 -0800 + Commit: 82bf4be + + [SPARK-3787][BUILD] Assembly jar name is wrong when we build with sbt omitting -Dhadoop.version + Kousuke Saruta + 2014-12-26 22:52:04 -0800 + Commit: de95c57, github.com/apache/spark/pull/3046 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-12-26 22:39:56 -0800 + Commit: 534f24b, github.com/apache/spark/pull/3456 + + SPARK-4971: Fix typo in BlockGenerator comment + CodingCat + 2014-12-26 12:03:22 -0800 + Commit: fda4331, github.com/apache/spark/pull/3807 + + [SPARK-4608][Streaming] Reorganize StreamingContext implicit to improve API convenience + zsxwing + 2014-12-25 19:46:05 -0800 + Commit: f9ed2b6, github.com/apache/spark/pull/3464 + + [SPARK-4537][Streaming] Expand StreamingSource to add more metrics + jerryshao + 2014-12-25 19:39:49 -0800 + Commit: f205fe4, github.com/apache/spark/pull/3466 + + [EC2] Update mesos/spark-ec2 branch to branch-1.3 + Nicholas Chammas + 2014-12-25 14:16:50 -0800 + Commit: ac82785, github.com/apache/spark/pull/3804 + + [EC2] Update default Spark version to 1.2.0 + Nicholas Chammas + 2014-12-25 14:13:12 -0800 + Commit: b6b6393, github.com/apache/spark/pull/3793 + + Fix "Building Spark With Maven" link in README.md + Denny Lee + 2014-12-25 14:05:55 -0800 + Commit: 08b18c7, github.com/apache/spark/pull/3802 + + [SPARK-4953][Doc] Fix the description of building Spark with YARN + Kousuke Saruta + 2014-12-25 07:05:43 -0800 + Commit: 11dd993, github.com/apache/spark/pull/3787 + + [SPARK-4873][Streaming] Use `Future.zip` instead of `Future.flatMap`(for-loop) in WriteAheadLogBasedBlockHandler + zsxwing + 2014-12-24 19:49:41 -0800 + Commit: b4d0db8, github.com/apache/spark/pull/3721 + + SPARK-4297 [BUILD] Build warning fixes omnibus + Sean Owen + 2014-12-24 13:32:51 -0800 + Commit: 29fabb1, github.com/apache/spark/pull/3157 + + [SPARK-4881][Minor] Use SparkConf#getBoolean instead of get().toBoolean + Kousuke Saruta + 2014-12-23 19:14:34 -0800 + Commit: 199e59a, github.com/apache/spark/pull/3733 + + [SPARK-4860][pyspark][sql] speeding up `sample()` and `takeSample()` + jbencook , J. Benjamin Cook + 2014-12-23 17:46:24 -0800 + Commit: fd41eb9, github.com/apache/spark/pull/3764 + + [SPARK-4606] Send EOF to child JVM when there's no more data to read. 
+ Marcelo Vanzin + 2014-12-23 16:02:59 -0800 + Commit: 7e2deb7, github.com/apache/spark/pull/3460 + + [SPARK-4671][Streaming]Do not replicate streaming block when WAL is enabled + jerryshao + 2014-12-23 15:45:53 -0800 + Commit: 3f5f4cc, github.com/apache/spark/pull/3534 + + [SPARK-4802] [streaming] Remove receiverInfo once receiver is de-registered + Ilayaperumal Gopinathan + 2014-12-23 15:14:54 -0800 + Commit: 10d69e9, github.com/apache/spark/pull/3647 + + [SPARK-4913] Fix incorrect event log path + Liang-Chi Hsieh + 2014-12-23 14:58:33 -0800 + Commit: 96281cd, github.com/apache/spark/pull/3755 + + [SPARK-4730][YARN] Warn against deprecated YARN settings + Andrew Or + 2014-12-23 14:28:36 -0800 + Commit: 27c5399, github.com/apache/spark/pull/3590 + + [SPARK-4914][Build] Cleans lib_managed before compiling with Hive 0.13.1 + Cheng Lian + 2014-12-23 12:54:20 -0800 + Commit: 395b771, github.com/apache/spark/pull/3756 + + [SPARK-4932] Add help comments in Analytics + Takeshi Yamamuro + 2014-12-23 12:39:41 -0800 + Commit: 9c251c5, github.com/apache/spark/pull/3775 + + [SPARK-4834] [standalone] Clean up application files after app finishes. + Marcelo Vanzin + 2014-12-23 12:02:08 -0800 + Commit: dd15536, github.com/apache/spark/pull/3705 + + [SPARK-4931][Yarn][Docs] Fix the format of running-on-yarn.md + zsxwing + 2014-12-23 11:18:06 -0800 + Commit: 2d215ae, github.com/apache/spark/pull/3774 + + [SPARK-4890] Ignore downloaded EC2 libs + Nicholas Chammas + 2014-12-23 11:12:16 -0800 + Commit: 2823c7f, github.com/apache/spark/pull/3770 + + [Docs] Minor typo fixes + Nicholas Chammas + 2014-12-22 22:54:32 -0800 + Commit: 0e532cc, github.com/apache/spark/pull/3772 + + [SPARK-4907][MLlib] Inconsistent loss and gradient in LeastSquaresGradient compared with R + DB Tsai + 2014-12-22 16:42:55 -0800 + Commit: a96b727, github.com/apache/spark/pull/3746 + + [SPARK-4818][Core] Add 'iterator' to reduce memory consumed by join + zsxwing + 2014-12-22 14:26:28 -0800 + Commit: c233ab3, github.com/apache/spark/pull/3671 + + [SPARK-4920][UI]:current spark version in UI is not striking. + genmao.ygm + 2014-12-22 14:14:39 -0800 + Commit: de9d7d2, github.com/apache/spark/pull/3763 + + [Minor] Fix scala doc + Liang-Chi Hsieh + 2014-12-22 14:13:31 -0800 + Commit: a61aa66, github.com/apache/spark/pull/3751 + + [SPARK-4864] Add documentation to Netty-based configs + Aaron Davidson + 2014-12-22 13:09:22 -0800 + Commit: fbca6b6, github.com/apache/spark/pull/3713 + + [SPARK-4079] [CORE] Consolidates Errors if a CompressionCodec is not available + Kostas Sakellis + 2014-12-22 13:07:01 -0800 + Commit: 7c0ed13, github.com/apache/spark/pull/3119 + + SPARK-4447. 
Remove layers of abstraction in YARN code no longer needed after dropping yarn-alpha + Sandy Ryza + 2014-12-22 12:23:43 -0800 + Commit: d62da64, github.com/apache/spark/pull/3652 + + [SPARK-4733] Add missing prameter comments in ShuffleDependency + Takeshi Yamamuro + 2014-12-22 12:19:23 -0800 + Commit: fb8e85e, github.com/apache/spark/pull/3594 + + [Minor] Improve some code in BroadcastTest for short + carlmartin + 2014-12-22 12:13:53 -0800 + Commit: 1d9788e, github.com/apache/spark/pull/3750 + + [SPARK-4883][Shuffle] Add a name to the directoryCleaner thread + zsxwing + 2014-12-22 12:11:36 -0800 + Commit: 8773705, github.com/apache/spark/pull/3734 + + [SPARK-4870] Add spark version to driver log + Zhang, Liye + 2014-12-22 11:36:49 -0800 + Commit: 39272c8, github.com/apache/spark/pull/3717 + + [SPARK-4915][YARN] Fix classname to be specified for external shuffle service. + Tsuyoshi Ozawa + 2014-12-22 11:28:05 -0800 + Commit: 96606f6, github.com/apache/spark/pull/3757 + + [SPARK-4918][Core] Reuse Text in saveAsTextFile + zsxwing + 2014-12-22 11:20:00 -0800 + Commit: 93b2f3a, github.com/apache/spark/pull/3762 + + [SPARK-2075][Core] Make the compiler generate same bytes code for Hadoop 1.+ and Hadoop 2.+ + zsxwing + 2014-12-21 22:10:19 -0800 + Commit: 6ee6aa7, github.com/apache/spark/pull/3740 + + SPARK-4910 [CORE] build failed (use of FileStatus.isFile in Hadoop 1.x) + Sean Owen + 2014-12-21 13:16:57 -0800 + Commit: c6a3c0d, github.com/apache/spark/pull/3754 + + [Minor] Build Failed: value defaultProperties not found + huangzhaowei + 2014-12-19 23:32:56 -0800 + Commit: a764960, github.com/apache/spark/pull/3749 + + [SPARK-4140] Document dynamic allocation + Andrew Or , Tsuyoshi Ozawa + 2014-12-19 19:36:20 -0800 + Commit: 15c03e1, github.com/apache/spark/pull/3731 + + [SPARK-4831] Do not include SPARK_CLASSPATH if empty + Daniel Darabos + 2014-12-19 19:32:39 -0800 + Commit: 7cb3f54, github.com/apache/spark/pull/3678 + + SPARK-2641: Passing num executors to spark arguments from properties file + Kanwaljit Singh + 2014-12-19 19:25:39 -0800 + Commit: 1d64812, github.com/apache/spark/pull/1657 + + [SPARK-3060] spark-shell.cmd doesn't accept application options in Windows OS + Masayoshi TSUZUKI + 2014-12-19 19:19:53 -0800 + Commit: 8d93247, github.com/apache/spark/pull/3350 + + change signature of example to match released code + Eran Medan + 2014-12-19 18:29:36 -0800 + Commit: c25c669, github.com/apache/spark/pull/3747 + + [SPARK-2261] Make event logger use a single file. + Marcelo Vanzin + 2014-12-19 18:21:15 -0800 + Commit: 4564519, github.com/apache/spark/pull/1222 + + [SPARK-4890] Upgrade Boto to 2.34.0; automatically download Boto from PyPi instead of packaging it + Josh Rosen + 2014-12-19 17:02:37 -0800 + Commit: c28083f, github.com/apache/spark/pull/3737 + + [SPARK-4896] don’t redundantly overwrite executor JAR deps + Ryan Williams + 2014-12-19 15:24:41 -0800 + Commit: 7981f96, github.com/apache/spark/pull/2848 + + [SPARK-4889] update history server example cmds + Ryan Williams + 2014-12-19 13:56:04 -0800 + Commit: cdb2c64, github.com/apache/spark/pull/3736 + + Small refactoring to pass SparkEnv into Executor rather than creating SparkEnv in Executor. 
+ Reynold Xin + 2014-12-19 12:51:12 -0800 + Commit: 336cd34, github.com/apache/spark/pull/3738 + + [Build] Remove spark-staging-1038 + scwf + 2014-12-19 08:29:38 -0800 + Commit: 8e253eb, github.com/apache/spark/pull/3743 + + [SPARK-4901] [SQL] Hot fix for ByteWritables.copyBytes + Cheng Hao + 2014-12-19 08:04:41 -0800 + Commit: 5479450, github.com/apache/spark/pull/3742 + + SPARK-3428. TaskMetrics for running tasks is missing GC time metrics + Sandy Ryza + 2014-12-18 22:40:44 -0800 + Commit: 283263f, github.com/apache/spark/pull/3684 + + [SPARK-4674] Refactor getCallSite + Liang-Chi Hsieh + 2014-12-18 21:41:02 -0800 + Commit: d7fc69a, github.com/apache/spark/pull/3532 + + [SPARK-4728][MLLib] Add exponential, gamma, and log normal sampling to MLlib da... + RJ Nowling + 2014-12-18 21:00:49 -0800 + Commit: ee1fb97, github.com/apache/spark/pull/3680 + + [SPARK-4861][SQL] Refactory command in spark sql + wangfei , scwf + 2014-12-18 20:24:56 -0800 + Commit: c3d91da, github.com/apache/spark/pull/3712 + + [SPARK-4573] [SQL] Add SettableStructObjectInspector support in "wrap" function + Cheng Hao + 2014-12-18 20:21:52 -0800 + Commit: ae9f128, github.com/apache/spark/pull/3429 + + [SPARK-2554][SQL] Supporting SumDistinct partial aggregation + ravipesala + 2014-12-18 20:19:10 -0800 + Commit: 7687415, github.com/apache/spark/pull/3348 + + [SPARK-4693] [SQL] PruningPredicates may be wrong if predicates contains an empty AttributeSet() references + YanTangZhai , yantangzhai + 2014-12-18 20:13:46 -0800 + Commit: e7de7e5, github.com/apache/spark/pull/3556 + + [SPARK-4756][SQL] FIX: sessionToActivePool grow infinitely, even as sessions expire + guowei2 + 2014-12-18 20:10:23 -0800 + Commit: 22ddb6e, github.com/apache/spark/pull/3617 + + [SPARK-3928][SQL] Support wildcard matches on Parquet files. 
+ Thu Kyaw + 2014-12-18 20:08:32 -0800 + Commit: b68bc6d, github.com/apache/spark/pull/3407 + + [SPARK-2663] [SQL] Support the Grouping Set + Cheng Hao + 2014-12-18 18:58:29 -0800 + Commit: f728e0f, github.com/apache/spark/pull/1567 + + [SPARK-4754] Refactor SparkContext into ExecutorAllocationClient + Andrew Or + 2014-12-18 17:37:42 -0800 + Commit: 9804a75, github.com/apache/spark/pull/3614 + + [SPARK-4837] NettyBlockTransferService should use spark.blockManager.port config + Aaron Davidson + 2014-12-18 16:43:16 -0800 + Commit: 105293a, github.com/apache/spark/pull/3688 + + SPARK-4743 - Use SparkEnv.serializer instead of closureSerializer in aggregateByKey and foldByKey + Ivan Vergiliev + 2014-12-18 16:29:36 -0800 + Commit: f9f58b9, github.com/apache/spark/pull/3605 + + [SPARK-4884]: Improve Partition docs + Madhu Siddalingaiah + 2014-12-18 16:00:53 -0800 + Commit: d5a596d, github.com/apache/spark/pull/3722 + + [SPARK-4880] remove spark.locality.wait in Analytics + Ernest + 2014-12-18 15:42:26 -0800 + Commit: a7ed6f3, github.com/apache/spark/pull/3730 + + [SPARK-4887][MLlib] Fix a bad unittest in LogisticRegressionSuite + DB Tsai + 2014-12-18 13:55:49 -0800 + Commit: 59a49db, github.com/apache/spark/pull/3735 + + [SPARK-3607] ConnectionManager threads.max configs on the thread pools don't work + Ilya Ganelin + 2014-12-18 12:53:18 -0800 + Commit: 3720057, github.com/apache/spark/pull/3664 + + Add mesos specific configurations into doc + Timothy Chen + 2014-12-18 12:15:53 -0800 + Commit: d9956f8, github.com/apache/spark/pull/3349 + + SPARK-3779. yarn spark.yarn.applicationMaster.waitTries config should be... + Sandy Ryza + 2014-12-18 12:19:07 -0600 + Commit: 253b72b, github.com/apache/spark/pull/3471 + + [SPARK-4461][YARN] pass extra java options to yarn application master + Zhan Zhang + 2014-12-18 10:01:46 -0600 + Commit: 3b76469, github.com/apache/spark/pull/3409 + + [SPARK-4822] Use sphinx tags for Python doc annotations + lewuathe + 2014-12-17 17:31:24 -0800 + Commit: 3cd5161, github.com/apache/spark/pull/3685 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-12-17 15:50:10 -0800 + Commit: ca12608, github.com/apache/spark/pull/3137 + + [SPARK-3891][SQL] Add array support to percentile, percentile_approx and constant inspectors support + Venkata Ramana G , Venkata Ramana Gollamudi + 2014-12-17 15:41:35 -0800 + Commit: f33d550, github.com/apache/spark/pull/2802 + + [SPARK-4856] [SQL] NullType instead of StringType when sampling against empty string or nul... + Cheng Hao + 2014-12-17 15:01:59 -0800 + Commit: 8d0d2a6, github.com/apache/spark/pull/3708 + + [HOTFIX][SQL] Fix parquet filter suite + Michael Armbrust + 2014-12-17 14:27:02 -0800 + Commit: 19c0faa, github.com/apache/spark/pull/3727 + + [SPARK-4821] [mllib] [python] [docs] Fix for pyspark.mllib.rand doc + Joseph K. 
Bradley + 2014-12-17 14:12:46 -0800 + Commit: affc3f4, github.com/apache/spark/pull/3669 + + [SPARK-3739] [SQL] Update the split num base on block size for table scanning + Cheng Hao + 2014-12-17 13:39:36 -0800 + Commit: 636d9fc, github.com/apache/spark/pull/2589 + + [SPARK-4755] [SQL] sqrt(negative value) should return null + Daoyuan Wang + 2014-12-17 12:51:27 -0800 + Commit: 902e4d5, github.com/apache/spark/pull/3616 + + [SPARK-4493][SQL] Don't pushdown Eq, NotEq, Lt, LtEq, Gt and GtEq predicates with nulls for Parquet + Cheng Lian + 2014-12-17 12:48:04 -0800 + Commit: 6277135, github.com/apache/spark/pull/3367 + + [SPARK-3698][SQL] Fix case insensitive resolution of GetField. + Michael Armbrust + 2014-12-17 12:43:51 -0800 + Commit: 7ad579e, github.com/apache/spark/pull/3724 + + [SPARK-4694]Fix HiveThriftServer2 cann't stop In Yarn HA mode. + carlmartin + 2014-12-17 12:24:03 -0800 + Commit: 4782def, github.com/apache/spark/pull/3576 + + [SPARK-4625] [SQL] Add sort by for DSL & SimpleSqlParser + Cheng Hao + 2014-12-17 12:01:57 -0800 + Commit: 5fdcbdc, github.com/apache/spark/pull/3481 + + [SPARK-4595][Core] Fix MetricsServlet not work issue + Saisai Shao , Josh Rosen , jerryshao + 2014-12-17 11:47:44 -0800 + Commit: cf50631, github.com/apache/spark/pull/3444 + + [HOTFIX] Fix RAT exclusion for known_translations file + Josh Rosen + 2014-12-16 23:00:25 -0800 + Commit: 3d0c37b, github.com/apache/spark/pull/3719 + + [Release] Update contributors list format and sort it + Andrew Or + 2014-12-16 22:11:03 -0800 + Commit: 4e1112e + + [SPARK-4618][SQL] Make foreign DDL commands options case-insensitive + scwf , wangfei + 2014-12-16 21:26:36 -0800 + Commit: 6069880, github.com/apache/spark/pull/3470 + + [SPARK-4866] support StructType as key in MapType + Davies Liu + 2014-12-16 21:23:28 -0800 + Commit: ec5c427, github.com/apache/spark/pull/3714 + + [SPARK-4375] [SQL] Add 0 argument support for udf + Cheng Hao + 2014-12-16 21:21:11 -0800 + Commit: 770d815, github.com/apache/spark/pull/3595 + + [SPARK-4720][SQL] Remainder should also return null if the divider is 0. + Takuya UESHIN + 2014-12-16 21:19:57 -0800 + Commit: ddc7ba3, github.com/apache/spark/pull/3581 + + [SPARK-4744] [SQL] Short circuit evaluation for AND & OR in CodeGen + Cheng Hao + 2014-12-16 21:18:39 -0800 + Commit: 0aa834a, github.com/apache/spark/pull/3606 + + [SPARK-4798][SQL] A new set of Parquet testing API and test suites + Cheng Lian + 2014-12-16 21:16:03 -0800 + Commit: 3b395e1, github.com/apache/spark/pull/3644 + + [Release] Cache known author translations locally + Andrew Or + 2014-12-16 19:28:43 -0800 + Commit: b85044e + + [Release] Major improvements to generate contributors script + Andrew Or + 2014-12-16 17:55:27 -0800 + Commit: 6f80b74 + + [SPARK-4269][SQL] make wait time configurable in BroadcastHashJoin + Jacky Li + 2014-12-16 15:34:59 -0800 + Commit: fa66ef6, github.com/apache/spark/pull/3133 + + [SPARK-4827][SQL] Fix resolution of deeply nested Project(attr, Project(Star,...)). 
+ Michael Armbrust + 2014-12-16 15:31:19 -0800 + Commit: a66c23e, github.com/apache/spark/pull/3674 + + [SPARK-4483][SQL]Optimization about reduce memory costs during the HashOuterJoin + tianyi , tianyi + 2014-12-16 15:22:29 -0800 + Commit: 30f6b85, github.com/apache/spark/pull/3375 + + [SPARK-4527][SQl]Add BroadcastNestedLoopJoin operator selection testsuite + wangxiaojing + 2014-12-16 14:45:56 -0800 + Commit: ea1315e, github.com/apache/spark/pull/3395 + + SPARK-4767: Add support for launching in a specified placement group to spark_ec2 + Holden Karau + 2014-12-16 14:37:04 -0800 + Commit: b0dfdbd, github.com/apache/spark/pull/3623 + + [SPARK-4812][SQL] Fix the initialization issue of 'codegenEnabled' + zsxwing + 2014-12-16 14:13:40 -0800 + Commit: 6530243, github.com/apache/spark/pull/3660 + + [SPARK-4847][SQL]Fix "extraStrategies cannot take effect in SQLContext" issue + jerryshao + 2014-12-16 14:08:28 -0800 + Commit: dc8280d, github.com/apache/spark/pull/3698 + + [DOCS][SQL] Add a Note on jsonFile having separate JSON objects per line + Peter Vandenabeele + 2014-12-16 13:57:55 -0800 + Commit: 1a9e35e, github.com/apache/spark/pull/3517 + + [SQL] SPARK-4700: Add HTTP protocol spark thrift server + Judy Nash , judynash + 2014-12-16 12:37:26 -0800 + Commit: 17688d1, github.com/apache/spark/pull/3672 + + [SPARK-3405] add subnet-id and vpc-id options to spark_ec2.py + Mike Jennings , Mike Jennings + 2014-12-16 12:13:21 -0800 + Commit: d12c071, github.com/apache/spark/pull/2872 + + [SPARK-4855][mllib] testing the Chi-squared hypothesis test + jbencook + 2014-12-16 11:37:23 -0800 + Commit: cb48447, github.com/apache/spark/pull/3679 + + [SPARK-4437] update doc for WholeCombineFileRecordReader + Davies Liu , Josh Rosen + 2014-12-16 11:19:36 -0800 + Commit: ed36200, github.com/apache/spark/pull/3301 + + [SPARK-4841] fix zip with textFile() + Davies Liu + 2014-12-15 22:58:26 -0800 + Commit: c246b95, github.com/apache/spark/pull/3706 + + [SPARK-4792] Add error message when making local dir unsuccessfully + meiyoula <1039320815@qq.com> + 2014-12-15 22:30:18 -0800 + Commit: c762877, github.com/apache/spark/pull/3635 + + SPARK-4814 [CORE] Enable assertions in SBT, Maven tests / AssertionError from Hive's LazyBinaryInteger + Sean Owen + 2014-12-15 17:12:05 -0800 + Commit: 81112e4, github.com/apache/spark/pull/3692 + + [Minor][Core] fix comments in MapOutputTracker + wangfei + 2014-12-15 16:46:21 -0800 + Commit: 5c24759, github.com/apache/spark/pull/3700 + + SPARK-785 [CORE] ClosureCleaner not invoked on most PairRDDFunctions + Sean Owen + 2014-12-15 16:06:15 -0800 + Commit: 2a28bc6, github.com/apache/spark/pull/3690 + + [SPARK-4668] Fix some documentation typos. + Ryan Williams + 2014-12-15 14:52:17 -0800 + Commit: 8176b7a, github.com/apache/spark/pull/3523 + + [SPARK-1037] The name of findTaskFromList & findTask in TaskSetManager.scala is confusing + Ilya Ganelin + 2014-12-15 14:51:15 -0800 + Commit: 38703bb, github.com/apache/spark/pull/3665 + + [SPARK-4826] Fix generation of temp file names in WAL tests + Josh Rosen + 2014-12-15 14:33:43 -0800 + Commit: f6b8591, github.com/apache/spark/pull/3695. 
+ + [SPARK-4494][mllib] IDFModel.transform() add support for single vector + Yuu ISHIKAWA + 2014-12-15 13:44:15 -0800 + Commit: 8098fab, github.com/apache/spark/pull/3603 + + HOTFIX: Disabling failing block manager test + Patrick Wendell + 2014-12-15 10:54:45 -0800 + Commit: 4c06738 + + fixed spelling errors in documentation + Peter Klipfel + 2014-12-14 00:01:16 -0800 + Commit: 2a2983f, github.com/apache/spark/pull/3691 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-12-11 23:38:40 -0800 + Commit: ef84dab, github.com/apache/spark/pull/3488 + + [SPARK-4829] [SQL] add rule to fold count(expr) if expr is not null + Daoyuan Wang + 2014-12-11 22:56:42 -0800 + Commit: 41a3f93, github.com/apache/spark/pull/3676 + + [SPARK-4742][SQL] The name of Parquet File generated by AppendingParquetOutputFormat should be zero padded + Sasaki Toru + 2014-12-11 22:54:21 -0800 + Commit: 8091dd6, github.com/apache/spark/pull/3602 + + [SPARK-4825] [SQL] CTAS fails to resolve when created using saveAsTable + Cheng Hao + 2014-12-11 22:51:49 -0800 + Commit: 0abbff2, github.com/apache/spark/pull/3673 + + [SQL] enable empty aggr test case + Daoyuan Wang + 2014-12-11 22:50:18 -0800 + Commit: cbb634a, github.com/apache/spark/pull/3445 + + [SPARK-4828] [SQL] sum and avg on empty table should always return null + Daoyuan Wang + 2014-12-11 22:49:27 -0800 + Commit: acb3be6, github.com/apache/spark/pull/3675 + + [SQL] Remove unnecessary case in HiveContext.toHiveString + scwf + 2014-12-11 22:48:03 -0800 + Commit: d8cf678, github.com/apache/spark/pull/3563 + + [SPARK-4293][SQL] Make Cast be able to handle complex types. + Takuya UESHIN + 2014-12-11 22:45:25 -0800 + Commit: 3344803, github.com/apache/spark/pull/3150 + + [SPARK-4639] [SQL] Pass maxIterations in as a parameter in Analyzer + Jacky Li + 2014-12-11 22:44:27 -0800 + Commit: c152dde, github.com/apache/spark/pull/3499 + + [SPARK-4662] [SQL] Whitelist more unittest + Cheng Hao + 2014-12-11 22:43:02 -0800 + Commit: a7f07f5, github.com/apache/spark/pull/3522 + + [SPARK-4713] [SQL] SchemaRDD.unpersist() should not raise exception if it is not persisted + Cheng Hao + 2014-12-11 22:41:36 -0800 + Commit: bf40cf8, github.com/apache/spark/pull/3572 + + [SPARK-4806] Streaming doc update for 1.2 + Tathagata Das , Josh Rosen , Josh Rosen + 2014-12-11 06:21:23 -0800 + Commit: b004150, github.com/apache/spark/pull/3653 + + [SPARK-4791] [sql] Infer schema from case class with multiple constructors + Joseph K. Bradley + 2014-12-10 23:41:15 -0800 + Commit: 2a5b5fd, github.com/apache/spark/pull/3646 + + [CORE]codeStyle: uniform ConcurrentHashMap define in StorageLevel.scala with other places + Zhang, Liye + 2014-12-10 20:44:59 -0800 + Commit: 57d37f9, github.com/apache/spark/pull/2793 + + SPARK-3526 Add section about data locality to the tuning guide + Andrew Ash + 2014-12-10 15:01:15 -0800 + Commit: 652b781, github.com/apache/spark/pull/2519 + + MAINTENANCE: Automated closing of pull requests. 
+ Patrick Wendell + 2014-12-10 14:41:16 -0800 + Commit: 36bdb5b, github.com/apache/spark/pull/2883 + + [SPARK-4759] Fix driver hanging from coalescing partitions + Andrew Or + 2014-12-10 14:27:53 -0800 + Commit: 4f93d0c, github.com/apache/spark/pull/3633 + + [SPARK-4569] Rename 'externalSorting' in Aggregator + Ilya Ganelin + 2014-12-10 14:19:37 -0800 + Commit: 447ae2d, github.com/apache/spark/pull/3666 + + [SPARK-4793] [Deploy] ensure .jar at end of line + Daoyuan Wang + 2014-12-10 13:29:27 -0800 + Commit: e230da1, github.com/apache/spark/pull/3641 + + [SPARK-4215] Allow requesting / killing executors only in YARN mode + Andrew Or + 2014-12-10 12:48:24 -0800 + Commit: faa8fd8, github.com/apache/spark/pull/3615 + + [SPARK-4771][Docs] Document standalone cluster supervise mode + Andrew Or + 2014-12-10 12:41:36 -0800 + Commit: 5621283, github.com/apache/spark/pull/3627 + + [SPARK-4329][WebUI] HistoryPage pagenation + Kousuke Saruta + 2014-12-10 12:29:00 -0800 + Commit: 0fc637b, github.com/apache/spark/pull/3194 + + [SPARK-4161]Spark shell class path is not correctly set if "spark.driver.extraClassPath" is set in defaults.conf + GuoQiang Li + 2014-12-10 12:24:04 -0800 + Commit: 742e709, github.com/apache/spark/pull/3050 + + [SPARK-4772] Clear local copies of accumulators as soon as we're done with them + Nathan Kronenfeld + 2014-12-09 23:53:17 -0800 + Commit: 94b377f, github.com/apache/spark/pull/3570 + + [Minor] Use tag for help icon in web UI page header + Josh Rosen + 2014-12-09 23:47:05 -0800 + Commit: f79c1cf, github.com/apache/spark/pull/3659 + + Config updates for the new shuffle transport. + Reynold Xin + 2014-12-09 19:29:09 -0800 + Commit: 9bd9334, github.com/apache/spark/pull/3657 + + [SPARK-4740] Create multiple concurrent connections between two peer nodes in Netty. + Reynold Xin + 2014-12-09 17:49:59 -0800 + Commit: 2b9b726, github.com/apache/spark/pull/3625 + + SPARK-4805 [CORE] BlockTransferMessage.toByteArray() trips assertion + Sean Owen + 2014-12-09 16:38:27 -0800 + Commit: d8f84f2, github.com/apache/spark/pull/3650 + + SPARK-4567. Make SparkJobInfo and SparkStageInfo serializable + Sandy Ryza + 2014-12-09 16:26:07 -0800 + Commit: 5e4c06f, github.com/apache/spark/pull/3426 + + [SPARK-4714] BlockManager.dropFromMemory() should check whether block has been removed after synchronizing on BlockInfo instance. + hushan[胡珊] + 2014-12-09 15:11:20 -0800 + Commit: 30dca92, github.com/apache/spark/pull/3574 + + [SPARK-4765] Make GC time always shown in UI. + Kay Ousterhout + 2014-12-09 15:10:36 -0800 + Commit: 1f51106, github.com/apache/spark/pull/3622 + + [SPARK-4691][shuffle] Restructure a few lines in shuffle code + maji2014 + 2014-12-09 13:13:12 -0800 + Commit: b310744, github.com/apache/spark/pull/3553 + + [SPARK-874] adding a --wait flag + jbencook + 2014-12-09 12:16:19 -0800 + Commit: 61f1a70, github.com/apache/spark/pull/3567 + + SPARK-4338. [YARN] Ditch yarn-alpha. 
+ Sandy Ryza + 2014-12-09 11:02:43 -0800 + Commit: 912563a, github.com/apache/spark/pull/3215 + + [SPARK-4785][SQL] Initilize Hive UDFs on the driver and serialize them with a wrapper + Cheng Hao , Cheng Lian + 2014-12-09 10:28:15 -0800 + Commit: 383c555, github.com/apache/spark/pull/3640 + + [SPARK-3154][STREAMING] Replace ConcurrentHashMap with mutable.HashMap and remove @volatile from 'stopped' + zsxwing + 2014-12-08 23:54:15 -0800 + Commit: bcb5cda, github.com/apache/spark/pull/3634 + + [SPARK-4769] [SQL] CTAS does not work when reading from temporary tables + Cheng Hao + 2014-12-08 17:39:12 -0800 + Commit: 51b1fe1, github.com/apache/spark/pull/3336 + + [SQL] remove unnecessary import in spark-sql + Jacky Li + 2014-12-08 17:27:46 -0800 + Commit: 9443843, github.com/apache/spark/pull/3630 + + SPARK-4770. [DOC] [YARN] spark.scheduler.minRegisteredResourcesRatio doc... + Sandy Ryza + 2014-12-08 16:28:36 -0800 + Commit: cda94d1, github.com/apache/spark/pull/3624 + + SPARK-3926 [CORE] Reopened: result of JavaRDD collectAsMap() is not serializable + Sean Owen + 2014-12-08 16:13:03 -0800 + Commit: e829bfa, github.com/apache/spark/pull/3587 + + [SPARK-4750] Dynamic allocation - synchronize kills + Andrew Or + 2014-12-08 16:02:33 -0800 + Commit: 65f929d, github.com/apache/spark/pull/3612 + + [SPARK-4774] [SQL] Makes HiveFromSpark more portable + Kostas Sakellis + 2014-12-08 15:44:18 -0800 + Commit: d6a972b, github.com/apache/spark/pull/3628 + + [SPARK-4764] Ensure that files are fetched atomically + Christophe Préaud + 2014-12-08 11:44:54 -0800 + Commit: ab2abcb, github.com/apache/spark/pull/2855 + + [SPARK-4620] Add unpersist in Graph and GraphImpl + Takeshi Yamamuro + 2014-12-07 19:42:02 -0800 + Commit: 8817fc7, github.com/apache/spark/pull/3476 + + [SPARK-4646] Replace Scala.util.Sorting.quickSort with Sorter(TimSort) in Spark + Takeshi Yamamuro + 2014-12-07 19:36:08 -0800 + Commit: 2e6b736, github.com/apache/spark/pull/3507 + + [SPARK-3623][GraphX] GraphX should support the checkpoint operation + GuoQiang Li + 2014-12-06 00:56:51 -0800 + Commit: e895e0c, github.com/apache/spark/pull/2631 + + Streaming doc : do you mean inadvertently? + CrazyJvm + 2014-12-05 13:42:13 -0800 + Commit: 6eb1b6f, github.com/apache/spark/pull/3620 + + [SPARK-4005][CORE] handle message replies in receive instead of in the individual private methods + Zhang, Liye + 2014-12-05 12:00:32 -0800 + Commit: 98a7d09, github.com/apache/spark/pull/2853 + + [SPARK-4761][SQL] Enables Kryo by default in Spark SQL Thrift server + Cheng Lian + 2014-12-05 10:27:40 -0800 + Commit: 6f61e1f, github.com/apache/spark/pull/3621 + + [SPARK-4753][SQL] Use catalyst for partition pruning in newParquet. + Michael Armbrust + 2014-12-04 22:25:21 -0800 + Commit: f5801e8, github.com/apache/spark/pull/3613 + + Revert "SPARK-2624 add datanucleus jars to the container in yarn-cluster" + Andrew Or + 2014-12-04 21:53:49 -0800 + Commit: fd85253 + + Revert "[HOT FIX] [YARN] Check whether `/lib` exists before listing its files" + Andrew Or + 2014-12-04 21:53:38 -0800 + Commit: 87437df + + [SPARK-4464] Description about configuration options need to be modified in docs. + Masayoshi TSUZUKI + 2014-12-04 19:33:02 -0800 + Commit: ca37903, github.com/apache/spark/pull/3329 + + Fix typo in Spark SQL docs. 
+ Andy Konwinski + 2014-12-04 18:27:02 -0800 + Commit: 15cf3b0, github.com/apache/spark/pull/3611 + + [SPARK-4421] Wrong link in spark-standalone.html + Masayoshi TSUZUKI + 2014-12-04 18:14:36 -0800 + Commit: ddfc09c, github.com/apache/spark/pull/3279 + + [SPARK-4397] Move object RDD to the front of RDD.scala. + Reynold Xin + 2014-12-04 16:32:20 -0800 + Commit: ed92b47, github.com/apache/spark/pull/3580 + + [SPARK-4652][DOCS] Add docs about spark-git-repo option + lewuathe , Josh Rosen + 2014-12-04 15:14:36 -0800 + Commit: ab8177d, github.com/apache/spark/pull/3513 + + [SPARK-4459] Change groupBy type parameter from K to U + Saldanha + 2014-12-04 14:22:09 -0800 + Commit: 743a889, github.com/apache/spark/pull/3327 + + [SPARK-4745] Fix get_existing_cluster() function with multiple security groups + alexdebrie + 2014-12-04 14:13:59 -0800 + Commit: 794f3ae, github.com/apache/spark/pull/3596 + + [HOTFIX] Fixing two issues with the release script. + Patrick Wendell + 2014-12-04 12:11:41 -0800 + Commit: 8dae26f, github.com/apache/spark/pull/3608 + + [SPARK-4253] Ignore spark.driver.host in yarn-cluster and standalone-cluster modes + WangTaoTheTonic , WangTao + 2014-12-04 11:52:47 -0800 + Commit: 8106b1e, github.com/apache/spark/pull/3112 + + [SPARK-4683][SQL] Add a beeline.cmd to run on Windows + Cheng Lian + 2014-12-04 10:21:03 -0800 + Commit: 28c7aca, github.com/apache/spark/pull/3599 + + [FIX][DOC] Fix broken links in ml-guide.md + Xiangrui Meng + 2014-12-04 20:16:35 +0800 + Commit: 7e758d7, github.com/apache/spark/pull/3601 + + [SPARK-4575] [mllib] [docs] spark.ml pipelines doc + bug fixes + Joseph K. Bradley , jkbradley , Xiangrui Meng + 2014-12-04 17:00:06 +0800 + Commit: 469a6e5, github.com/apache/spark/pull/3588 + + [docs] Fix outdated comment in tuning guide + Joseph K. Bradley + 2014-12-04 00:59:32 -0800 + Commit: 529439b, github.com/apache/spark/pull/3592 + + [SQL] Minor: Avoid calling Seq#size in a loop + Aaron Davidson + 2014-12-04 00:58:42 -0800 + Commit: c6c7165, github.com/apache/spark/pull/3593 + + [SPARK-4685] Include all spark.ml and spark.mllib packages in JavaDoc's MLlib group + lewuathe , Xiangrui Meng + 2014-12-04 16:51:41 +0800 + Commit: 20bfea4, github.com/apache/spark/pull/3554 + + [SPARK-4719][API] Consolidate various narrow dep RDD classes with MapPartitionsRDD + Reynold Xin + 2014-12-04 00:45:57 -0800 + Commit: c3ad486, github.com/apache/spark/pull/3578 + + [SQL] remove unnecessary import + Jacky Li + 2014-12-04 00:43:55 -0800 + Commit: ed88db4, github.com/apache/spark/pull/3585 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-12-03 22:15:46 -0800 + Commit: 3cdae03, github.com/apache/spark/pull/1875 + + [Release] Correctly translate contributors name in release notes + Andrew Or + 2014-12-03 19:08:29 -0800 + Commit: a4dfb4e + + [SPARK-4580] [SPARK-4610] [mllib] [docs] Documentation for tree ensembles + DecisionTree API fix + Joseph K. Bradley , Joseph K. Bradley + 2014-12-04 09:57:50 +0800 + Commit: 657a888, github.com/apache/spark/pull/3461 + + [SPARK-4711] [mllib] [docs] Programming guide advice on choosing optimizer + Joseph K. Bradley + 2014-12-04 08:58:03 +0800 + Commit: 27ab0b8, github.com/apache/spark/pull/3569 + + [SPARK-4085] Propagate FetchFailedException when Spark fails to read local shuffle file. 
+ Reynold Xin + 2014-12-03 16:28:24 -0800 + Commit: 1826372, github.com/apache/spark/pull/3579 + + [SPARK-4498][core] Don't transition ExecutorInfo to RUNNING until Driver adds Executor + Mark Hamstra + 2014-12-03 15:08:01 -0800 + Commit: 96b2785, github.com/apache/spark/pull/3550 + + [SPARK-4552][SQL] Avoid exception when reading empty parquet data through Hive + Michael Armbrust + 2014-12-03 14:13:35 -0800 + Commit: 513ef82, github.com/apache/spark/pull/3586 + + [HOT FIX] [YARN] Check whether `/lib` exists before listing its files + Andrew Or + 2014-12-03 13:56:23 -0800 + Commit: 90ec643, github.com/apache/spark/pull/3589 + + [SPARK-4642] Add description about spark.yarn.queue to running-on-YARN document. + Masayoshi TSUZUKI + 2014-12-03 13:16:24 -0800 + Commit: 692f493, github.com/apache/spark/pull/3500 + + [SPARK-4715][Core] Make sure tryToAcquire won't return a negative value + zsxwing + 2014-12-03 12:19:40 -0800 + Commit: edd3cd4, github.com/apache/spark/pull/3575 + + [SPARK-4701] Typo in sbt/sbt + Masayoshi TSUZUKI + 2014-12-03 12:08:00 -0800 + Commit: 96786e3, github.com/apache/spark/pull/3560 + + SPARK-2624 add datanucleus jars to the container in yarn-cluster + Jim Lim + 2014-12-03 11:16:02 -0800 + Commit: a975dc3, github.com/apache/spark/pull/3238 + + [SPARK-4717][MLlib] Optimize BLAS library to avoid de-reference multiple times in loop + DB Tsai + 2014-12-03 22:31:39 +0800 + Commit: d005429, github.com/apache/spark/pull/3577 + + [SPARK-4708][MLLib] Make k-mean runs two/three times faster with dense/sparse sample + DB Tsai + 2014-12-03 19:01:56 +0800 + Commit: 7fc49ed, github.com/apache/spark/pull/3565 + + [SPARK-4710] [mllib] Eliminate MLlib compilation warnings + Joseph K. Bradley + 2014-12-03 18:50:03 +0800 + Commit: 4ac2151, github.com/apache/spark/pull/3568 + + [SPARK-4397][Core] Change the 'since' value of '@deprecated' to '1.3.0' + zsxwing + 2014-12-03 02:05:17 -0800 + Commit: 8af551f, github.com/apache/spark/pull/3573 + + [SPARK-4672][Core]Checkpoint() should clear f to shorten the serialization chain + JerryLead , Lijie Xu + 2014-12-02 23:53:29 -0800 + Commit: 77be8b9, github.com/apache/spark/pull/3545 + + [SPARK-4672][GraphX]Non-transient PartitionsRDDs will lead to StackOverflow error + JerryLead , Lijie Xu + 2014-12-02 17:14:11 -0800 + Commit: 17c162f, github.com/apache/spark/pull/3544 + + [SPARK-4672][GraphX]Perform checkpoint() on PartitionsRDD to shorten the lineage + JerryLead , Lijie Xu + 2014-12-02 17:08:02 -0800 + Commit: fc0a147, github.com/apache/spark/pull/3549 + + [Release] Translate unknown author names automatically + Andrew Or + 2014-12-02 16:36:12 -0800 + Commit: 5da21f0 + + Minor nit style cleanup in GraphX. 
+ Reynold Xin + 2014-12-02 14:40:26 -0800 + Commit: 2d4f6e7 + + [SPARK-4695][SQL] Get result using executeCollect + wangfei + 2014-12-02 14:30:44 -0800 + Commit: 3ae0cda, github.com/apache/spark/pull/3547 + + [SPARK-4670] [SQL] wrong symbol for bitwise not + Daoyuan Wang + 2014-12-02 14:25:12 -0800 + Commit: 1f5ddf1, github.com/apache/spark/pull/3528 + + [SPARK-4593][SQL] Return null when denominator is 0 + Daoyuan Wang + 2014-12-02 14:21:12 -0800 + Commit: f6df609, github.com/apache/spark/pull/3443 + + [SPARK-4676][SQL] JavaSchemaRDD.schema may throw NullType MatchError if sql has null + YanTangZhai , yantangzhai , Michael Armbrust + 2014-12-02 14:12:48 -0800 + Commit: 1066427, github.com/apache/spark/pull/3538 + + [SPARK-4663][sql]add finally to avoid resource leak + baishuo + 2014-12-02 12:12:03 -0800 + Commit: 69b6fed, github.com/apache/spark/pull/3526 + + [SPARK-4536][SQL] Add sqrt and abs to Spark SQL DSL + Kousuke Saruta + 2014-12-02 12:07:52 -0800 + Commit: e75e04f, github.com/apache/spark/pull/3401 + + Indent license header properly for interfaces.scala. + Reynold Xin + 2014-12-02 11:59:15 -0800 + Commit: b1f8fe3, github.com/apache/spark/pull/3552 + + [SPARK-4686] Link to allowed master URLs is broken + Kay Ousterhout + 2014-12-02 09:06:02 -0800 + Commit: d9a148b, github.com/apache/spark/pull/3542 + + [SPARK-4397][Core] Cleanup 'import SparkContext._' in core + zsxwing + 2014-12-02 00:18:41 -0800 + Commit: 6dfe38a, github.com/apache/spark/pull/3530 + + [SPARK-4611][MLlib] Implement the efficient vector norm + DB Tsai + 2014-12-02 11:40:43 +0800 + Commit: 64f3175, github.com/apache/spark/pull/3462 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-12-01 17:27:14 -0800 + Commit: b0a46d8, github.com/apache/spark/pull/1612 + + [SPARK-4268][SQL] Use #::: to get benefit from Stream in SqlLexical.allCaseVersions + zsxwing + 2014-12-01 16:39:54 -0800 + Commit: d3e02dd, github.com/apache/spark/pull/3132 + + [SPARK-4529] [SQL] support view with column alias + Daoyuan Wang + 2014-12-01 16:08:51 -0800 + Commit: 4df60a8, github.com/apache/spark/pull/3396 + + [SQL][DOC] Date type in SQL programming guide + Daoyuan Wang + 2014-12-01 14:03:57 -0800 + Commit: 5edbcbf, github.com/apache/spark/pull/3535 + + [SQL] Minor fix for doc and comment + wangfei + 2014-12-01 14:02:02 -0800 + Commit: 7b79957, github.com/apache/spark/pull/3533 + + [SPARK-4658][SQL] Code documentation issue in DDL of datasource API + ravipesala + 2014-12-01 13:31:27 -0800 + Commit: bc35381, github.com/apache/spark/pull/3516 + + [SPARK-4650][SQL] Supporting multi column support in countDistinct function like count(distinct c1,c2..) 
in Spark SQL + ravipesala , Michael Armbrust + 2014-12-01 13:26:44 -0800 + Commit: 6a9ff19, github.com/apache/spark/pull/3511 + + [SPARK-4358][SQL] Let BigDecimal do checking type compatibility + Liang-Chi Hsieh + 2014-12-01 13:17:56 -0800 + Commit: b57365a, github.com/apache/spark/pull/3208 + + [SQL] add @group tab in limit() and count() + Jacky Li + 2014-12-01 13:12:30 -0800 + Commit: bafee67, github.com/apache/spark/pull/3458 + + [SPARK-4258][SQL][DOC] Documents spark.sql.parquet.filterPushdown + Cheng Lian + 2014-12-01 13:09:51 -0800 + Commit: 5db8dca, github.com/apache/spark/pull/3440 + + Documentation: add description for repartitionAndSortWithinPartitions + Madhu Siddalingaiah + 2014-12-01 08:45:34 -0800 + Commit: 2b233f5, github.com/apache/spark/pull/3390 + + [SPARK-4661][Core] Minor code and docs cleanup + zsxwing + 2014-12-01 00:35:01 -0800 + Commit: 30a86ac, github.com/apache/spark/pull/3521 + + [SPARK-4664][Core] Throw an exception when spark.akka.frameSize > 2047 + zsxwing + 2014-12-01 00:32:54 -0800 + Commit: 1d238f2, github.com/apache/spark/pull/3527 + + SPARK-2192 [BUILD] Examples Data Not in Binary Distribution + Sean Owen + 2014-12-01 16:31:04 +0800 + Commit: 6384f42, github.com/apache/spark/pull/3480 + + Fix wrong file name pattern in .gitignore + Kousuke Saruta + 2014-12-01 00:29:28 -0800 + Commit: 97eb6d7, github.com/apache/spark/pull/3529 + + [SPARK-4632] version update + Prabeesh K + 2014-11-30 20:51:53 -0800 + Commit: 5e7a6dc, github.com/apache/spark/pull/3495 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-11-30 20:51:13 -0800 + Commit: 06dc1b1, github.com/apache/spark/pull/2915 + + [DOC] Fixes formatting typo in SQL programming guide + Cheng Lian + 2014-11-30 19:04:07 -0800 + Commit: 2a4d389, github.com/apache/spark/pull/3498 + + [SPARK-4656][Doc] Typo in Programming Guide markdown + lewuathe + 2014-11-30 17:18:50 -0800 + Commit: a217ec5, github.com/apache/spark/pull/3412 + + [SPARK-4623]Add the some error infomation if using spark-sql in yarn-cluster mode + carlmartin , huangzhaowei + 2014-11-30 16:19:41 -0800 + Commit: aea7a99, github.com/apache/spark/pull/3479 + + SPARK-2143 [WEB UI] Add Spark version to UI footer + Sean Owen + 2014-11-30 11:40:08 -0800 + Commit: 048ecca, github.com/apache/spark/pull/3410 + + [DOCS][BUILD] Add instruction to use change-version-to-2.11.sh in 'Building for Scala 2.11'. + Takuya UESHIN + 2014-11-30 00:10:31 -0500 + Commit: 0fcd24c, github.com/apache/spark/pull/3361 + + SPARK-4507: PR merge script should support closing multiple JIRA tickets + Takayuki Hasegawa + 2014-11-29 23:12:10 -0500 + Commit: 4316a7b, github.com/apache/spark/pull/3428 + + [SPARK-4505][Core] Add a ClassTag parameter to CompactBuffer[T] + zsxwing + 2014-11-29 20:23:08 -0500 + Commit: c062224, github.com/apache/spark/pull/3378 + + [SPARK-4057] Use -agentlib instead of -Xdebug in sbt-launch-lib.bash for debugging + Kousuke Saruta + 2014-11-29 20:14:14 -0500 + Commit: 938dc14, github.com/apache/spark/pull/2904 + + Include the key name when failing on an invalid value. + Stephen Haberman + 2014-11-29 20:12:05 -0500 + Commit: 95290bf, github.com/apache/spark/pull/3514 + + [SPARK-3398] [SPARK-4325] [EC2] Use EC2 status checks. 
+ Nicholas Chammas + 2014-11-29 00:31:06 -0800 + Commit: 317e114, github.com/apache/spark/pull/3195 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-11-29 00:24:35 -0500 + Commit: 047ff57, github.com/apache/spark/pull/3451 + + [SPARK-4597] Use proper exception and reset variable in Utils.createTempDir() + Liang-Chi Hsieh + 2014-11-28 18:04:05 -0800 + Commit: 49fe879, github.com/apache/spark/pull/3449 + + SPARK-1450 [EC2] Specify the default zone in the EC2 script help + Sean Owen + 2014-11-28 17:43:38 -0500 + Commit: 48223d8, github.com/apache/spark/pull/3454 + + [SPARK-4584] [yarn] Remove security manager from Yarn AM. + Marcelo Vanzin + 2014-11-28 15:15:30 -0500 + Commit: 915f8ee, github.com/apache/spark/pull/3484 + + [SPARK-4193][BUILD] Disable doclint in Java 8 to prevent from build error. + Takuya UESHIN + 2014-11-28 13:00:15 -0500 + Commit: e464f0a, github.com/apache/spark/pull/3058 + + [SPARK-4643] [Build] Remove unneeded staging repositories from build + Daoyuan Wang + 2014-11-28 12:41:38 -0500 + Commit: 53ed7f1, github.com/apache/spark/pull/3504 + + Delete unnecessary function + KaiXinXiaoLei + 2014-11-28 12:34:07 -0500 + Commit: 052e658, github.com/apache/spark/pull/3224 + + [SPARK-4645][SQL] Disables asynchronous execution in Hive 0.13.1 HiveThriftServer2 + Cheng Lian + 2014-11-28 11:42:40 -0500 + Commit: 5b99bf2, github.com/apache/spark/pull/3506 + + [SPARK-4619][Storage]delete redundant time suffix + maji2014 + 2014-11-28 00:36:22 -0800 + Commit: ceb6281, github.com/apache/spark/pull/3475 + + [SPARK-4613][Core] Java API for JdbcRDD + Cheng Lian + 2014-11-27 18:01:14 -0800 + Commit: 120a350, github.com/apache/spark/pull/3478 + + [SPARK-4626] Kill a task only if the executorId is (still) registered with the scheduler + roxchkplusony + 2014-11-27 15:54:40 -0800 + Commit: 84376d3, github.com/apache/spark/pull/3483 + + SPARK-4170 [CORE] Closure problems when running Scala app that "extends App" + Sean Owen + 2014-11-27 09:03:17 -0800 + Commit: 5d7fe17, github.com/apache/spark/pull/3497 + + [Release] Automate generation of contributors list + Andrew Or + 2014-11-26 23:16:23 -0800 + Commit: c86e9bc + + [SPARK-732][SPARK-3628][CORE][RESUBMIT] eliminate duplicate update on accmulator + CodingCat + 2014-11-26 16:52:04 -0800 + Commit: 5af53ad, github.com/apache/spark/pull/2524 + + [SPARK-4614][MLLIB] Slight API changes in Matrix and Matrices + Xiangrui Meng + 2014-11-26 08:22:50 -0800 + Commit: 561d31d, github.com/apache/spark/pull/3468 + + Removing confusing TripletFields + Joseph E. 
Gonzalez + 2014-11-26 00:55:28 -0800 + Commit: 288ce58, github.com/apache/spark/pull/3472 + + [SPARK-4612] Reduce task latency and increase scheduling throughput by making configuration initialization lazy + Tathagata Das + 2014-11-25 23:15:58 -0800 + Commit: e7f4d25, github.com/apache/spark/pull/3463 + + [SPARK-4516] Avoid allocating Netty PooledByteBufAllocators unnecessarily + Aaron Davidson + 2014-11-26 00:32:45 -0500 + Commit: 346bc17, github.com/apache/spark/pull/3465 + + [SPARK-4516] Cap default number of Netty threads at 8 + Aaron Davidson + 2014-11-25 23:57:04 -0500 + Commit: f5f2d27, github.com/apache/spark/pull/3469 + + [SPARK-4604][MLLIB] make MatrixFactorizationModel public + Xiangrui Meng + 2014-11-25 20:11:40 -0800 + Commit: b5fb141, github.com/apache/spark/pull/3459 + + [HOTFIX]: Adding back without-hive dist + Patrick Wendell + 2014-11-25 23:10:19 -0500 + Commit: 4d95526 + + [SPARK-4583] [mllib] LogLoss for GradientBoostedTrees fix + doc updates + Joseph K. Bradley + 2014-11-25 20:10:15 -0800 + Commit: c251fd7, github.com/apache/spark/pull/3439 + + [Spark-4509] Revert EC2 tag-based cluster membership patch + Xiangrui Meng + 2014-11-25 16:07:09 -0800 + Commit: 7eba0fb, github.com/apache/spark/pull/3453 + + Fix SPARK-4471: blockManagerIdFromJson function throws exception while B... + hushan[胡珊] + 2014-11-25 15:51:08 -0800 + Commit: 9bdf5da, github.com/apache/spark/pull/3340 + + [SPARK-4546] Improve HistoryServer first time user experience + Andrew Or + 2014-11-25 15:48:02 -0800 + Commit: 9afcbe4, github.com/apache/spark/pull/3411 + + [SPARK-4592] Avoid duplicate worker registrations in standalone mode + Andrew Or + 2014-11-25 15:46:26 -0800 + Commit: 1b2ab1c, github.com/apache/spark/pull/3447 + + [SPARK-4196][SPARK-4602][Streaming] Fix serialization issue in PairDStreamFunctions.saveAsNewAPIHadoopFiles + Tathagata Das + 2014-11-25 14:16:27 -0800 + Commit: 8838ad7, github.com/apache/spark/pull/3457 + + [SPARK-4581][MLlib] Refactorize StandardScaler to improve the transformation performance + DB Tsai + 2014-11-25 11:07:11 -0800 + Commit: bf1a6aa, github.com/apache/spark/pull/3435 + + [SPARK-4601][Streaming] Set correct call site for streaming jobs so that it is displayed correctly on the Spark UI + Tathagata Das + 2014-11-25 06:50:36 -0800 + Commit: 69cd53e, github.com/apache/spark/pull/3455 + + [SPARK-4344][DOCS] adding documentation on spark.yarn.user.classpath.first + arahuja + 2014-11-25 08:23:41 -0600 + Commit: d240760, github.com/apache/spark/pull/3209 + + [SPARK-4381][Streaming]Add warning log when user set spark.master to local in Spark Streaming and there's no job executed + jerryshao + 2014-11-25 05:36:29 -0800 + Commit: fef27b2, github.com/apache/spark/pull/3244 + + [SPARK-4535][Streaming] Fix the error in comments + q00251598 + 2014-11-25 04:01:56 -0800 + Commit: a51118a, github.com/apache/spark/pull/3400 + + [SPARK-4526][MLLIB]GradientDescent get a wrong gradient value according to the gradient formula. 
+ GuoQiang Li + 2014-11-25 02:01:19 -0800 + Commit: f515f94, github.com/apache/spark/pull/3399 + + [SPARK-4596][MLLib] Refactorize Normalizer to make code cleaner + DB Tsai + 2014-11-25 01:57:34 -0800 + Commit: 89f9122, github.com/apache/spark/pull/3446 + + [DOC][Build] Wrong cmd for build spark with apache hadoop 2.4.X and hive 12 + wangfei + 2014-11-24 22:32:39 -0800 + Commit: 0fe54cf, github.com/apache/spark/pull/3335 + + [SQL] Compute timeTaken correctly + w00228970 + 2014-11-24 21:17:24 -0800 + Commit: 723be60, github.com/apache/spark/pull/3423 + + [SPARK-4582][MLLIB] get raw vectors for further processing in Word2Vec + tkaessmann , tkaessmann + 2014-11-24 19:58:01 -0800 + Commit: 9ce2bf3, github.com/apache/spark/pull/3309 + + [SPARK-4525] Mesos should decline unused offers + Patrick Wendell , Jongyoul Lee + 2014-11-24 19:14:14 -0800 + Commit: f0afb62, github.com/apache/spark/pull/3436 + + Revert "[SPARK-4525] Mesos should decline unused offers" + Patrick Wendell + 2014-11-24 19:16:53 -0800 + Commit: a68d442 + + [SPARK-4525] Mesos should decline unused offers + Patrick Wendell , Jongyoul Lee + 2014-11-24 19:14:14 -0800 + Commit: b043c27, github.com/apache/spark/pull/3436 + + [SPARK-4266] [Web-UI] Reduce stage page load time. + Kay Ousterhout + 2014-11-24 18:03:10 -0800 + Commit: d24d5bf, github.com/apache/spark/pull/3328 + + [SPARK-4548] []SPARK-4517] improve performance of python broadcast + Davies Liu + 2014-11-24 17:17:03 -0800 + Commit: 6cf5076, github.com/apache/spark/pull/3417 + + [SPARK-4578] fix asDict() with nested Row() + Davies Liu + 2014-11-24 16:41:23 -0800 + Commit: 050616b, github.com/apache/spark/pull/3434 + + [SPARK-4562] [MLlib] speedup vector + Davies Liu + 2014-11-24 16:37:14 -0800 + Commit: b660de7, github.com/apache/spark/pull/3420 + + [SPARK-4518][SPARK-4519][Streaming] Refactored file stream to prevent files from being processed multiple times + Tathagata Das + 2014-11-24 13:50:20 -0800 + Commit: cb0e9b0, github.com/apache/spark/pull/3419 + + [SPARK-4145] Web UI job pages + Josh Rosen + 2014-11-24 13:18:14 -0800 + Commit: 4a90276, github.com/apache/spark/pull/3009 + + [SPARK-4487][SQL] Fix attribute reference resolution error when using ORDER BY. + Kousuke Saruta + 2014-11-24 12:54:37 -0800 + Commit: dd1c9cb, github.com/apache/spark/pull/3363 + + [SQL] Fix path in HiveFromSpark + scwf + 2014-11-24 12:49:08 -0800 + Commit: b384119, github.com/apache/spark/pull/3415 + + [SQL] Fix comment in HiveShim + Daniel Darabos + 2014-11-24 12:45:07 -0800 + Commit: d5834f0, github.com/apache/spark/pull/3432 + + [SPARK-4479][SQL] Avoids unnecessary defensive copies when sort based shuffle is on + Cheng Lian + 2014-11-24 12:43:45 -0800 + Commit: a6d7b61, github.com/apache/spark/pull/3422 + + SPARK-4457. Document how to build for Hadoop versions greater than 2.4 + Sandy Ryza + 2014-11-24 13:28:48 -0600 + Commit: 29372b6, github.com/apache/spark/pull/3322 + + [SPARK-4377] Fixed serialization issue by switching to akka provided serializer. 
+ Prashant Sharma + 2014-11-22 14:05:38 -0800 + Commit: 9b2a3c6, github.com/apache/spark/pull/3402 + + [SPARK-4431][MLlib] Implement efficient foreachActive for dense and sparse vector + DB Tsai + 2014-11-21 18:15:07 -0800 + Commit: b5d17ef, github.com/apache/spark/pull/3288 + + [SPARK-4531] [MLlib] cache serialized java object + Davies Liu + 2014-11-21 15:02:31 -0800 + Commit: ce95bd8, github.com/apache/spark/pull/3397 + + SPARK-4532: Fix bug in detection of Hive in Spark 1.2 + Patrick Wendell + 2014-11-21 12:10:04 -0800 + Commit: a81918c, github.com/apache/spark/pull/3398 + + [SPARK-4397][Core] Reorganize 'implicit's to improve the API convenience + zsxwing + 2014-11-21 10:06:30 -0800 + Commit: 65b987c, github.com/apache/spark/pull/3262 + + [SPARK-4472][Shell] Print "Spark context available as sc." only when SparkContext is created... + zsxwing + 2014-11-21 00:42:43 -0800 + Commit: f1069b8, github.com/apache/spark/pull/3341 + + [Doc][GraphX] Remove unused png files. + Reynold Xin + 2014-11-21 00:30:58 -0800 + Commit: 28fdc6f + + [Doc][GraphX] Remove Motivation section and did some minor update. + Reynold Xin + 2014-11-21 00:29:02 -0800 + Commit: b97070e + + [SPARK-4522][SQL] Parse schema with missing metadata. + Michael Armbrust + 2014-11-20 20:34:43 -0800 + Commit: 90a6a46, github.com/apache/spark/pull/3392 + + add Sphinx as a dependency of building docs + Davies Liu + 2014-11-20 19:12:45 -0800 + Commit: 8cd6eea, github.com/apache/spark/pull/3388 + + [SPARK-4413][SQL] Parquet support through datasource API + Michael Armbrust + 2014-11-20 18:31:02 -0800 + Commit: 02ec058, github.com/apache/spark/pull/3269 + + [SPARK-4244] [SQL] Support Hive Generic UDFs with constant object inspector parameters + Cheng Hao + 2014-11-20 16:50:59 -0800 + Commit: 84d79ee, github.com/apache/spark/pull/3109 + + [SPARK-4477] [PySpark] remove numpy from RDDSampler + Davies Liu , Xiangrui Meng + 2014-11-20 16:40:25 -0800 + Commit: d39f2e9, github.com/apache/spark/pull/3351 + + [SQL] fix function description mistake + Jacky Li + 2014-11-20 15:48:36 -0800 + Commit: ad5f1f3, github.com/apache/spark/pull/3344 + + [SPARK-2918] [SQL] Support the CTAS in EXPLAIN command + Cheng Hao + 2014-11-20 15:46:00 -0800 + Commit: 6aa0fc9, github.com/apache/spark/pull/3357 + + [SPARK-4318][SQL] Fix empty sum distinct. 
+ Takuya UESHIN + 2014-11-20 15:41:24 -0800 + Commit: 2c2e7a4, github.com/apache/spark/pull/3184 + + [SPARK-4513][SQL] Support relational operator '<=>' in Spark SQL + ravipesala + 2014-11-20 15:34:03 -0800 + Commit: 98e9419, github.com/apache/spark/pull/3387 + + [SPARK-4439] [MLlib] add python api for random forest + Davies Liu + 2014-11-20 15:31:28 -0800 + Commit: 1c53a5d, github.com/apache/spark/pull/3320 + + [SPARK-4228][SQL] SchemaRDD to JSON + Dan McClary + 2014-11-20 13:36:50 -0800 + Commit: b8e6886, github.com/apache/spark/pull/3213 + + [SPARK-3938][SQL] Names in-memory columnar RDD with corresponding table name + Cheng Lian + 2014-11-20 13:12:24 -0800 + Commit: abf2918, github.com/apache/spark/pull/3383 + + [SPARK-4486][MLLIB] Improve GradientBoosting APIs and doc + Xiangrui Meng + 2014-11-20 00:48:59 -0800 + Commit: 15cacc8, github.com/apache/spark/pull/3374 + + [SPARK-4446] [SPARK CORE] + Leolh + 2014-11-19 18:18:55 -0800 + Commit: e216ffa, github.com/apache/spark/pull/3306 + + [SPARK-4480] Avoid many small spills in external data structures + Andrew Or + 2014-11-19 18:07:27 -0800 + Commit: 0eb4a7f, github.com/apache/spark/pull/3353 + + [Spark-4484] Treat maxResultSize as unlimited when set to 0; improve error message + Nishkam Ravi , nravi , nishkamravi2 + 2014-11-19 17:23:42 -0800 + Commit: 73fedf5, github.com/apache/spark/pull/3360 + + [SPARK-4478] Keep totalRegisteredExecutors up-to-date + Akshat Aranya + 2014-11-19 17:20:20 -0800 + Commit: 9ccc53c, github.com/apache/spark/pull/3373 + + Updating GraphX programming guide and documentation + Joseph E. Gonzalez + 2014-11-19 16:53:33 -0800 + Commit: 377b068, github.com/apache/spark/pull/3359 + + [SPARK-4495] Fix memory leak in JobProgressListener + Josh Rosen + 2014-11-19 16:50:21 -0800 + Commit: 04d462f, github.com/apache/spark/pull/3372 + + [SPARK-4294][Streaming] UnionDStream stream should express the requirements in the same way as TransformedDStream + Yadong Qi + 2014-11-19 15:53:06 -0800 + Commit: c3002c4, github.com/apache/spark/pull/3152 + + [SPARK-4384] [PySpark] improve sort spilling + Davies Liu + 2014-11-19 15:45:37 -0800 + Commit: 73c8ea8, github.com/apache/spark/pull/3252 + + [SPARK-4429][BUILD] Build for Scala 2.11 using sbt fails. + Takuya UESHIN + 2014-11-19 14:40:21 -0800 + Commit: f9adda9, github.com/apache/spark/pull/3342 + + [DOC][PySpark][Streaming] Fix docstring for sphinx + Ken Takagiwa + 2014-11-19 14:23:18 -0800 + Commit: 9b7bbce, github.com/apache/spark/pull/3311 + + SPARK-3962 Marked scope as provided for external projects. 
+ Prashant Sharma , Prashant Sharma + 2014-11-19 14:18:10 -0800 + Commit: 1c93841, github.com/apache/spark/pull/2959 + + [HOT FIX] MiMa tests are broken + Andrew Or + 2014-11-19 14:03:44 -0800 + Commit: 0df02ca, github.com/apache/spark/pull/3371 + + [SPARK-4481][Streaming][Doc] Fix the wrong description of updateFunc + zsxwing + 2014-11-19 13:17:15 -0800 + Commit: 3bf7cee, github.com/apache/spark/pull/3356 + + [SPARK-4482][Streaming] Disable ReceivedBlockTracker's write ahead log by default + Tathagata Das + 2014-11-19 13:06:48 -0800 + Commit: 22fc4e7, github.com/apache/spark/pull/3358 + + [SPARK-4470] Validate number of threads in local mode + Kenichi Maehashi + 2014-11-19 12:11:09 -0800 + Commit: eacc788, github.com/apache/spark/pull/3337 + + [SPARK-4467] fix elements read count for ExtrenalSorter + Tianshuo Deng + 2014-11-19 10:01:09 -0800 + Commit: d75579d, github.com/apache/spark/pull/3302 + + SPARK-4455 Exclude dependency on hbase-annotations module + tedyu + 2014-11-19 00:55:39 -0800 + Commit: 5f5ac2d, github.com/apache/spark/pull/3286 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-11-19 00:27:31 -0800 + Commit: 8327df6, github.com/apache/spark/pull/2777 + + [Spark-4432]close InStream after the block is accessed + Mingfei + 2014-11-18 22:17:06 -0800 + Commit: 165cec9, github.com/apache/spark/pull/3290 + + [SPARK-4441] Close Tachyon client when TachyonBlockManager is shutdown + Mingfei + 2014-11-18 22:16:36 -0800 + Commit: 67e9876, github.com/apache/spark/pull/3299 + + Bumping version to 1.3.0-SNAPSHOT. + Marcelo Vanzin + 2014-11-18 21:24:18 -0800 + Commit: 397d3aa, github.com/apache/spark/pull/3277 + + [SPARK-4468][SQL] Fixes Parquet filter creation for inequality predicates with literals on the left hand side + Cheng Lian + 2014-11-18 17:41:54 -0800 + Commit: 423baea, github.com/apache/spark/pull/3334 + + [SPARK-4327] [PySpark] Python API for RDD.randomSplit() + Davies Liu + 2014-11-18 16:37:35 -0800 + Commit: 7f22fa8, github.com/apache/spark/pull/3193 + + [SPARK-4433] fix a racing condition in zipWithIndex + Xiangrui Meng + 2014-11-18 16:25:44 -0800 + Commit: bb46046, github.com/apache/spark/pull/3291 + + [SPARK-3721] [PySpark] broadcast objects larger than 2G + Davies Liu , Davies Liu + 2014-11-18 16:17:51 -0800 + Commit: 4a377af, github.com/apache/spark/pull/2659 + + [SPARK-4306] [MLlib] Python API for LogisticRegressionWithLBFGS + Davies Liu + 2014-11-18 15:57:33 -0800 + Commit: d2e2951, github.com/apache/spark/pull/3307 + + [SPARK-4463] Add (de)select all button for add'l metrics. + Kay Ousterhout + 2014-11-18 15:01:06 -0800 + Commit: 010bc86, github.com/apache/spark/pull/3331 + + [SPARK-4017] show progress bar in console + Davies Liu + 2014-11-18 13:37:21 -0800 + Commit: e34f38f, github.com/apache/spark/pull/3029 + + [SPARK-4404] remove sys.exit() in shutdown hook + Davies Liu + 2014-11-18 13:11:38 -0800 + Commit: 80f3177, github.com/apache/spark/pull/3289 + + [SPARK-4075][SPARK-4434] Fix the URI validation logic for Application Jar name. 
+ Kousuke Saruta + 2014-11-18 12:17:33 -0800 + Commit: bfebfd8, github.com/apache/spark/pull/3326 + + [SQL] Support partitioned parquet tables that have the key in both the directory and the file + Michael Armbrust + 2014-11-18 12:13:23 -0800 + Commit: 90d72ec, github.com/apache/spark/pull/3272 + + [SPARK-4396] allow lookup by index in Python's Rating + Xiangrui Meng + 2014-11-18 10:35:29 -0800 + Commit: b54c6ab, github.com/apache/spark/pull/3261 + + [SPARK-4435] [MLlib] [PySpark] improve classification + Davies Liu + 2014-11-18 10:11:13 -0800 + Commit: 8fbf72b, github.com/apache/spark/pull/3305 + + ALS implicit: added missing parameter alpha in doc string + Felix Maximilian Möller + 2014-11-18 10:08:24 -0800 + Commit: cedc3b5, github.com/apache/spark/pull/3343 + + SPARK-4466: Provide support for publishing Scala 2.11 artifacts to Maven + Patrick Wendell + 2014-11-17 21:07:50 -0800 + Commit: c6e0c2a, github.com/apache/spark/pull/3332 + + [SPARK-4453][SPARK-4213][SQL] Simplifies Parquet filter generation code + Cheng Lian + 2014-11-17 16:55:12 -0800 + Commit: 36b0956, github.com/apache/spark/pull/3317 + + [SPARK-4448] [SQL] unwrap for the ConstantObjectInspector + Cheng Hao + 2014-11-17 16:35:49 -0800 + Commit: ef7c464, github.com/apache/spark/pull/3308 + + [SPARK-4443][SQL] Fix statistics for external table in spark sql hive + w00228970 + 2014-11-17 16:33:50 -0800 + Commit: 42389b1, github.com/apache/spark/pull/3304 + + [SPARK-4309][SPARK-4407][SQL] Date type support for Thrift server, and fixes for complex types + Cheng Lian + 2014-11-17 16:31:05 -0800 + Commit: 6b7f2f7, github.com/apache/spark/pull/3298 + + [SQL] Construct the MutableRow from an Array + Cheng Hao + 2014-11-17 16:29:52 -0800 + Commit: 69e858c, github.com/apache/spark/pull/3217 + + [SPARK-4425][SQL] Handle NaN or Infinity cast to Timestamp correctly. + Takuya UESHIN + 2014-11-17 16:28:07 -0800 + Commit: 566c791, github.com/apache/spark/pull/3283 + + [SPARK-4420][SQL] Change nullability of Cast from DoubleType/FloatType to DecimalType. + Takuya UESHIN + 2014-11-17 16:26:48 -0800 + Commit: 3a81a1c, github.com/apache/spark/pull/3278 + + [SQL] Makes conjunction pushdown more aggressive for in-memory table + Cheng Lian + 2014-11-17 15:33:13 -0800 + Commit: 5ce7dae, github.com/apache/spark/pull/3318 + + [SPARK-4180] [Core] Prevent creation of multiple active SparkContexts + Josh Rosen + 2014-11-17 12:48:18 -0800 + Commit: 0f3ceb5, github.com/apache/spark/pull/3121 + + [DOCS][SQL] Fix broken link to Row class scaladoc + Andy Konwinski + 2014-11-17 11:52:23 -0800 + Commit: cec1116, github.com/apache/spark/pull/3323 + + Revert "[SPARK-4075] [Deploy] Jar url validation is not enough for Jar file" + Andrew Or + 2014-11-17 11:24:28 -0800 + Commit: dbb9da5 + + [SPARK-4444] Drop VD type parameter from EdgeRDD + Ankur Dave + 2014-11-17 11:06:31 -0800 + Commit: 9ac2bb1, github.com/apache/spark/pull/3303 + + SPARK-2811 upgrade algebird to 0.8.1 + Adam Pingel + 2014-11-17 10:47:29 -0800 + Commit: e7690ed, github.com/apache/spark/pull/3282 + + SPARK-4445, Don't display storage level in toDebugString unless RDD is persisted. 
+ Prashant Sharma + 2014-11-17 10:40:33 -0800 + Commit: 5c92d47, github.com/apache/spark/pull/3310 + + [SPARK-4410][SQL] Add support for external sort + Michael Armbrust + 2014-11-16 21:55:57 -0800 + Commit: 64c6b9b, github.com/apache/spark/pull/3268 + + [SPARK-4422][MLLIB]In some cases, Vectors.fromBreeze get wrong results. + GuoQiang Li + 2014-11-16 21:31:51 -0800 + Commit: 5168c6c, github.com/apache/spark/pull/3281 + + Revert "[SPARK-4309][SPARK-4407][SQL] Date type support for Thrift server, and fixes for complex types" + Michael Armbrust + 2014-11-16 15:05:04 -0800 + Commit: 45ce327, github.com/apache/spark/pull/3292 + + [SPARK-4309][SPARK-4407][SQL] Date type support for Thrift server, and fixes for complex types + Cheng Lian + 2014-11-16 14:26:41 -0800 + Commit: cb6bd83, github.com/apache/spark/pull/3178 + + [SPARK-4393] Fix memory leak in ConnectionManager ACK timeout TimerTasks; use HashedWheelTimer + Josh Rosen + 2014-11-16 00:44:15 -0800 + Commit: 7850e0c, github.com/apache/spark/pull/3259 + + [SPARK-4426][SQL][Minor] The symbol of BitwiseOr is wrong, should not be '&' + Kousuke Saruta + 2014-11-15 22:23:47 -0800 + Commit: 84468b2, github.com/apache/spark/pull/3284 + + [SPARK-4419] Upgrade snappy-java to 1.1.1.6 + Josh Rosen + 2014-11-15 22:22:34 -0800 + Commit: 7d8e152, github.com/apache/spark/pull/3287 + + [SPARK-2321] Several progress API improvements / refactorings + Josh Rosen + 2014-11-14 23:46:25 -0800 + Commit: 40eb8b6, github.com/apache/spark/pull/3197 + + Added contains(key) to Metadata + kai + 2014-11-14 23:44:23 -0800 + Commit: cbddac2, github.com/apache/spark/pull/3273 + + [SPARK-4260] Httpbroadcast should set connection timeout. + Kousuke Saruta + 2014-11-14 22:36:56 -0800 + Commit: 60969b0, github.com/apache/spark/pull/3122 + + [SPARK-4363][Doc] Update the Broadcast example + zsxwing + 2014-11-14 22:28:48 -0800 + Commit: 861223e, github.com/apache/spark/pull/3226 + + [SPARK-4379][Core] Change Exception to SparkException in checkpoint + zsxwing + 2014-11-14 22:25:41 -0800 + Commit: dba1405, github.com/apache/spark/pull/3241 + + [SPARK-4415] [PySpark] JVM should exit after Python exit + Davies Liu + 2014-11-14 20:13:46 -0800 + Commit: 7fe08b4, github.com/apache/spark/pull/3274 + + [SPARK-4404]SparkSubmitDriverBootstrapper should stop after its SparkSubmit sub-proc... + WangTao , WangTaoTheTonic + 2014-11-14 20:11:51 -0800 + Commit: 303a4e4, github.com/apache/spark/pull/3266 + + SPARK-4214. With dynamic allocation, avoid outstanding requests for more... + Sandy Ryza + 2014-11-14 15:51:05 -0800 + Commit: ad42b28, github.com/apache/spark/pull/3204 + + [SPARK-4412][SQL] Fix Spark's control of Parquet logging. + Jim Carroll + 2014-11-14 15:33:21 -0800 + Commit: 37482ce, github.com/apache/spark/pull/3271 + + [SPARK-4365][SQL] Remove unnecessary filter call on records returned from parquet library + Yash Datta + 2014-11-14 15:16:36 -0800 + Commit: 63ca3af, github.com/apache/spark/pull/3229 + + [SPARK-4386] Improve performance when writing Parquet files. 
+ Jim Carroll + 2014-11-14 15:11:53 -0800 + Commit: f76b968, github.com/apache/spark/pull/3254 + + [SPARK-4322][SQL] Enables struct fields as sub expressions of grouping fields + Cheng Lian + 2014-11-14 15:09:36 -0800 + Commit: 0c7b66b, github.com/apache/spark/pull/3248 + + [SQL] Don't shuffle code generated rows + Michael Armbrust + 2014-11-14 15:03:23 -0800 + Commit: 4b4b50c, github.com/apache/spark/pull/3263 + + [SQL] Minor cleanup of comments, errors and override. + Michael Armbrust + 2014-11-14 15:00:42 -0800 + Commit: f805025, github.com/apache/spark/pull/3257 + + [SPARK-4391][SQL] Configure parquet filters using SQLConf + Michael Armbrust + 2014-11-14 14:59:35 -0800 + Commit: e47c387, github.com/apache/spark/pull/3258 + + [SPARK-4390][SQL] Handle NaN cast to decimal correctly + Michael Armbrust + 2014-11-14 14:56:57 -0800 + Commit: a0300ea, github.com/apache/spark/pull/3256 + + [SPARK-4062][Streaming]Add ReliableKafkaReceiver in Spark Streaming Kafka connector + jerryshao , Tathagata Das , Saisai Shao + 2014-11-14 14:33:37 -0800 + Commit: 5930f64, github.com/apache/spark/pull/2991 + + [SPARK-4333][SQL] Correctly log number of iterations in RuleExecutor + DoingDone9 <799203320@qq.com> + 2014-11-14 14:28:06 -0800 + Commit: 0cbdb01, github.com/apache/spark/pull/3180 + + SPARK-4375. no longer require -Pscala-2.10 + Sandy Ryza + 2014-11-14 14:21:57 -0800 + Commit: f5f757e, github.com/apache/spark/pull/3239 + + [SPARK-4245][SQL] Fix containsNull of the result ArrayType of CreateArray expression. + Takuya UESHIN + 2014-11-14 14:21:16 -0800 + Commit: bbd8f5b, github.com/apache/spark/pull/3110 + + [SPARK-4239] [SQL] support view in HiveQl + Daoyuan Wang + 2014-11-14 13:51:20 -0800 + Commit: ade72c4, github.com/apache/spark/pull/3131 + + Update failed assert text to match code in SizeEstimatorSuite + Jeff Hammerbacher + 2014-11-14 13:37:48 -0800 + Commit: c258db9, github.com/apache/spark/pull/3242 + + [SPARK-4313][WebUI][Yarn] Fix link issue of the executor thread dump page in yarn-cluster mode + zsxwing + 2014-11-14 13:36:13 -0800 + Commit: 156cf33, github.com/apache/spark/pull/3183 + + SPARK-3663 Document SPARK_LOG_DIR and SPARK_PID_DIR + Andrew Ash + 2014-11-14 13:33:35 -0800 + Commit: 5c265cc, github.com/apache/spark/pull/2518 + + [Spark Core] SPARK-4380 Edit spilling log from MB to B + Hong Shen + 2014-11-14 13:29:41 -0800 + Commit: 0c56a03, github.com/apache/spark/pull/3243 + + [SPARK-4398][PySpark] specialize sc.parallelize(xrange) + Xiangrui Meng + 2014-11-14 12:43:17 -0800 + Commit: abd5817, github.com/apache/spark/pull/3264 + + [SPARK-4394][SQL] Data Sources API Improvements + Michael Armbrust + 2014-11-14 12:00:08 -0800 + Commit: 77e845c, github.com/apache/spark/pull/3260 + + [SPARK-3722][Docs]minor improvement and fix in docs + WangTao + 2014-11-14 08:09:42 -0600 + Commit: e421072, github.com/apache/spark/pull/2579 + + [SPARK-4310][WebUI] Sort 'Submitted' column in Stage page by time + zsxwing + 2014-11-13 14:37:04 -0800 + Commit: 825709a, github.com/apache/spark/pull/3179 + + [SPARK-4372][MLLIB] Make LR and SVM's default parameters consistent in Scala and Python + Xiangrui Meng + 2014-11-13 13:54:16 -0800 + Commit: 3221830, github.com/apache/spark/pull/3232 + + [SPARK-4326] fix unidoc + Xiangrui Meng + 2014-11-13 13:16:20 -0800 + Commit: 4b0c1ed, github.com/apache/spark/pull/3253 + + [HOT FIX] 
make-distribution.sh fails if Yarn shuffle jar DNE + Andrew Or + 2014-11-13 11:54:45 -0800 + Commit: a0fa1ba, github.com/apache/spark/pull/3250 + + [SPARK-4378][MLLIB] make ALS more Java-friendly + Xiangrui Meng + 2014-11-13 11:42:27 -0800 + Commit: ca26a21, github.com/apache/spark/pull/3240 + + [SPARK-4348] [PySpark] [MLlib] rename random.py to rand.py + Davies Liu + 2014-11-13 10:24:54 -0800 + Commit: ce0333f, github.com/apache/spark/pull/3216 + + [SPARK-4256] Make Binary Evaluation Metrics functions defined in cases where there ar... + Andrew Bullen + 2014-11-12 22:14:44 -0800 + Commit: 484fecb, github.com/apache/spark/pull/3118 + + [SPARK-4370] [Core] Limit number of Netty cores based on executor size + Aaron Davidson + 2014-11-12 18:46:37 -0800 + Commit: b9e1c2e, github.com/apache/spark/pull/3155 + + [SPARK-4373][MLLIB] fix MLlib maven tests + Xiangrui Meng + 2014-11-12 18:15:14 -0800 + Commit: 23f5bdf, github.com/apache/spark/pull/3235 + + [Release] Bring audit scripts up-to-date + Andrew Or + 2014-11-13 00:30:58 +0000 + Commit: 723a86b + + [SPARK-2672] support compressed file in wholeTextFile + Davies Liu + 2014-11-12 15:58:12 -0800 + Commit: d7d54a4, github.com/apache/spark/pull/3005 + + [SPARK-4369] [MLLib] fix TreeModel.predict() with RDD + Davies Liu + 2014-11-12 13:56:41 -0800 + Commit: bd86118, github.com/apache/spark/pull/3230 + + [SPARK-3666] Extract interfaces for EdgeRDD and VertexRDD + Ankur Dave + 2014-11-12 13:49:20 -0800 + Commit: a5ef581, github.com/apache/spark/pull/2530 + + [Release] Correct make-distribution.sh log path + Andrew Or + 2014-11-12 13:46:26 -0800 + Commit: c3afd32 + + Internal cleanup for aggregateMessages + Ankur Dave + 2014-11-12 13:44:49 -0800 + Commit: 0402be9, github.com/apache/spark/pull/3231 + + [SPARK-4281][Build] Package Yarn shuffle service into its own jar + Andrew Or + 2014-11-12 13:39:45 -0800 + Commit: aa43a8d, github.com/apache/spark/pull/3147 + + [Test] Better exception message from SparkSubmitSuite + Andrew Or + 2014-11-12 13:35:48 -0800 + Commit: 6e3c5a2, github.com/apache/spark/pull/3212 + + [SPARK-3660][STREAMING] Initial RDD for updateStateByKey transformation + Soumitra Kumar + 2014-11-12 12:25:31 -0800 + Commit: 36ddeb7, github.com/apache/spark/pull/2665 + + [SPARK-3530][MLLIB] pipeline and parameters with examples + Xiangrui Meng + 2014-11-12 10:38:57 -0800 + Commit: 4b736db, github.com/apache/spark/pull/3099 + + [SPARK-4355][MLLIB] fix OnlineSummarizer.merge when other.mean is zero + Xiangrui Meng + 2014-11-12 01:50:11 -0800 + Commit: 84324fb, github.com/apache/spark/pull/3220 + + [SPARK-3936] Add aggregateMessages, which supersedes mapReduceTriplets + Ankur Dave + 2014-11-11 23:38:27 -0800 + Commit: faeb41d, github.com/apache/spark/pull/3100 + + [MLLIB] SPARK-4347: Reducing GradientBoostingSuite run time. 
+ Manish Amde + 2014-11-11 22:47:53 -0800 + Commit: 2ef016b, github.com/apache/spark/pull/3214 + + Support cross building for Scala 2.11 + Prashant Sharma , Patrick Wendell + 2014-11-11 21:36:48 -0800 + Commit: daaca14, github.com/apache/spark/pull/3159 + + [Release] Log build output for each distribution + Andrew Or + 2014-11-11 18:02:59 -0800 + Commit: 2ddb141 + + SPARK-2269 Refactor mesos scheduler resourceOffers and add unit test + Timothy Chen + 2014-11-11 14:29:18 -0800 + Commit: a878660, github.com/apache/spark/pull/1487 + + [SPARK-4282][YARN] Stopping flag in YarnClientSchedulerBackend should be volatile + Kousuke Saruta + 2014-11-11 12:33:53 -0600 + Commit: 7f37188, github.com/apache/spark/pull/3143 + + SPARK-4305 [BUILD] yarn-alpha profile won't build due to network/yarn module + Sean Owen + 2014-11-11 12:30:35 -0600 + Commit: f820b56, github.com/apache/spark/pull/3167 + + SPARK-1830 Deploy failover, Make Persistence engine and LeaderAgent Pluggable + Prashant Sharma + 2014-11-11 09:29:48 -0800 + Commit: deefd9d, github.com/apache/spark/pull/771 + + [Streaming][Minor]Replace some 'if-else' in Clock + huangzhaowei + 2014-11-11 03:02:12 -0800 + Commit: 6e03de3, github.com/apache/spark/pull/3088 + + [SPARK-2492][Streaming] kafkaReceiver minor changes to align with Kafka 0.8 + jerryshao + 2014-11-11 02:22:23 -0800 + Commit: c8850a3, github.com/apache/spark/pull/1420 + + [SPARK-4295][External]Fix exception in SparkSinkSuite + maji2014 + 2014-11-11 02:18:27 -0800 + Commit: f8811a5, github.com/apache/spark/pull/3177 + + [SPARK-4307] Initialize FileDescriptor lazily in FileRegion. + Reynold Xin , Reynold Xin + 2014-11-11 00:25:31 -0800 + Commit: ef29a9a, github.com/apache/spark/pull/3172 + + [SPARK-4324] [PySpark] [MLlib] support numpy.array for all MLlib API + Davies Liu + 2014-11-10 22:26:16 -0800 + Commit: 65083e9, github.com/apache/spark/pull/3189 + + [SPARK-4330][Doc] Link to proper URL for YARN overview + Kousuke Saruta + 2014-11-10 22:18:00 -0800 + Commit: 3c07b8f, github.com/apache/spark/pull/3196 + + [SPARK-3649] Remove GraphX custom serializers + Ankur Dave + 2014-11-10 19:31:52 -0800 + Commit: 300887b, github.com/apache/spark/pull/2503 + + [SPARK-4274] [SQL] Fix NPE in printing the details of the query plan + Cheng Hao + 2014-11-10 17:46:05 -0800 + Commit: c764d0a, github.com/apache/spark/pull/3139 + + [SPARK-3954][Streaming] Optimization to FileInputDStream + surq + 2014-11-10 17:37:16 -0800 + Commit: ce6ed2a, github.com/apache/spark/pull/2811 + + [SPARK-4149][SQL] ISO 8601 support for json date time strings + Daoyuan Wang + 2014-11-10 17:26:03 -0800 + Commit: a1fc059, github.com/apache/spark/pull/3012 + + [SPARK-4250] [SQL] Fix bug of constant null value mapping to ConstantObjectInspector + Cheng Hao + 2014-11-10 17:22:57 -0800 + Commit: fa77783, github.com/apache/spark/pull/3114 + + [SQL] remove a decimal case branch that has no effect at runtime + Xiangrui Meng + 2014-11-10 17:20:52 -0800 + Commit: d793d80, github.com/apache/spark/pull/3192 + + [SPARK-4308][SQL] Sets SQL operation state to ERROR when exception is thrown + Cheng Lian + 2014-11-10 16:56:36 -0800 + Commit: acb55ae, github.com/apache/spark/pull/3175 + + [SPARK-4000][Build] Uploads HiveCompatibilitySuite logs + Cheng Lian + 2014-11-10 16:17:52 -0800 + Commit: 534b231, github.com/apache/spark/pull/2993 + + [SPARK-4319][SQL] Enable an 
ignored test "null count". + Takuya UESHIN + 2014-11-10 15:55:15 -0800 + Commit: dbf1058, github.com/apache/spark/pull/3185 + + Revert "[SPARK-2703][Core]Make Tachyon related unit tests execute without deploying a Tachyon system locally." + Patrick Wendell + 2014-11-10 14:56:06 -0800 + Commit: 6e7a309 + + [SPARK-4047] - Generate runtime warnings for example implementation of PageRank + Varadharajan Mukundan + 2014-11-10 14:32:29 -0800 + Commit: 974d334, github.com/apache/spark/pull/2894 + + SPARK-1297 Upgrade HBase dependency to 0.98 + tedyu + 2014-11-10 13:23:33 -0800 + Commit: b32734e, github.com/apache/spark/pull/3115 + + SPARK-4230. Doc for spark.default.parallelism is incorrect + Sandy Ryza + 2014-11-10 12:40:41 -0800 + Commit: c6f4e70, github.com/apache/spark/pull/3107 + + [SPARK-4312] bash doesn't have "die" + Jey Kottalam + 2014-11-10 12:37:56 -0800 + Commit: c5db8e2, github.com/apache/spark/pull/2898 + + Update RecoverableNetworkWordCount.scala + comcmipi + 2014-11-10 12:33:48 -0800 + Commit: 0340c56, github.com/apache/spark/pull/2735 + + SPARK-2548 [STREAMING] JavaRecoverableWordCount is missing + Sean Owen + 2014-11-10 11:47:27 -0800 + Commit: 3a02d41, github.com/apache/spark/pull/2564 + + [SPARK-4169] [Core] Accommodate non-English Locales in unit tests + Niklas Wilcke <1wilcke@informatik.uni-hamburg.de> + 2014-11-10 11:37:38 -0800 + Commit: ed8bf1e, github.com/apache/spark/pull/3036 + + [SQL] support udt to hive types conversion (hive->udt is not supported) + Xiangrui Meng + 2014-11-10 11:04:12 -0800 + Commit: 894a724, github.com/apache/spark/pull/3164 + + [SPARK-2703][Core]Make Tachyon related unit tests execute without deploying a Tachyon system locally. + RongGu + 2014-11-09 23:48:15 -0800 + Commit: bd86cb1, github.com/apache/spark/pull/3030 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-11-09 23:07:14 -0800 + Commit: 227488d, github.com/apache/spark/pull/2898 + + SPARK-3179. Add task OutputMetrics. + Sandy Ryza + 2014-11-09 22:29:03 -0800 + Commit: 3c2cff4, github.com/apache/spark/pull/2968 + + SPARK-1209 [CORE] (Take 2) SparkHadoop{MapRed,MapReduce}Util should not use package org.apache.hadoop + Sean Owen + 2014-11-09 22:11:20 -0800 + Commit: f8e5732, github.com/apache/spark/pull/3048 + + MAINTENANCE: Automated closing of pull requests. 
+ Patrick Wendell + 2014-11-09 18:16:20 -0800 + Commit: f73b56f, github.com/apache/spark/pull/464 + + SPARK-1344 [DOCS] Scala API docs for top methods + Sean Owen + 2014-11-09 17:42:08 -0800 + Commit: d136265, github.com/apache/spark/pull/3168 + + SPARK-971 [DOCS] Link to Confluence wiki from project website / documentation + Sean Owen + 2014-11-09 17:40:48 -0800 + Commit: 8c99a47, github.com/apache/spark/pull/3169 + + [SPARK-4301] StreamingContext should not allow start() to be called after calling stop() + Josh Rosen + 2014-11-08 18:10:23 -0800 + Commit: 7b41b17, github.com/apache/spark/pull/3160 + + [Minor] [Core] Don't NPE on closeQuietly(null) + Aaron Davidson + 2014-11-08 13:03:51 -0800 + Commit: 4af5c7e, github.com/apache/spark/pull/3166 + + [SPARK-4291][Build] Rename network module projects + Andrew Or + 2014-11-07 23:16:13 -0800 + Commit: 7afc856, github.com/apache/spark/pull/3148 + + [MLLIB] [PYTHON] SPARK-4221: Expose nonnegative ALS in the python API + Michelangelo D'Agostino + 2014-11-07 22:53:01 -0800 + Commit: 7e9d975, github.com/apache/spark/pull/3095 + + [SPARK-4304] [PySpark] Fix sort on empty RDD + Davies Liu + 2014-11-07 20:53:03 -0800 + Commit: 7779109, github.com/apache/spark/pull/3162 + + MAINTENANCE: Automated closing of pull requests. + Patrick Wendell + 2014-11-07 13:08:25 -0800 + Commit: 5923dd9, github.com/apache/spark/pull/3016 + + Update JavaCustomReceiver.java + xiao321 <1042460381@qq.com> + 2014-11-07 12:56:49 -0800 + Commit: 7c9ec52, github.com/apache/spark/pull/3153 + + [SPARK-4292][SQL] Result set iterator bug in JDBC/ODBC + wangfei + 2014-11-07 12:55:11 -0800 + Commit: d6e5552, github.com/apache/spark/pull/3149 + + [SPARK-4203][SQL] Partition directories in random order when inserting into hive table + Matthew Taylor + 2014-11-07 12:53:08 -0800 + Commit: ac70c97, github.com/apache/spark/pull/3076 + + [SPARK-4270][SQL] Fix Cast from DateType to DecimalType. 
+ Takuya UESHIN + 2014-11-07 12:30:47 -0800 + Commit: a6405c5, github.com/apache/spark/pull/3134 + + [SPARK-4272] [SQL] Add more unwrapper functions for primitive type in TableReader + Cheng Hao + 2014-11-07 12:15:53 -0800 + Commit: 60ab80f, github.com/apache/spark/pull/3136 + + [SPARK-4213][SQL] ParquetFilters - No support for LT, LTE, GT, GTE operators + Kousuke Saruta + 2014-11-07 11:56:40 -0800 + Commit: 14c54f1, github.com/apache/spark/pull/3083 + + [SQL] Modify keyword val location according to ordering + Jacky Li + 2014-11-07 11:52:08 -0800 + Commit: 68609c5, github.com/apache/spark/pull/3080 + + [SQL] Support ScalaReflection of schema in different universes + Michael Armbrust + 2014-11-07 11:51:20 -0800 + Commit: 8154ed7, github.com/apache/spark/pull/3096 + + [SPARK-4225][SQL] Resorts to SparkContext.version to inspect Spark version + Cheng Lian + 2014-11-07 11:45:25 -0800 + Commit: 86e9eaa, github.com/apache/spark/pull/3105 + + [SQL][DOC][Minor] Spark SQL Hive now support dynamic partitioning + wangfei + 2014-11-07 11:43:35 -0800 + Commit: 636d7bc, github.com/apache/spark/pull/3127 + + [SPARK-4187] [Core] Switch to binary protocol for external shuffle service messages + Aaron Davidson + 2014-11-07 09:42:21 -0800 + Commit: d4fa04e, github.com/apache/spark/pull/3146 + + [SPARK-4204][Core][WebUI] Change Utils.exceptionString to contain the inner exceptions and make the error information in Web UI more friendly + zsxwing + 2014-11-06 21:52:12 -0800 + Commit: 3abdb1b, github.com/apache/spark/pull/3073 + + [SPARK-4236] Cleanup removed applications' files in shuffle service + Aaron Davidson + 2014-11-06 19:54:32 -0800 + Commit: 48a19a6, github.com/apache/spark/pull/3126 + + [SPARK-4188] [Core] Perform network-level retry of shuffle file fetches + Aaron Davidson + 2014-11-06 18:39:14 -0800 + Commit: f165b2b, github.com/apache/spark/pull/3101 + + [SPARK-4277] Support external shuffle service on Standalone Worker + Aaron Davidson + 2014-11-06 17:20:46 -0800 + Commit: 6e9ef10, github.com/apache/spark/pull/3142 + + [SPARK-3797] Minor addendum to Yarn shuffle service + Andrew Or + 2014-11-06 17:18:49 -0800 + Commit: 96136f2, github.com/apache/spark/pull/3144 + + [HOT FIX] Make distribution fails + Andrew Or + 2014-11-06 15:31:07 -0800 + Commit: 470881b, github.com/apache/spark/pull/3145 + + [SPARK-4249][GraphX]fix a problem of EdgePartitionBuilder in Graphx + lianhuiwang + 2014-11-06 10:46:45 -0800 + Commit: d15c6e9, github.com/apache/spark/pull/3138 + + [SPARK-4264] Completion iterator should only invoke callback once + Aaron Davidson + 2014-11-06 10:45:46 -0800 + Commit: 23eaf0e, github.com/apache/spark/pull/3128 + + [SPARK-4186] add binaryFiles and binaryRecords in Python + Davies Liu + 2014-11-06 00:22:19 -0800 + Commit: b41a39e, github.com/apache/spark/pull/3078 + + [SPARK-4255] Fix incorrect table striping + Kay Ousterhout + 2014-11-06 00:03:03 -0800 + Commit: 5f27ae1, github.com/apache/spark/pull/3117 + + [SPARK-4137] [EC2] Don't change working dir on user + Nicholas Chammas + 2014-11-05 20:45:35 -0800 + Commit: db45f5a, github.com/apache/spark/pull/2988 + + [SPARK-4262][SQL] add .schemaRDD to JavaSchemaRDD + Xiangrui Meng + 2014-11-05 19:56:16 -0800 + Commit: 3d2b5bc, github.com/apache/spark/pull/3125 + + [SPARK-4254] [mllib] MovieLensALS bug fix + Joseph K. 
Bradley + 2014-11-05 19:51:18 -0800 + Commit: c315d13, github.com/apache/spark/pull/3116 + + [SPARK-4158] Fix for missing resources. + Brenden Matthews + 2014-11-05 16:02:44 -0800 + Commit: cb0eae3, github.com/apache/spark/pull/3024 + + SPARK-3223 runAsSparkUser cannot change HDFS write permission properly i... + Jongyoul Lee + 2014-11-05 15:49:42 -0800 + Commit: f7ac8c2, github.com/apache/spark/pull/3034 + + SPARK-4040. Update documentation to exemplify use of local (n) value, fo... + jay@apache.org + 2014-11-05 15:45:34 -0800 + Commit: 868cd4c, github.com/apache/spark/pull/2964 + + [SPARK-3797] Run external shuffle service in Yarn NM + Andrew Or + 2014-11-05 15:42:05 -0800 + Commit: 61a5cce, github.com/apache/spark/pull/3082 + + SPARK-4222 [CORE] use readFully in FixedLengthBinaryRecordReader + industrial-sloth + 2014-11-05 15:38:48 -0800 + Commit: f37817b, github.com/apache/spark/pull/3093 + + [SPARK-3984] [SPARK-3983] Fix incorrect scheduler delay and display task deserialization time in UI + Kay Ousterhout + 2014-11-05 15:30:31 -0800 + Commit: a46497e, github.com/apache/spark/pull/2832 + + [SPARK-4242] [Core] Add SASL to external shuffle service + Aaron Davidson + 2014-11-05 14:38:43 -0800 + Commit: 4c42986, github.com/apache/spark/pull/3108 + + [SPARK-4197] [mllib] GradientBoosting API cleanup and examples in Scala, Java + Joseph K. Bradley + 2014-11-05 10:33:13 -0800 + Commit: 5b3b6f6, github.com/apache/spark/pull/3094 + + [SPARK-4029][Streaming] Update streaming driver to reliably save and recover received block metadata on driver failures + Tathagata Das + 2014-11-05 01:21:53 -0800 + Commit: 5f13759, github.com/apache/spark/pull/3026 + + [SPARK-3964] [MLlib] [PySpark] add Hypothesis test Python API + Davies Liu + 2014-11-04 21:35:52 -0800 + Commit: c8abddc, github.com/apache/spark/pull/3091 + + [SQL] Add String option for DSL AS + Michael Armbrust + 2014-11-04 18:14:28 -0800 + Commit: 515abb9, github.com/apache/spark/pull/3097 + + [SPARK-2938] Support SASL authentication in NettyBlockTransferService + Aaron Davidson + 2014-11-04 16:15:38 -0800 + Commit: 5e73138, github.com/apache/spark/pull/3087 + + [Spark-4060] [MLlib] exposing special rdd functions to the public + Niklas Wilcke <1wilcke@informatik.uni-hamburg.de> + 2014-11-04 09:57:03 -0800 + Commit: f90ad5d, github.com/apache/spark/pull/2907 + + fixed MLlib Naive-Bayes java example bug + Dariusz Kobylarz + 2014-11-04 09:53:43 -0800 + Commit: bcecd73, github.com/apache/spark/pull/3081 + + [SPARK-3886] [PySpark] simplify serializer, use AutoBatchedSerializer by default. 
+ Davies Liu + 2014-11-03 23:56:14 -0800 + Commit: e4f4263, github.com/apache/spark/pull/2920 + + [SPARK-4166][Core] Add a backward compatibility test for ExecutorLostFailure + zsxwing + 2014-11-03 22:47:45 -0800 + Commit: b671ce0, github.com/apache/spark/pull/3085 + + [SPARK-4163][Core] Add a backward compatibility test for FetchFailed + zsxwing + 2014-11-03 22:40:43 -0800 + Commit: 9bdc841, github.com/apache/spark/pull/3086 + + [SPARK-3573][MLLIB] Make MLlib's Vector compatible with SQL's SchemaRDD + Xiangrui Meng + 2014-11-03 22:29:48 -0800 + Commit: 1a9c6cd, github.com/apache/spark/pull/3070 + + [SPARK-4192][SQL] Internal API for Python UDT + Xiangrui Meng + 2014-11-03 19:29:11 -0800 + Commit: 04450d1, github.com/apache/spark/pull/3068 + + [FIX][MLLIB] fix seed in BaggedPointSuite + Xiangrui Meng + 2014-11-03 18:50:37 -0800 + Commit: c5912ec, github.com/apache/spark/pull/3084 + + [SPARK-611] Display executor thread dumps in web UI + Josh Rosen + 2014-11-03 18:18:47 -0800 + Commit: 4f035dd, github.com/apache/spark/pull/2944 + + [SPARK-4168][WebUI] web statges number should show correctly when stages are more than 1000 + Zhang, Liye + 2014-11-03 18:17:32 -0800 + Commit: 97a466e, github.com/apache/spark/pull/3035 + + [SQL] Convert arguments to Scala UDFs + Michael Armbrust + 2014-11-03 18:04:51 -0800 + Commit: 15b58a2, github.com/apache/spark/pull/3077 + + SPARK-4178. Hadoop input metrics ignore bytes read in RecordReader insta... + Sandy Ryza + 2014-11-03 15:19:01 -0800 + Commit: 2812815, github.com/apache/spark/pull/3045 + + [SQL] More aggressive defaults + Michael Armbrust + 2014-11-03 14:08:27 -0800 + Commit: 25bef7e, github.com/apache/spark/pull/3064 + + [SPARK-4152] [SQL] Avoid data change in CTAS while table already existed + Cheng Hao + 2014-11-03 13:59:43 -0800 + Commit: e83f13e, github.com/apache/spark/pull/3013 + + [SPARK-4202][SQL] Simple DSL support for Scala UDF + Cheng Lian + 2014-11-03 13:20:33 -0800 + Commit: c238fb4, github.com/apache/spark/pull/3067 + + [SPARK-3594] [PySpark] [SQL] take more rows to infer schema or sampling + Davies Liu , Davies Liu + 2014-11-03 13:17:09 -0800 + Commit: 24544fb, github.com/apache/spark/pull/2716 + + [SPARK-4207][SQL] Query which has syntax like 'not like' is not working in Spark SQL + ravipesala + 2014-11-03 13:07:41 -0800 + Commit: 2b6e1ce, github.com/apache/spark/pull/3075 + + [SPARK-4211][Build] Fixes hive.version in Maven profile hive-0.13.1 + fi + 2014-11-03 12:56:56 -0800 + Commit: df607da, github.com/apache/spark/pull/3072 + + [SPARK-4148][PySpark] fix seed distribution and add some tests for rdd.sample + Xiangrui Meng + 2014-11-03 12:24:24 -0800 + Commit: 3cca196, github.com/apache/spark/pull/3010 + + [EC2] Factor out Mesos spark-ec2 branch + Nicholas Chammas + 2014-11-03 09:02:35 -0800 + Commit: 2aca97c, github.com/apache/spark/pull/3008 + diff --git a/LICENSE b/LICENSE index 21c42e9a20fa3..d0cd0dcb4bdb7 100644 --- a/LICENSE +++ b/LICENSE @@ -643,6 +643,36 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+======================================================================== +For d3 (core/src/main/resources/org/apache/spark/ui/static/d3.min.js): +======================================================================== + +Copyright (c) 2010-2015, Michael Bostock +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* The name Michael Bostock may not be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ======================================================================== For Scala Interpreter classes (all .scala files in repl/src/main/scala @@ -806,6 +836,68 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. +======================================================================== +For vis.js (core/src/main/resources/org/apache/spark/ui/static/vis.min.js): +======================================================================== +Copyright (C) 2010-2015 Almende B.V. + +Vis.js is dual licensed under both + + * The Apache 2.0 License + http://www.apache.org/licenses/LICENSE-2.0 + +and + + * The MIT License + http://opensource.org/licenses/MIT + +Vis.js may be distributed under either license. + +======================================================================== +For dagre-d3 (core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js): +======================================================================== +Copyright (c) 2013 Chris Pettitt + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +======================================================================== +For graphlib-dot (core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js): +======================================================================== +Copyright (c) 2012-2013 Chris Pettitt + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. ======================================================================== BSD-style licenses @@ -815,7 +907,7 @@ The following components are provided under a BSD-style license. See project lin (BSD 3 Clause) core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model) - (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.3 - http://jblas.org/) + (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/) (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) (BSD License) Javolution (javolution:javolution:5.5.1 - http://javolution.org) (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) diff --git a/R/README.md b/R/README.md index a6970e39b55f3..d7d65b4f0eca5 100644 --- a/R/README.md +++ b/R/README.md @@ -52,7 +52,7 @@ The SparkR documentation (Rd files and HTML files) are not a part of the source SparkR comes with several sample programs in the `examples/src/main/r` directory. To run one of them, use `./bin/sparkR `. For example: - ./bin/sparkR examples/src/main/r/pi.R local[2] + ./bin/sparkR examples/src/main/r/dataframe.R You can also run the unit-tests for SparkR by running (you need to install the [testthat](http://cran.r-project.org/web/packages/testthat/index.html) package first): @@ -63,5 +63,5 @@ You can also run the unit-tests for SparkR by running (you need to install the [ The `./bin/spark-submit` and `./bin/sparkR` can also be used to submit jobs to YARN clusters. You will need to set YARN conf dir before doing so. 
For example on CDH you can run ``` export YARN_CONF_DIR=/etc/hadoop/conf -./bin/spark-submit --master yarn examples/src/main/r/pi.R 4 +./bin/spark-submit --master yarn examples/src/main/r/dataframe.R ``` diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1c1779a763c7e..efc85bbc4b316 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -15,11 +15,11 @@ Suggests: Description: R frontend for Spark License: Apache License (== 2.0) Collate: + 'schema.R' 'generics.R' 'jobj.R' 'RDD.R' 'pairRDD.R' - 'schema.R' 'column.R' 'group.R' 'DataFrame.R' diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index e077eace74375..f9447f6c3288d 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -1,121 +1,49 @@ -#exportPattern("^[[:alpha:]]+") -exportClasses("RDD") -exportClasses("Broadcast") -exportMethods( - "aggregateByKey", - "aggregateRDD", - "cache", - "cartesian", - "checkpoint", - "coalesce", - "cogroup", - "collect", - "collectAsMap", - "collectPartition", - "combineByKey", - "count", - "countByKey", - "countByValue", - "distinct", - "Filter", - "filterRDD", - "first", - "flatMap", - "flatMapValues", - "fold", - "foldByKey", - "foreach", - "foreachPartition", - "fullOuterJoin", - "glom", - "groupByKey", - "intersection", - "join", - "keyBy", - "keys", - "length", - "lapply", - "lapplyPartition", - "lapplyPartitionsWithIndex", - "leftOuterJoin", - "lookup", - "map", - "mapPartitions", - "mapPartitionsWithIndex", - "mapValues", - "maximum", - "minimum", - "numPartitions", - "partitionBy", - "persist", - "pipeRDD", - "reduce", - "reduceByKey", - "reduceByKeyLocally", - "repartition", - "rightOuterJoin", - "sampleByKey", - "sampleRDD", - "saveAsTextFile", - "saveAsObjectFile", - "sortBy", - "sortByKey", - "subtract", - "subtractByKey", - "sumRDD", - "take", - "takeOrdered", - "takeSample", - "top", - "unionRDD", - "unpersist", - "value", - "values", - "zipPartitions", - "zipRDD", - "zipWithIndex", - "zipWithUniqueId" - ) +# Imports from base R +importFrom(methods, setGeneric, setMethod, setOldClass) + +# Disable native libraries till we figure out how to package it +# See SPARKR-7839 +#useDynLib(SparkR, stringHashCode) # S3 methods exported -export( - "textFile", - "objectFile", - "parallelize", - "hashCode", - "includePackage", - "broadcast", - "setBroadcastValue", - "setCheckpointDir" - ) export("sparkR.init") export("sparkR.stop") export("print.jobj") -useDynLib(SparkR, stringHashCode) -importFrom(methods, setGeneric, setMethod, setOldClass) - -# SparkRSQL exportClasses("DataFrame") -exportMethods("columns", +exportMethods("arrange", + "cache", + "collect", + "columns", + "count", + "describe", "distinct", + "dropna", "dtypes", "except", "explain", + "fillna", "filter", + "first", + "group_by", "groupBy", "head", "insertInto", "intersect", "isLocal", + "join", "limit", "orderBy", + "mutate", "names", + "persist", "printSchema", "registerTempTable", + "rename", "repartition", - "sampleDF", + "sample", + "sample_frac", "saveAsParquetFile", "saveAsTable", "saveDF", @@ -124,42 +52,68 @@ exportMethods("columns", "selectExpr", "show", "showDF", - "sortDF", - "toJSON", - "toRDD", + "summarize", + "take", "unionAll", + "unpersist", "where", "withColumn", - "withColumnRenamed") + "withColumnRenamed", + "write.df") exportClasses("Column") exportMethods("abs", + "acos", "alias", "approxCountDistinct", "asc", + "asin", + "atan", + "atan2", "avg", "cast", + "cbrt", + "ceiling", "contains", + "cos", + "cosh", "countDistinct", "desc", "endsWith", + "exp", + "expm1", + "floor", "getField", "getItem", + "hypot", 
"isNotNull", "isNull", "last", "like", + "log", + "log10", + "log1p", "lower", "max", "mean", "min", + "n", + "n_distinct", + "rint", "rlike", + "sign", + "sin", + "sinh", "sqrt", "startsWith", "substr", "sum", "sumDistinct", + "tan", + "tanh", + "toDegrees", + "toRadians", "upper") exportClasses("GroupedData") @@ -174,19 +128,15 @@ export("cacheTable", "createExternalTable", "dropTempTable", "jsonFile", - "jsonRDD", "loadDF", "parquetFile", + "read.df", "sql", "table", "tableNames", "tables", - "toDF", "uncacheTable") -export("sparkRSQL.init", - "sparkRHive.init") - export("structField", "structField.jobj", "structField.character", diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index b59b700af5dc9..0af5cb8881e35 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -45,6 +45,9 @@ setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) { #' @rdname DataFrame #' @export +#' +#' @param sdf A Java object reference to the backing Scala DataFrame +#' @param isCached TRUE if the dataFrame is cached dataFrame <- function(sdf, isCached = FALSE) { new("DataFrame", sdf, isCached) } @@ -62,9 +65,9 @@ dataFrame <- function(sdf, isCached = FALSE) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' printSchema(df) #'} setMethod("printSchema", @@ -85,9 +88,9 @@ setMethod("printSchema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' dfSchema <- schema(df) #'} setMethod("schema", @@ -107,9 +110,9 @@ setMethod("schema", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' explain(df, TRUE) #'} setMethod("explain", @@ -136,9 +139,9 @@ setMethod("explain", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' isLocal(df) #'} setMethod("isLocal", @@ -147,7 +150,7 @@ setMethod("isLocal", callJMethod(x@sdf, "isLocal") }) -#' ShowDF +#' showDF #' #' Print the first numRows rows of a DataFrame #' @@ -159,15 +162,16 @@ setMethod("isLocal", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' showDF(df) #'} setMethod("showDF", signature(x = "DataFrame"), function(x, numRows = 20) { - cat(callJMethod(x@sdf, "showString", numToInt(numRows)), "\n") + s <- callJMethod(x@sdf, "showString", numToInt(numRows)) + cat(s) }) #' show @@ -181,10 +185,10 @@ setMethod("showDF", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' show(df) +#' df <- jsonFile(sqlContext, path) +#' df #'} setMethod("show", "DataFrame", function(object) { @@ -206,9 +210,9 @@ setMethod("show", "DataFrame", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" 
-#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' dtypes(df) #'} setMethod("dtypes", @@ -230,9 +234,9 @@ setMethod("dtypes", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' columns(df) #'} setMethod("columns", @@ -244,7 +248,7 @@ setMethod("columns", }) #' @rdname columns -#' @export +#' @aliases names,DataFrame,function-method setMethod("names", signature(x = "DataFrame"), function(x) { @@ -263,16 +267,16 @@ setMethod("names", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' registerTempTable(df, "json_df") -#' new_df <- sql(sqlCtx, "SELECT * FROM json_df") +#' new_df <- sql(sqlContext, "SELECT * FROM json_df") #'} setMethod("registerTempTable", signature(x = "DataFrame", tableName = "character"), function(x, tableName) { - callJMethod(x@sdf, "registerTempTable", tableName) + invisible(callJMethod(x@sdf, "registerTempTable", tableName)) }) #' insertInto @@ -289,9 +293,9 @@ setMethod("registerTempTable", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df <- loadDF(sqlCtx, path, "parquet") -#' df2 <- loadDF(sqlCtx, path2, "parquet") +#' sqlContext <- sparkRSQL.init(sc) +#' df <- read.df(sqlContext, path, "parquet") +#' df2 <- read.df(sqlContext, path2, "parquet") #' registerTempTable(df, "table1") #' insertInto(df2, "table1", overwrite = TRUE) #'} @@ -312,9 +316,9 @@ setMethod("insertInto", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' cache(df) #'} setMethod("cache", @@ -337,9 +341,9 @@ setMethod("cache", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' persist(df, "MEMORY_AND_DISK") #'} setMethod("persist", @@ -362,9 +366,9 @@ setMethod("persist", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' persist(df, "MEMORY_AND_DISK") #' unpersist(df) #'} @@ -387,9 +391,9 @@ setMethod("unpersist", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' newDF <- repartition(df, 2L) #'} setMethod("repartition", @@ -399,23 +403,23 @@ setMethod("repartition", dataFrame(sdf) }) -#' toJSON -#' -#' Convert the rows of a DataFrame into JSON objects and return an RDD where -#' each element contains a JSON string. -#' -#' @param x A SparkSQL DataFrame -#' @return A StringRRDD of JSON objects -#' @rdname tojson -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' newRDD <- toJSON(df) -#'} +# toJSON +# +# Convert the rows of a DataFrame into JSON objects and return an RDD where +# each element contains a JSON string. 
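registerTempTable is now wrapped in invisible(), so registering a table no longer echoes the JVM return value; the workflow itself is unchanged. A short sketch chaining it with sql, persist and showDF, using the same placeholder JSON file and a hypothetical "age" column:
```
df <- jsonFile(sqlContext, "path/to/file.json")
registerTempTable(df, "people")                                     # returns invisibly now
adults <- sql(sqlContext, "SELECT * FROM people WHERE age >= 18")   # "age" is a placeholder column
persist(adults, "MEMORY_AND_DISK")
showDF(adults)                                                      # prints the first 20 rows by default
unpersist(adults)
```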
+# +#@param x A SparkSQL DataFrame +# @return A StringRRDD of JSON objects +# @rdname tojson +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlContext <- sparkRSQL.init(sc) +# path <- "path/to/file.json" +# df <- jsonFile(sqlContext, path) +# newRDD <- toJSON(df) +#} setMethod("toJSON", signature(x = "DataFrame"), function(x) { @@ -436,9 +440,9 @@ setMethod("toJSON", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' saveAsParquetFile(df, "/tmp/sparkr-tmp/") #'} setMethod("saveAsParquetFile", @@ -457,9 +461,9 @@ setMethod("saveAsParquetFile", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' distinctDF <- distinct(df) #'} setMethod("distinct", @@ -469,25 +473,26 @@ setMethod("distinct", dataFrame(sdf) }) -#' SampleDF +#' Sample #' #' Return a sampled subset of this DataFrame using a random seed. #' #' @param x A SparkSQL DataFrame #' @param withReplacement Sampling with replacement or not #' @param fraction The (rough) sample target fraction -#' @rdname sampleDF +#' @rdname sample +#' @aliases sample_frac #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' collect(sampleDF(df, FALSE, 0.5)) -#' collect(sampleDF(df, TRUE, 0.5)) +#' df <- jsonFile(sqlContext, path) +#' collect(sample(df, FALSE, 0.5)) +#' collect(sample(df, TRUE, 0.5)) #'} -setMethod("sampleDF", +setMethod("sample", # TODO : Figure out how to send integer as java.lang.Long to JVM so # we can send seed as an argument through callJMethod signature(x = "DataFrame", withReplacement = "logical", @@ -498,6 +503,15 @@ setMethod("sampleDF", dataFrame(sdf) }) +#' @rdname sample +#' @aliases sample +setMethod("sample_frac", + signature(x = "DataFrame", withReplacement = "logical", + fraction = "numeric"), + function(x, withReplacement, fraction) { + sample(x, withReplacement, fraction) + }) + #' Count #' #' Returns the number of rows in a DataFrame @@ -509,9 +523,9 @@ setMethod("sampleDF", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' count(df) #' } setMethod("count", @@ -531,9 +545,9 @@ setMethod("count", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' collected <- collect(df) #' firstName <- collected[[1]]$name #' } @@ -566,9 +580,9 @@ setMethod("collect", #' @examples #' \dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' limitedDF <- limit(df, 10) #' } setMethod("limit", @@ -578,16 +592,16 @@ setMethod("limit", dataFrame(res) }) -# Take the first NUM rows of a DataFrame and return a the results as a data.frame - +#' Take the first NUM rows of a DataFrame and return a the results as a data.frame +#' #' @rdname take #' @export #' @examples 
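sampleDF becomes sample, with sample_frac added as an alias that simply forwards its arguments; per the TODO in the method, a seed cannot yet be passed through callJMethod. A sketch mirroring the updated examples:
```
half <- sample(df, FALSE, 0.5)          # withReplacement = FALSE, fraction = 0.5
count(half)
collect(sample_frac(df, TRUE, 0.5))     # alias, same behaviour
take(limit(df, 10), 5)                  # limit returns a DataFrame, take a local data.frame
```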
#'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' take(df, 2) #' } setMethod("take", @@ -612,9 +626,9 @@ setMethod("take", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' head(df) #' } setMethod("head", @@ -633,9 +647,9 @@ setMethod("head", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' first(df) #' } setMethod("first", @@ -644,22 +658,22 @@ setMethod("first", take(x, 1) }) -#' toRDD() -#' -#' Converts a Spark DataFrame to an RDD while preserving column names. -#' -#' @param x A Spark DataFrame -#' -#' @rdname DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' rdd <- toRDD(df) -#' } +# toRDD() +# +# Converts a Spark DataFrame to an RDD while preserving column names. +# +# @param x A Spark DataFrame +# +# @rdname DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlContext <- sparkRSQL.init(sc) +# path <- "path/to/file.json" +# df <- jsonFile(sqlContext, path) +# rdd <- toRDD(df) +# } setMethod("toRDD", signature(x = "DataFrame"), function(x) { @@ -679,7 +693,8 @@ setMethod("toRDD", #' @param x a DataFrame #' @return a GroupedData #' @seealso GroupedData -#' @rdname DataFrame +#' @aliases group_by +#' @rdname groupBy #' @export #' @examples #' \dontrun{ @@ -702,11 +717,21 @@ setMethod("groupBy", groupedData(sgd) }) -#' Agg +#' @rdname groupBy +#' @aliases group_by +setMethod("group_by", + signature(x = "DataFrame"), + function(x, ...) { + groupBy(x, ...) + }) + +#' Summarize data across columns #' #' Compute aggregates by specifying a list of columns #' +#' @param x a DataFrame #' @rdname DataFrame +#' @aliases summarize #' @export setMethod("agg", signature(x = "DataFrame"), @@ -714,6 +739,14 @@ setMethod("agg", agg(groupBy(x), ...) }) +#' @rdname DataFrame +#' @aliases agg +setMethod("summarize", + signature(x = "DataFrame"), + function(x, ...) { + agg(x, ...) + }) + ############################## RDD Map Functions ################################## # All of the following functions mirror the existing RDD map functions, # @@ -721,7 +754,7 @@ setMethod("agg", # the requested map function. 
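group_by and summarize are thin wrappers that forward to groupBy and agg, giving the grouping API dplyr-flavoured names. A sketch of the equivalence; the aggregate expression is an assumption, since the GroupedData agg implementation lives in group.R rather than in this hunk:
```
# Illustrative; assumes agg/summarize accept Column aggregate expressions such as avg().
head(summarize(group_by(df, df$name), avg(df$age)))   # new spelling
head(agg(groupBy(df, df$name), avg(df$age)))          # equivalent old spelling
```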
# ################################################################################### -#' @rdname lapply +# @rdname lapply setMethod("lapply", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -729,14 +762,14 @@ setMethod("lapply", lapply(rdd, FUN) }) -#' @rdname lapply +# @rdname lapply setMethod("map", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { lapply(X, FUN) }) -#' @rdname flatMap +# @rdname flatMap setMethod("flatMap", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -744,7 +777,7 @@ setMethod("flatMap", flatMap(rdd, FUN) }) -#' @rdname lapplyPartition +# @rdname lapplyPartition setMethod("lapplyPartition", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { @@ -752,14 +785,14 @@ setMethod("lapplyPartition", lapplyPartition(rdd, FUN) }) -#' @rdname lapplyPartition +# @rdname lapplyPartition setMethod("mapPartitions", signature(X = "DataFrame", FUN = "function"), function(X, FUN) { lapplyPartition(X, FUN) }) -#' @rdname foreach +# @rdname foreach setMethod("foreach", signature(x = "DataFrame", func = "function"), function(x, func) { @@ -767,7 +800,7 @@ setMethod("foreach", foreach(rdd, func) }) -#' @rdname foreach +# @rdname foreach setMethod("foreachPartition", signature(x = "DataFrame", func = "function"), function(x, func) { @@ -788,6 +821,7 @@ setMethod("$", signature(x = "DataFrame"), getColumn(x, name) }) +#' @rdname select setMethod("$<-", signature(x = "DataFrame"), function(x, name, value) { stopifnot(class(value) == "Column" || is.null(value)) @@ -881,7 +915,7 @@ setMethod("select", signature(x = "DataFrame", col = "list"), function(x, col) { cols <- lapply(col, function(c) { - if (class(c)== "Column") { + if (class(c) == "Column") { c@jc } else { col(c)@jc @@ -904,9 +938,9 @@ setMethod("select", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' selectExpr(df, "col1", "(col2 * 5) as newCol") #' } setMethod("selectExpr", @@ -930,9 +964,9 @@ setMethod("selectExpr", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' newDF <- withColumn(df, "newCol", df$col1 * 5) #' } setMethod("withColumn", @@ -941,6 +975,42 @@ setMethod("withColumn", select(x, x$"*", alias(col, colName)) }) +#' Mutate +#' +#' Return a new DataFrame with the specified columns added. +#' +#' @param x A DataFrame +#' @param col a named argument of the form name = col +#' @return A new DataFrame with the new columns added. +#' @rdname withColumn +#' @aliases withColumn +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlContext, path) +#' newDF <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2) +#' names(newDF) # Will contain newCol, newCol2 +#' } +setMethod("mutate", + signature(x = "DataFrame"), + function(x, ...) { + cols <- list(...) + stopifnot(length(cols) > 0) + stopifnot(class(cols[[1]]) == "Column") + ns <- names(cols) + if (!is.null(ns)) { + for (n in ns) { + if (n != "") { + cols[[n]] <- alias(cols[[n]], n) + } + } + } + do.call(select, c(x, x$"*", cols)) + }) + #' WithColumnRenamed #' #' Rename an existing column in a DataFrame. 
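mutate generalises withColumn: it aliases each named Column argument to its name and funnels everything through select(x, x$"*", ...), so several derived columns can be added in one call. A sketch using the same placeholder columns as the roxygen example:
```
df2 <- mutate(df, newCol = df$col1 * 5, newCol2 = df$col1 * 2)
names(df2)                                             # includes newCol and newCol2
df3 <- withColumn(df, "newCol", df$col1 * 5)           # single-column form
df4 <- selectExpr(df, "col1", "(col2 * 5) as newCol")  # same idea via a SQL expression
```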
@@ -954,9 +1024,9 @@ setMethod("withColumn", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' newDF <- withColumnRenamed(df, "col1", "newCol1") #' } setMethod("withColumnRenamed", @@ -972,9 +1042,47 @@ setMethod("withColumnRenamed", select(x, cols) }) +#' Rename +#' +#' Rename an existing column in a DataFrame. +#' +#' @param x A DataFrame +#' @param newCol A named pair of the form new_column_name = existing_column +#' @return A DataFrame with the column name changed. +#' @rdname withColumnRenamed +#' @aliases withColumnRenamed +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlContext, path) +#' newDF <- rename(df, col1 = df$newCol1) +#' } +setMethod("rename", + signature(x = "DataFrame"), + function(x, ...) { + renameCols <- list(...) + stopifnot(length(renameCols) > 0) + stopifnot(class(renameCols[[1]]) == "Column") + newNames <- names(renameCols) + oldNames <- lapply(renameCols, function(col) { + callJMethod(col@jc, "toString") + }) + cols <- lapply(columns(x), function(c) { + if (c %in% oldNames) { + alias(col(c), newNames[[match(c, oldNames)]]) + } else { + col(c) + } + }) + select(x, cols) + }) + setClassUnion("characterOrColumn", c("character", "Column")) -#' SortDF +#' Arrange #' #' Sort a DataFrame by the specified column(s). #' @@ -982,19 +1090,19 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' @param col Either a Column object or character vector indicating the field to sort on #' @param ... Additional sorting fields #' @return A DataFrame where all elements are sorted. -#' @rdname sortDF +#' @rdname arrange #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' sortDF(df, df$col1) -#' sortDF(df, "col1") -#' sortDF(df, asc(df$col1), desc(abs(df$col2))) +#' df <- jsonFile(sqlContext, path) +#' arrange(df, df$col1) +#' arrange(df, "col1") +#' arrange(df, asc(df$col1), desc(abs(df$col2))) #' } -setMethod("sortDF", +setMethod("arrange", signature(x = "DataFrame", col = "characterOrColumn"), function(x, col, ...) { if (class(col) == "character") { @@ -1008,12 +1116,12 @@ setMethod("sortDF", dataFrame(sdf) }) -#' @rdname sortDF -#' @export +#' @rdname arrange +#' @aliases orderBy,DataFrame,function-method setMethod("orderBy", signature(x = "DataFrame", col = "characterOrColumn"), function(x, col) { - sortDF(x, col) + arrange(x, col) }) #' Filter @@ -1021,7 +1129,7 @@ setMethod("orderBy", #' Filter the rows of a DataFrame according to a given condition. #' #' @param x A DataFrame to be sorted. -#' @param condition The condition to sort on. This may either be a Column expression +#' @param condition The condition to filter on. This may either be a Column expression #' or a string containing a SQL statement #' @return A DataFrame containing only the rows that meet the condition. 
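rename adds a named-argument spelling for withColumnRenamed (new_column_name = existing column), and sortDF is renamed to arrange with orderBy kept as an alias. A sketch with the placeholder columns from the examples:
```
df2 <- rename(df, col1 = df$newCol1)              # renames newCol1 to col1
df3 <- withColumnRenamed(df, "col1", "newCol1")   # string-based form still available
arrange(df, asc(df$col1), desc(abs(df$col2)))     # formerly sortDF
orderBy(df, "col1")                               # alias for arrange
```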
#' @rdname filter @@ -1029,9 +1137,9 @@ setMethod("orderBy", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' filter(df, "col1 > 0") #' filter(df, df$col2 != "abcdefg") #' } @@ -1046,7 +1154,7 @@ setMethod("filter", }) #' @rdname filter -#' @export +#' @aliases where,DataFrame,function-method setMethod("where", signature(x = "DataFrame", condition = "characterOrColumn"), function(x, condition) { @@ -1069,9 +1177,9 @@ setMethod("where", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df1 <- jsonFile(sqlCtx, path) -#' df2 <- jsonFile(sqlCtx, path2) +#' sqlContext <- sparkRSQL.init(sc) +#' df1 <- jsonFile(sqlContext, path) +#' df2 <- jsonFile(sqlContext, path2) #' join(df1, df2) # Performs a Cartesian #' join(df1, df2, df1$col1 == df2$col2) # Performs an inner join based on expression #' join(df1, df2, df1$col1 == df2$col2, "right_outer") @@ -1101,6 +1209,7 @@ setMethod("join", #' #' Return a new DataFrame containing the union of rows in this DataFrame #' and another DataFrame. This is equivalent to `UNION ALL` in SQL. +#' Note that this does not remove duplicate rows across the two DataFrames. #' #' @param x A Spark DataFrame #' @param y A Spark DataFrame @@ -1110,9 +1219,9 @@ setMethod("join", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df1 <- jsonFile(sqlCtx, path) -#' df2 <- jsonFile(sqlCtx, path2) +#' sqlContext <- sparkRSQL.init(sc) +#' df1 <- jsonFile(sqlContext, path) +#' df2 <- jsonFile(sqlContext, path2) #' unioned <- unionAll(df, df2) #' } setMethod("unionAll", @@ -1135,9 +1244,9 @@ setMethod("unionAll", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df1 <- jsonFile(sqlCtx, path) -#' df2 <- jsonFile(sqlCtx, path2) +#' sqlContext <- sparkRSQL.init(sc) +#' df1 <- jsonFile(sqlContext, path) +#' df2 <- jsonFile(sqlContext, path2) #' intersectDF <- intersect(df, df2) #' } setMethod("intersect", @@ -1160,9 +1269,9 @@ setMethod("intersect", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df1 <- jsonFile(sqlCtx, path) -#' df2 <- jsonFile(sqlCtx, path2) +#' sqlContext <- sparkRSQL.init(sc) +#' df1 <- jsonFile(sqlContext, path) +#' df2 <- jsonFile(sqlContext, path2) #' exceptDF <- except(df, df2) #' } #' @rdname except @@ -1194,23 +1303,22 @@ setMethod("except", #' @param source A name for external data source #' @param mode One of 'append', 'overwrite', 'error', 'ignore' #' -#' @rdname saveAsTable +#' @rdname write.df #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) -#' saveAsTable(df, "myfile") +#' df <- jsonFile(sqlContext, path) +#' write.df(df, "myfile", "parquet", "overwrite") #' } -setMethod("saveDF", - signature(df = "DataFrame", path = 'character', source = 'character', - mode = 'character'), - function(df, path = NULL, source = NULL, mode = "append", ...){ +setMethod("write.df", + signature(df = "DataFrame", path = 'character'), + function(df, path, source = NULL, mode = "append", ...){ if (is.null(source)) { - sqlCtx <- get(".sparkRSQLsc", envir = .sparkREnv) - source <- callJMethod(sqlCtx, "getConf", "spark.sql.sources.default", + sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) + source <- callJMethod(sqlContext, 
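The join and set operations keep their shape, but their examples move to the sqlContext naming and unionAll is now documented as keeping duplicate rows. A combined sketch, assuming two DataFrames with compatible placeholder schemas:
```
df1 <- jsonFile(sqlContext, "path/to/file1.json")
df2 <- jsonFile(sqlContext, "path/to/file2.json")
joined  <- join(df1, df2, df1$col1 == df2$col2, "right_outer")
both    <- unionAll(df1, df2)                  # does not de-duplicate
common  <- intersect(df1, df2)
onlyOne <- except(df1, df2)
matched <- where(df1, df1$col2 != "abcdefg")   # where is an alias for filter
```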
"getConf", "spark.sql.sources.default", "org.apache.spark.sql.parquet") } allModes <- c("append", "overwrite", "error", "ignore") @@ -1225,6 +1333,14 @@ setMethod("saveDF", callJMethod(df@sdf, "save", source, jmode, options) }) +#' @rdname write.df +#' @aliases saveDF +#' @export +setMethod("saveDF", + signature(df = "DataFrame", path = 'character'), + function(df, path, source = NULL, mode = "append", ...){ + write.df(df, path, source, mode, ...) + }) #' saveAsTable #' @@ -1253,9 +1369,9 @@ setMethod("saveDF", #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' saveAsTable(df, "myfile") #' } setMethod("saveAsTable", @@ -1263,8 +1379,8 @@ setMethod("saveAsTable", mode = 'character'), function(df, tableName, source = NULL, mode="append", ...){ if (is.null(source)) { - sqlCtx <- get(".sparkRSQLsc", envir = .sparkREnv) - source <- callJMethod(sqlCtx, "getConf", "spark.sql.sources.default", + sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) + source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", "org.apache.spark.sql.parquet") } allModes <- c("append", "overwrite", "error", "ignore") @@ -1276,3 +1392,165 @@ setMethod("saveAsTable", callJMethod(df@sdf, "saveAsTable", tableName, source, jmode, options) }) +#' describe +#' +#' Computes statistics for numeric columns. +#' If no columns are given, this function computes statistics for all numerical columns. +#' +#' @param x A DataFrame to be computed. +#' @param col A string of name +#' @param ... Additional expressions +#' @return A DataFrame +#' @rdname describe +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlContext <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlContext, path) +#' describe(df) +#' describe(df, "col1") +#' describe(df, "col1", "col2") +#' } +setMethod("describe", + signature(x = "DataFrame", col = "character"), + function(x, col, ...) { + colList <- list(col, ...) + sdf <- callJMethod(x@sdf, "describe", listToSeq(colList)) + dataFrame(sdf) + }) + +#' @rdname describe +setMethod("describe", + signature(x = "DataFrame"), + function(x) { + colList <- as.list(c(columns(x))) + sdf <- callJMethod(x@sdf, "describe", listToSeq(colList)) + dataFrame(sdf) + }) + +#' dropna +#' +#' Returns a new DataFrame omitting rows with null values. +#' +#' @param x A SparkSQL DataFrame. +#' @param how "any" or "all". +#' if "any", drop a row if it contains any nulls. +#' if "all", drop a row only if all its values are null. +#' if minNonNulls is specified, how is ignored. +#' @param minNonNulls If specified, drop rows that have less than +#' minNonNulls non-null values. +#' This overwrites the how parameter. +#' @param cols Optional list of column names to consider. 
+#' @return A DataFrame +#' +#' @rdname nafunctions +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlCtx <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlCtx, path) +#' dropna(df) +#' } +setMethod("dropna", + signature(x = "DataFrame"), + function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) { + how <- match.arg(how) + if (is.null(cols)) { + cols <- columns(x) + } + if (is.null(minNonNulls)) { + minNonNulls <- if (how == "any") { length(cols) } else { 1 } + } + + naFunctions <- callJMethod(x@sdf, "na") + sdf <- callJMethod(naFunctions, "drop", + as.integer(minNonNulls), listToSeq(as.list(cols))) + dataFrame(sdf) + }) + +#' @aliases dropna +#' @export +setMethod("na.omit", + signature(x = "DataFrame"), + function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) { + dropna(x, how, minNonNulls, cols) + }) + +#' fillna +#' +#' Replace null values. +#' +#' @param x A SparkSQL DataFrame. +#' @param value Value to replace null values with. +#' Should be an integer, numeric, character or named list. +#' If the value is a named list, then cols is ignored and +#' value must be a mapping from column name (character) to +#' replacement value. The replacement value must be an +#' integer, numeric or character. +#' @param cols optional list of column names to consider. +#' Columns specified in cols that do not have matching data +#' type are ignored. For example, if value is a character, and +#' subset contains a non-character column, then the non-character +#' column is simply ignored. +#' @return A DataFrame +#' +#' @rdname nafunctions +#' @export +#' @examples +#'\dontrun{ +#' sc <- sparkR.init() +#' sqlCtx <- sparkRSQL.init(sc) +#' path <- "path/to/file.json" +#' df <- jsonFile(sqlCtx, path) +#' fillna(df, 1) +#' fillna(df, list("age" = 20, "name" = "unknown")) +#' } +setMethod("fillna", + signature(x = "DataFrame"), + function(x, value, cols = NULL) { + if (!(class(value) %in% c("integer", "numeric", "character", "list"))) { + stop("value should be an integer, numeric, charactor or named list.") + } + + if (class(value) == "list") { + # Check column names in the named list + colNames <- names(value) + if (length(colNames) == 0 || !all(colNames != "")) { + stop("value should be an a named list with each name being a column name.") + } + + # Convert to the named list to an environment to be passed to JVM + valueMap <- new.env() + for (col in colNames) { + # Check each item in the named list is of valid type + v <- value[[col]] + if (!(class(v) %in% c("integer", "numeric", "character"))) { + stop("Each item in value should be an integer, numeric or charactor.") + } + valueMap[[col]] <- v + } + + # When value is a named list, caller is expected not to pass in cols + if (!is.null(cols)) { + warning("When value is a named list, cols is ignored!") + cols <- NULL + } + + value <- valueMap + } else if (is.integer(value)) { + # Cast an integer to a numeric + value <- as.numeric(value) + } + + naFunctions <- callJMethod(x@sdf, "na") + sdf <- if (length(cols) == 0) { + callJMethod(naFunctions, "fill", value) + } else { + callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols))) + } + dataFrame(sdf) + }) diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index a3a0421a0746d..0513299515644 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -19,16 +19,16 @@ setOldClass("jobj") -#' @title S4 class that represents an RDD -#' @description RDD can be created using functions like -#' \code{parallelize}, \code{textFile} etc. 
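dropna (with na.omit as an alias) and fillna wrap the JVM-side na functions: dropna folds how/minNonNulls into a single non-null threshold, while fillna takes either a scalar or a named list of per-column replacements, in which case cols is ignored. A sketch mirroring the examples, with "age" and "name" as placeholder columns:
```
clean1 <- dropna(df)                                  # drop rows containing any null
clean2 <- dropna(df, how = "all", cols = c("age", "name"))
clean3 <- na.omit(df)                                 # alias for dropna
filled1 <- fillna(df, 1)                              # integers are cast to numeric first
filled2 <- fillna(df, list("age" = 20, "name" = "unknown"))
```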
-#' @rdname RDD -#' @seealso parallelize, textFile -#' -#' @slot env An R environment that stores bookkeeping states of the RDD -#' @slot jrdd Java object reference to the backing JavaRDD -#' to an RDD -#' @export +# @title S4 class that represents an RDD +# @description RDD can be created using functions like +# \code{parallelize}, \code{textFile} etc. +# @rdname RDD +# @seealso parallelize, textFile +# +# @slot env An R environment that stores bookkeeping states of the RDD +# @slot jrdd Java object reference to the backing JavaRDD +# to an RDD +# @export setClass("RDD", slots = list(env = "environment", jrdd = "jobj")) @@ -67,8 +67,8 @@ setMethod("initialize", "RDD", function(.Object, jrdd, serializedMode, }) setMethod("show", "RDD", - function(.Object) { - cat(paste(callJMethod(.Object@jrdd, "toString"), "\n", sep="")) + function(object) { + cat(paste(callJMethod(getJRDD(object), "toString"), "\n", sep="")) }) setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) { @@ -108,14 +108,14 @@ setMethod("initialize", "PipelinedRDD", function(.Object, prev, func, jrdd_val) .Object }) -#' @rdname RDD -#' @export -#' -#' @param jrdd Java object reference to the backing JavaRDD -#' @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD -#' stores strings, and "row" if the RDD stores the rows of a DataFrame -#' @param isCached TRUE if the RDD is cached -#' @param isCheckpointed TRUE if the RDD has been checkpointed +# @rdname RDD +# @export +# +# @param jrdd Java object reference to the backing JavaRDD +# @param serializedMode Use "byte" if the RDD stores data serialized in R, "string" if the RDD +# stores strings, and "row" if the RDD stores the rows of a DataFrame +# @param isCached TRUE if the RDD is cached +# @param isCheckpointed TRUE if the RDD has been checkpointed RDD <- function(jrdd, serializedMode = "byte", isCached = FALSE, isCheckpointed = FALSE) { new("RDD", jrdd, serializedMode, isCached, isCheckpointed) @@ -200,19 +200,19 @@ setValidity("RDD", ############ Actions and Transformations ############ -#' Persist an RDD -#' -#' Persist this RDD with the default storage level (MEMORY_ONLY). -#' -#' @param x The RDD to cache -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' cache(rdd) -#'} -#' @rdname cache-methods -#' @aliases cache,RDD-method +# Persist an RDD +# +# Persist this RDD with the default storage level (MEMORY_ONLY). +# +# @param x The RDD to cache +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# cache(rdd) +#} +# @rdname cache-methods +# @aliases cache,RDD-method setMethod("cache", signature(x = "RDD"), function(x) { @@ -221,45 +221,45 @@ setMethod("cache", x }) -#' Persist an RDD -#' -#' Persist this RDD with the specified storage level. For details of the -#' supported storage levels, refer to -#' http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence. -#' -#' @param x The RDD to persist -#' @param newLevel The new storage level to be assigned -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' persist(rdd, "MEMORY_AND_DISK") -#'} -#' @rdname persist -#' @aliases persist,RDD-method +# Persist an RDD +# +# Persist this RDD with the specified storage level. For details of the +# supported storage levels, refer to +# http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence. 
+# +# @param x The RDD to persist +# @param newLevel The new storage level to be assigned +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# persist(rdd, "MEMORY_AND_DISK") +#} +# @rdname persist +# @aliases persist,RDD-method setMethod("persist", signature(x = "RDD", newLevel = "character"), - function(x, newLevel) { + function(x, newLevel = "MEMORY_ONLY") { callJMethod(getJRDD(x), "persist", getStorageLevel(newLevel)) x@env$isCached <- TRUE x }) -#' Unpersist an RDD -#' -#' Mark the RDD as non-persistent, and remove all blocks for it from memory and -#' disk. -#' -#' @param x The RDD to unpersist -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' cache(rdd) # rdd@@env$isCached == TRUE -#' unpersist(rdd) # rdd@@env$isCached == FALSE -#'} -#' @rdname unpersist-methods -#' @aliases unpersist,RDD-method +# Unpersist an RDD +# +# Mark the RDD as non-persistent, and remove all blocks for it from memory and +# disk. +# +# @param x The RDD to unpersist +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# cache(rdd) # rdd@@env$isCached == TRUE +# unpersist(rdd) # rdd@@env$isCached == FALSE +#} +# @rdname unpersist-methods +# @aliases unpersist,RDD-method setMethod("unpersist", signature(x = "RDD"), function(x) { @@ -268,24 +268,24 @@ setMethod("unpersist", x }) -#' Checkpoint an RDD -#' -#' Mark this RDD for checkpointing. It will be saved to a file inside the -#' checkpoint directory set with setCheckpointDir() and all references to its -#' parent RDDs will be removed. This function must be called before any job has -#' been executed on this RDD. It is strongly recommended that this RDD is -#' persisted in memory, otherwise saving it on a file will require recomputation. -#' -#' @param x The RDD to checkpoint -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' setCheckpointDir(sc, "checkpoint") -#' rdd <- parallelize(sc, 1:10, 2L) -#' checkpoint(rdd) -#'} -#' @rdname checkpoint-methods -#' @aliases checkpoint,RDD-method +# Checkpoint an RDD +# +# Mark this RDD for checkpointing. It will be saved to a file inside the +# checkpoint directory set with setCheckpointDir() and all references to its +# parent RDDs will be removed. This function must be called before any job has +# been executed on this RDD. It is strongly recommended that this RDD is +# persisted in memory, otherwise saving it on a file will require recomputation. +# +# @param x The RDD to checkpoint +# @examples +#\dontrun{ +# sc <- sparkR.init() +# setCheckpointDir(sc, "checkpoint") +# rdd <- parallelize(sc, 1:10, 2L) +# checkpoint(rdd) +#} +# @rdname checkpoint-methods +# @aliases checkpoint,RDD-method setMethod("checkpoint", signature(x = "RDD"), function(x) { @@ -295,18 +295,18 @@ setMethod("checkpoint", x }) -#' Gets the number of partitions of an RDD -#' -#' @param x A RDD. -#' @return the number of partitions of rdd as an integer. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' numPartitions(rdd) # 2L -#'} -#' @rdname numPartitions -#' @aliases numPartitions,RDD-method +# Gets the number of partitions of an RDD +# +# @param x A RDD. +# @return the number of partitions of rdd as an integer. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# numPartitions(rdd) # 2L +#} +# @rdname numPartitions +# @aliases numPartitions,RDD-method setMethod("numPartitions", signature(x = "RDD"), function(x) { @@ -315,24 +315,24 @@ setMethod("numPartitions", callJMethod(partitions, "size") }) -#' Collect elements of an RDD -#' -#' @description -#' \code{collect} returns a list that contains all of the elements in this RDD. -#' -#' @param x The RDD to collect -#' @param ... Other optional arguments to collect -#' @param flatten FALSE if the list should not flattened -#' @return a list containing elements in the RDD -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2L) -#' collect(rdd) # list from 1 to 10 -#' collectPartition(rdd, 0L) # list from 1 to 5 -#'} -#' @rdname collect-methods -#' @aliases collect,RDD-method +# Collect elements of an RDD +# +# @description +# \code{collect} returns a list that contains all of the elements in this RDD. +# +# @param x The RDD to collect +# @param ... Other optional arguments to collect +# @param flatten FALSE if the list should not flattened +# @return a list containing elements in the RDD +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2L) +# collect(rdd) # list from 1 to 10 +# collectPartition(rdd, 0L) # list from 1 to 5 +#} +# @rdname collect-methods +# @aliases collect,RDD-method setMethod("collect", signature(x = "RDD"), function(x, flatten = TRUE) { @@ -343,12 +343,12 @@ setMethod("collect", }) -#' @description -#' \code{collectPartition} returns a list that contains all of the elements -#' in the specified partition of the RDD. -#' @param partitionId the partition to collect (starts from 0) -#' @rdname collect-methods -#' @aliases collectPartition,integer,RDD-method +# @description +# \code{collectPartition} returns a list that contains all of the elements +# in the specified partition of the RDD. +# @param partitionId the partition to collect (starts from 0) +# @rdname collect-methods +# @aliases collectPartition,integer,RDD-method setMethod("collectPartition", signature(x = "RDD", partitionId = "integer"), function(x, partitionId) { @@ -361,17 +361,17 @@ setMethod("collectPartition", serializedMode = getSerializedMode(x)) }) -#' @description -#' \code{collectAsMap} returns a named list as a map that contains all of the elements -#' in a key-value pair RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L) -#' collectAsMap(rdd) # list(`1` = 2, `3` = 4) -#'} -#' @rdname collect-methods -#' @aliases collectAsMap,RDD-method +# @description +# \code{collectAsMap} returns a named list as a map that contains all of the elements +# in a key-value pair RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4)), 2L) +# collectAsMap(rdd) # list(`1` = 2, `3` = 4) +#} +# @rdname collect-methods +# @aliases collectAsMap,RDD-method setMethod("collectAsMap", signature(x = "RDD"), function(x) { @@ -381,19 +381,19 @@ setMethod("collectAsMap", as.list(map) }) -#' Return the number of elements in the RDD. -#' -#' @param x The RDD to count -#' @return number of elements in the RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' count(rdd) # 10 -#' length(rdd) # Same as count -#'} -#' @rdname count -#' @aliases count,RDD-method +# Return the number of elements in the RDD. 
+# +# @param x The RDD to count +# @return number of elements in the RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# count(rdd) # 10 +# length(rdd) # Same as count +#} +# @rdname count +# @aliases count,RDD-method setMethod("count", signature(x = "RDD"), function(x) { @@ -405,31 +405,31 @@ setMethod("count", sum(as.integer(vals)) }) -#' Return the number of elements in the RDD -#' @export -#' @rdname count +# Return the number of elements in the RDD +# @export +# @rdname count setMethod("length", signature(x = "RDD"), function(x) { count(x) }) -#' Return the count of each unique value in this RDD as a list of -#' (value, count) pairs. -#' -#' Same as countByValue in Spark. -#' -#' @param x The RDD to count -#' @return list of (value, count) pairs, where count is number of each unique -#' value in rdd. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, c(1,2,3,2,1)) -#' countByValue(rdd) # (1,2L), (2,2L), (3,1L) -#'} -#' @rdname countByValue -#' @aliases countByValue,RDD-method +# Return the count of each unique value in this RDD as a list of +# (value, count) pairs. +# +# Same as countByValue in Spark. +# +# @param x The RDD to count +# @return list of (value, count) pairs, where count is number of each unique +# value in rdd. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, c(1,2,3,2,1)) +# countByValue(rdd) # (1,2L), (2,2L), (3,1L) +#} +# @rdname countByValue +# @aliases countByValue,RDD-method setMethod("countByValue", signature(x = "RDD"), function(x) { @@ -437,23 +437,23 @@ setMethod("countByValue", collect(reduceByKey(ones, `+`, numPartitions(x))) }) -#' Apply a function to all elements -#' -#' This function creates a new RDD by applying the given transformation to all -#' elements of the given RDD -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each element -#' @return a new RDD created by the transformation. -#' @rdname lapply -#' @aliases lapply -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) -#' collect(multiplyByTwo) # 2,4,6... -#'} +# Apply a function to all elements +# +# This function creates a new RDD by applying the given transformation to all +# elements of the given RDD +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each element +# @return a new RDD created by the transformation. +# @rdname lapply +# @aliases lapply +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# multiplyByTwo <- lapply(rdd, function(x) { x * 2 }) +# collect(multiplyByTwo) # 2,4,6... +#} setMethod("lapply", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -463,31 +463,31 @@ setMethod("lapply", lapplyPartitionsWithIndex(X, func) }) -#' @rdname lapply -#' @aliases map,RDD,function-method +# @rdname lapply +# @aliases map,RDD,function-method setMethod("map", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapply(X, FUN) }) -#' Flatten results after apply a function to all elements -#' -#' This function return a new RDD by first applying a function to all -#' elements of this RDD, and then flattening the results. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each element -#' @return a new RDD created by the transformation. 
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) -#' collect(multiplyByTwo) # 2,20,4,40,6,60... -#'} -#' @rdname flatMap -#' @aliases flatMap,RDD,function-method +# Flatten results after apply a function to all elements +# +# This function return a new RDD by first applying a function to all +# elements of this RDD, and then flattening the results. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each element +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# multiplyByTwo <- flatMap(rdd, function(x) { list(x*2, x*10) }) +# collect(multiplyByTwo) # 2,20,4,40,6,60... +#} +# @rdname flatMap +# @aliases flatMap,RDD,function-method setMethod("flatMap", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -500,83 +500,83 @@ setMethod("flatMap", lapplyPartition(X, partitionFunc) }) -#' Apply a function to each partition of an RDD -#' -#' Return a new RDD by applying a function to each partition of this RDD. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each partition. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) -#' collect(partitionSum) # 15, 40 -#'} -#' @rdname lapplyPartition -#' @aliases lapplyPartition,RDD,function-method +# Apply a function to each partition of an RDD +# +# Return a new RDD by applying a function to each partition of this RDD. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each partition. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# partitionSum <- lapplyPartition(rdd, function(part) { Reduce("+", part) }) +# collect(partitionSum) # 15, 40 +#} +# @rdname lapplyPartition +# @aliases lapplyPartition,RDD,function-method setMethod("lapplyPartition", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartitionsWithIndex(X, function(s, part) { FUN(part) }) }) -#' mapPartitions is the same as lapplyPartition. -#' -#' @rdname lapplyPartition -#' @aliases mapPartitions,RDD,function-method +# mapPartitions is the same as lapplyPartition. +# +# @rdname lapplyPartition +# @aliases mapPartitions,RDD,function-method setMethod("mapPartitions", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartition(X, FUN) }) -#' Return a new RDD by applying a function to each partition of this RDD, while -#' tracking the index of the original partition. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on each partition; takes the partition -#' index and a list of elements in the particular partition. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 5L) -#' prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { -#' partIndex * Reduce("+", part) }) -#' collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 -#'} -#' @rdname lapplyPartitionsWithIndex -#' @aliases lapplyPartitionsWithIndex,RDD,function-method +# Return a new RDD by applying a function to each partition of this RDD, while +# tracking the index of the original partition. 
+# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on each partition; takes the partition +# index and a list of elements in the particular partition. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 5L) +# prod <- lapplyPartitionsWithIndex(rdd, function(partIndex, part) { +# partIndex * Reduce("+", part) }) +# collect(prod, flatten = FALSE) # 0, 7, 22, 45, 76 +#} +# @rdname lapplyPartitionsWithIndex +# @aliases lapplyPartitionsWithIndex,RDD,function-method setMethod("lapplyPartitionsWithIndex", signature(X = "RDD", FUN = "function"), function(X, FUN) { PipelinedRDD(X, FUN) }) -#' @rdname lapplyPartitionsWithIndex -#' @aliases mapPartitionsWithIndex,RDD,function-method +# @rdname lapplyPartitionsWithIndex +# @aliases mapPartitionsWithIndex,RDD,function-method setMethod("mapPartitionsWithIndex", signature(X = "RDD", FUN = "function"), function(X, FUN) { lapplyPartitionsWithIndex(X, FUN) }) -#' This function returns a new RDD containing only the elements that satisfy -#' a predicate (i.e. returning TRUE in a given logical function). -#' The same as `filter()' in Spark. -#' -#' @param x The RDD to be filtered. -#' @param f A unary predicate function. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) -#'} -#' @rdname filterRDD -#' @aliases filterRDD,RDD,function-method +# This function returns a new RDD containing only the elements that satisfy +# a predicate (i.e. returning TRUE in a given logical function). +# The same as `filter()' in Spark. +# +# @param x The RDD to be filtered. +# @param f A unary predicate function. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# unlist(collect(filterRDD(rdd, function (x) { x < 3 }))) # c(1, 2) +#} +# @rdname filterRDD +# @aliases filterRDD,RDD,function-method setMethod("filterRDD", signature(x = "RDD", f = "function"), function(x, f) { @@ -586,30 +586,30 @@ setMethod("filterRDD", lapplyPartition(x, filter.func) }) -#' @rdname filterRDD -#' @aliases Filter +# @rdname filterRDD +# @aliases Filter setMethod("Filter", signature(f = "function", x = "RDD"), function(f, x) { filterRDD(x, f) }) -#' Reduce across elements of an RDD. -#' -#' This function reduces the elements of this RDD using the -#' specified commutative and associative binary operator. -#' -#' @param x The RDD to reduce -#' @param func Commutative and associative function to apply on elements -#' of the RDD. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' reduce(rdd, "+") # 55 -#'} -#' @rdname reduce -#' @aliases reduce,RDD,ANY-method +# Reduce across elements of an RDD. +# +# This function reduces the elements of this RDD using the +# specified commutative and associative binary operator. +# +# @param x The RDD to reduce +# @param func Commutative and associative function to apply on elements +# of the RDD. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# reduce(rdd, "+") # 55 +#} +# @rdname reduce +# @aliases reduce,RDD,ANY-method setMethod("reduce", signature(x = "RDD", func = "ANY"), function(x, func) { @@ -623,70 +623,70 @@ setMethod("reduce", Reduce(func, partitionList) }) -#' Get the maximum element of an RDD. 
-#' -#' @param x The RDD to get the maximum element from -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' maximum(rdd) # 10 -#'} -#' @rdname maximum -#' @aliases maximum,RDD +# Get the maximum element of an RDD. +# +# @param x The RDD to get the maximum element from +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# maximum(rdd) # 10 +#} +# @rdname maximum +# @aliases maximum,RDD setMethod("maximum", signature(x = "RDD"), function(x) { reduce(x, max) }) -#' Get the minimum element of an RDD. -#' -#' @param x The RDD to get the minimum element from -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' minimum(rdd) # 1 -#'} -#' @rdname minimum -#' @aliases minimum,RDD +# Get the minimum element of an RDD. +# +# @param x The RDD to get the minimum element from +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# minimum(rdd) # 1 +#} +# @rdname minimum +# @aliases minimum,RDD setMethod("minimum", signature(x = "RDD"), function(x) { reduce(x, min) }) -#' Add up the elements in an RDD. -#' -#' @param x The RDD to add up the elements in -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' sumRDD(rdd) # 55 -#'} -#' @rdname sumRDD -#' @aliases sumRDD,RDD +# Add up the elements in an RDD. +# +# @param x The RDD to add up the elements in +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# sumRDD(rdd) # 55 +#} +# @rdname sumRDD +# @aliases sumRDD,RDD setMethod("sumRDD", signature(x = "RDD"), function(x) { reduce(x, "+") }) -#' Applies a function to all elements in an RDD, and force evaluation. -#' -#' @param x The RDD to apply the function -#' @param func The function to be applied. -#' @return invisible NULL. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' foreach(rdd, function(x) { save(x, file=...) }) -#'} -#' @rdname foreach -#' @aliases foreach,RDD,function-method +# Applies a function to all elements in an RDD, and force evaluation. +# +# @param x The RDD to apply the function +# @param func The function to be applied. +# @return invisible NULL. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# foreach(rdd, function(x) { save(x, file=...) }) +#} +# @rdname foreach +# @aliases foreach,RDD,function-method setMethod("foreach", signature(x = "RDD", func = "function"), function(x, func) { @@ -697,37 +697,37 @@ setMethod("foreach", invisible(collect(mapPartitions(x, partition.func))) }) -#' Applies a function to each partition in an RDD, and force evaluation. -#' -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' foreachPartition(rdd, function(part) { save(part, file=...); NULL }) -#'} -#' @rdname foreach -#' @aliases foreachPartition,RDD,function-method +# Applies a function to each partition in an RDD, and force evaluation. +# +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# foreachPartition(rdd, function(part) { save(part, file=...); NULL }) +#} +# @rdname foreach +# @aliases foreachPartition,RDD,function-method setMethod("foreachPartition", signature(x = "RDD", func = "function"), function(x, func) { invisible(collect(mapPartitions(x, func))) }) -#' Take elements from an RDD. -#' -#' This function takes the first NUM elements in the RDD and -#' returns them in a list. 
-#' -#' @param x The RDD to take elements from -#' @param num Number of elements to take -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' take(rdd, 2L) # list(1, 2) -#'} -#' @rdname take -#' @aliases take,RDD,numeric-method +# Take elements from an RDD. +# +# This function takes the first NUM elements in the RDD and +# returns them in a list. +# +# @param x The RDD to take elements from +# @param num Number of elements to take +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# take(rdd, 2L) # list(1, 2) +#} +# @rdname take +# @aliases take,RDD,numeric-method setMethod("take", signature(x = "RDD", num = "numeric"), function(x, num) { @@ -762,42 +762,42 @@ setMethod("take", }) -#' First -#' -#' Return the first element of an RDD -#' -#' @rdname first -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' first(rdd) -#' } +# First +# +# Return the first element of an RDD +# +# @rdname first +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# first(rdd) +# } setMethod("first", signature(x = "RDD"), function(x) { take(x, 1)[[1]] }) -#' Removes the duplicates from RDD. -#' -#' This function returns a new RDD containing the distinct elements in the -#' given RDD. The same as `distinct()' in Spark. -#' -#' @param x The RDD to remove duplicates from. -#' @param numPartitions Number of partitions to create. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, c(1,2,2,3,3,3)) -#' sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) -#'} -#' @rdname distinct -#' @aliases distinct,RDD-method +# Removes the duplicates from RDD. +# +# This function returns a new RDD containing the distinct elements in the +# given RDD. The same as `distinct()' in Spark. +# +# @param x The RDD to remove duplicates from. +# @param numPartitions Number of partitions to create. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, c(1,2,2,3,3,3)) +# sort(unlist(collect(distinct(rdd)))) # c(1, 2, 3) +#} +# @rdname distinct +# @aliases distinct,RDD-method setMethod("distinct", signature(x = "RDD"), - function(x, numPartitions = SparkR::numPartitions(x)) { + function(x, numPartitions = SparkR:::numPartitions(x)) { identical.mapped <- lapply(x, function(x) { list(x, NULL) }) reduced <- reduceByKey(identical.mapped, function(x, y) { x }, @@ -806,24 +806,24 @@ setMethod("distinct", resRDD }) -#' Return an RDD that is a sampled subset of the given RDD. -#' -#' The same as `sample()' in Spark. (We rename it due to signature -#' inconsistencies with the `sample()' function in R's base package.) -#' -#' @param x The RDD to sample elements from -#' @param withReplacement Sampling with replacement or not -#' @param fraction The (rough) sample target fraction -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements -#' collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates -#'} -#' @rdname sampleRDD -#' @aliases sampleRDD,RDD +# Return an RDD that is a sampled subset of the given RDD. +# +# The same as `sample()' in Spark. (We rename it due to signature +# inconsistencies with the `sample()' function in R's base package.) 
+# +# @param x The RDD to sample elements from +# @param withReplacement Sampling with replacement or not +# @param fraction The (rough) sample target fraction +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# collect(sampleRDD(rdd, FALSE, 0.5, 1618L)) # ~5 distinct elements +# collect(sampleRDD(rdd, TRUE, 0.5, 9L)) # ~5 elements possibly with duplicates +#} +# @rdname sampleRDD +# @aliases sampleRDD,RDD setMethod("sampleRDD", signature(x = "RDD", withReplacement = "logical", fraction = "numeric", seed = "integer"), @@ -867,23 +867,23 @@ setMethod("sampleRDD", lapplyPartitionsWithIndex(x, samplingFunc) }) -#' Return a list of the elements that are a sampled subset of the given RDD. -#' -#' @param x The RDD to sample elements from -#' @param withReplacement Sampling with replacement or not -#' @param num Number of elements to return -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:100) -#' # exactly 5 elements sampled, which may not be distinct -#' takeSample(rdd, TRUE, 5L, 1618L) -#' # exactly 5 distinct elements sampled -#' takeSample(rdd, FALSE, 5L, 16181618L) -#'} -#' @rdname takeSample -#' @aliases takeSample,RDD +# Return a list of the elements that are a sampled subset of the given RDD. +# +# @param x The RDD to sample elements from +# @param withReplacement Sampling with replacement or not +# @param num Number of elements to return +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:100) +# # exactly 5 elements sampled, which may not be distinct +# takeSample(rdd, TRUE, 5L, 1618L) +# # exactly 5 distinct elements sampled +# takeSample(rdd, FALSE, 5L, 16181618L) +#} +# @rdname takeSample +# @aliases takeSample,RDD setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", num = "integer", seed = "integer"), function(x, withReplacement, num, seed) { @@ -927,21 +927,21 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", MAXINT))))) # TODO(zongheng): investigate if this call is an in-place shuffle? - sample(samples)[1:total] + base::sample(samples)[1:total] }) -#' Creates tuples of the elements in this RDD by applying a function. -#' -#' @param x The RDD. -#' @param func The function to be applied. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3)) -#' collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) -#'} -#' @rdname keyBy -#' @aliases keyBy,RDD +# Creates tuples of the elements in this RDD by applying a function. +# +# @param x The RDD. +# @param func The function to be applied. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3)) +# collect(keyBy(rdd, function(x) { x*x })) # list(list(1, 1), list(4, 2), list(9, 3)) +#} +# @rdname keyBy +# @aliases keyBy,RDD setMethod("keyBy", signature(x = "RDD", func = "function"), function(x, func) { @@ -951,52 +951,52 @@ setMethod("keyBy", lapply(x, apply.func) }) -#' Return a new RDD that has exactly numPartitions partitions. -#' Can increase or decrease the level of parallelism in this RDD. Internally, -#' this uses a shuffle to redistribute data. -#' If you are decreasing the number of partitions in this RDD, consider using -#' coalesce, which can avoid performing a shuffle. -#' -#' @param x The RDD. -#' @param numPartitions Number of partitions to create. 
-#' @seealso coalesce -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) -#' numPartitions(rdd) # 4 -#' numPartitions(repartition(rdd, 2L)) # 2 -#'} -#' @rdname repartition -#' @aliases repartition,RDD +# Return a new RDD that has exactly numPartitions partitions. +# Can increase or decrease the level of parallelism in this RDD. Internally, +# this uses a shuffle to redistribute data. +# If you are decreasing the number of partitions in this RDD, consider using +# coalesce, which can avoid performing a shuffle. +# +# @param x The RDD. +# @param numPartitions Number of partitions to create. +# @seealso coalesce +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5, 6, 7), 4L) +# numPartitions(rdd) # 4 +# numPartitions(repartition(rdd, 2L)) # 2 +#} +# @rdname repartition +# @aliases repartition,RDD setMethod("repartition", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions) { coalesce(x, numPartitions, TRUE) }) -#' Return a new RDD that is reduced into numPartitions partitions. -#' -#' @param x The RDD. -#' @param numPartitions Number of partitions to create. -#' @seealso repartition -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L) -#' numPartitions(rdd) # 3 -#' numPartitions(coalesce(rdd, 1L)) # 1 -#'} -#' @rdname coalesce -#' @aliases coalesce,RDD +# Return a new RDD that is reduced into numPartitions partitions. +# +# @param x The RDD. +# @param numPartitions Number of partitions to create. +# @seealso repartition +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5), 3L) +# numPartitions(rdd) # 3 +# numPartitions(coalesce(rdd, 1L)) # 1 +#} +# @rdname coalesce +# @aliases coalesce,RDD setMethod("coalesce", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions, shuffle = FALSE) { numPartitions <- numToInt(numPartitions) - if (shuffle || numPartitions > SparkR::numPartitions(x)) { + if (shuffle || numPartitions > SparkR:::numPartitions(x)) { func <- function(partIndex, part) { set.seed(partIndex) # partIndex as seed - start <- as.integer(sample(numPartitions, 1) - 1) + start <- as.integer(base::sample(numPartitions, 1) - 1) lapply(seq_along(part), function(i) { pos <- (start + i) %% numPartitions @@ -1012,19 +1012,19 @@ setMethod("coalesce", } }) -#' Save this RDD as a SequenceFile of serialized objects. -#' -#' @param x The RDD to save -#' @param path The directory where the file is saved -#' @seealso objectFile -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' saveAsObjectFile(rdd, "/tmp/sparkR-tmp") -#'} -#' @rdname saveAsObjectFile -#' @aliases saveAsObjectFile,RDD +# Save this RDD as a SequenceFile of serialized objects. +# +# @param x The RDD to save +# @param path The directory where the file is saved +# @seealso objectFile +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# saveAsObjectFile(rdd, "/tmp/sparkR-tmp") +#} +# @rdname saveAsObjectFile +# @aliases saveAsObjectFile,RDD setMethod("saveAsObjectFile", signature(x = "RDD", path = "character"), function(x, path) { @@ -1037,18 +1037,18 @@ setMethod("saveAsObjectFile", invisible(callJMethod(getJRDD(x), "saveAsObjectFile", path)) }) -#' Save this RDD as a text file, using string representations of elements. 
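A sketch contrasting repartition and coalesce as described above: repartition always shuffles, while coalesce with the default shuffle = FALSE only merges existing partitions:

  rdd <- parallelize(sc, 1:7, 4L)
  numPartitions(rdd)                   # 4
  numPartitions(repartition(rdd, 2L))  # 2, via a shuffle
  numPartitions(coalesce(rdd, 2L))     # 2, no shuffle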
-#' -#' @param x The RDD to save -#' @param path The directory where the partitions of the text file are saved -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' saveAsTextFile(rdd, "/tmp/sparkR-tmp") -#'} -#' @rdname saveAsTextFile -#' @aliases saveAsTextFile,RDD +# Save this RDD as a text file, using string representations of elements. +# +# @param x The RDD to save +# @param path The directory where the partitions of the text file are saved +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# saveAsTextFile(rdd, "/tmp/sparkR-tmp") +#} +# @rdname saveAsTextFile +# @aliases saveAsTextFile,RDD setMethod("saveAsTextFile", signature(x = "RDD", path = "character"), function(x, path) { @@ -1061,24 +1061,24 @@ setMethod("saveAsTextFile", callJMethod(getJRDD(stringRdd, serializedMode = "string"), "saveAsTextFile", path)) }) -#' Sort an RDD by the given key function. -#' -#' @param x An RDD to be sorted. -#' @param func A function used to compute the sort key for each element. -#' @param ascending A flag to indicate whether the sorting is ascending or descending. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where all elements are sorted. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(3, 2, 1)) -#' collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) -#'} -#' @rdname sortBy -#' @aliases sortBy,RDD,RDD-method +# Sort an RDD by the given key function. +# +# @param x An RDD to be sorted. +# @param func A function used to compute the sort key for each element. +# @param ascending A flag to indicate whether the sorting is ascending or descending. +# @param numPartitions Number of partitions to create. +# @return An RDD where all elements are sorted. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(3, 2, 1)) +# collect(sortBy(rdd, function(x) { x })) # list (1, 2, 3) +#} +# @rdname sortBy +# @aliases sortBy,RDD,RDD-method setMethod("sortBy", signature(x = "RDD", func = "function"), - function(x, func, ascending = TRUE, numPartitions = SparkR::numPartitions(x)) { + function(x, func, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) { values(sortByKey(keyBy(x, func), ascending, numPartitions)) }) @@ -1137,97 +1137,97 @@ takeOrderedElem <- function(x, num, ascending = TRUE) { resList } -#' Returns the first N elements from an RDD in ascending order. -#' -#' @param x An RDD. -#' @param num Number of elements to return. -#' @return The first N elements from the RDD in ascending order. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) -#' takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6) -#'} -#' @rdname takeOrdered -#' @aliases takeOrdered,RDD,RDD-method +# Returns the first N elements from an RDD in ascending order. +# +# @param x An RDD. +# @param num Number of elements to return. +# @return The first N elements from the RDD in ascending order. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) +# takeOrdered(rdd, 6L) # list(1, 2, 3, 4, 5, 6) +#} +# @rdname takeOrdered +# @aliases takeOrdered,RDD,RDD-method setMethod("takeOrdered", signature(x = "RDD", num = "integer"), function(x, num) { takeOrderedElem(x, num) }) -#' Returns the top N elements from an RDD. -#' -#' @param x An RDD. -#' @param num Number of elements to return. -#' @return The top N elements from the RDD. 
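A small ordering sketch based on the comments above; sortBy keys each element with the supplied function and returns an RDD, while takeOrdered returns a local list:

  rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7))
  collect(sortBy(rdd, function(x) { x }))  # ascending by the identity key
  takeOrdered(rdd, 3L)                     # list(1, 2, 3)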
-#' @rdname top -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) -#' top(rdd, 6L) # list(10, 9, 7, 6, 5, 4) -#'} -#' @rdname top -#' @aliases top,RDD,RDD-method +# Returns the top N elements from an RDD. +# +# @param x An RDD. +# @param num Number of elements to return. +# @return The top N elements from the RDD. +# @rdname top +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(10, 1, 2, 9, 3, 4, 5, 6, 7)) +# top(rdd, 6L) # list(10, 9, 7, 6, 5, 4) +#} +# @rdname top +# @aliases top,RDD,RDD-method setMethod("top", signature(x = "RDD", num = "integer"), function(x, num) { takeOrderedElem(x, num, FALSE) }) -#' Fold an RDD using a given associative function and a neutral "zero value". -#' -#' Aggregate the elements of each partition, and then the results for all the -#' partitions, using a given associative function and a neutral "zero value". -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param op An associative function for the folding operation. -#' @return The folding result. -#' @rdname fold -#' @seealso reduce -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4, 5)) -#' fold(rdd, 0, "+") # 15 -#'} -#' @rdname fold -#' @aliases fold,RDD,RDD-method +# Fold an RDD using a given associative function and a neutral "zero value". +# +# Aggregate the elements of each partition, and then the results for all the +# partitions, using a given associative function and a neutral "zero value". +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param op An associative function for the folding operation. +# @return The folding result. +# @rdname fold +# @seealso reduce +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4, 5)) +# fold(rdd, 0, "+") # 15 +#} +# @rdname fold +# @aliases fold,RDD,RDD-method setMethod("fold", signature(x = "RDD", zeroValue = "ANY", op = "ANY"), function(x, zeroValue, op) { aggregateRDD(x, zeroValue, op, op) }) -#' Aggregate an RDD using the given combine functions and a neutral "zero value". -#' -#' Aggregate the elements of each partition, and then the results for all the -#' partitions, using given combine functions and a neutral "zero value". -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param seqOp A function to aggregate the RDD elements. It may return a different -#' result type from the type of the RDD elements. -#' @param combOp A function to aggregate results of seqOp. -#' @return The aggregation result. -#' @rdname aggregateRDD -#' @seealso reduce -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1, 2, 3, 4)) -#' zeroValue <- list(0, 0) -#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } -#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } -#' aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4) -#'} -#' @rdname aggregateRDD -#' @aliases aggregateRDD,RDD,RDD-method +# Aggregate an RDD using the given combine functions and a neutral "zero value". +# +# Aggregate the elements of each partition, and then the results for all the +# partitions, using given combine functions and a neutral "zero value". +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param seqOp A function to aggregate the RDD elements. It may return a different +# result type from the type of the RDD elements. 
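A one-line sketch of fold, assuming the session above; with 0 as the neutral value and "+" as the associative op it reduces to a sum:

  rdd <- parallelize(sc, list(1, 2, 3, 4, 5))
  fold(rdd, 0, "+")  # 15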
+# @param combOp A function to aggregate results of seqOp. +# @return The aggregation result. +# @rdname aggregateRDD +# @seealso reduce +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1, 2, 3, 4)) +# zeroValue <- list(0, 0) +# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } +# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } +# aggregateRDD(rdd, zeroValue, seqOp, combOp) # list(10, 4) +#} +# @rdname aggregateRDD +# @aliases aggregateRDD,RDD,RDD-method setMethod("aggregateRDD", signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY"), function(x, zeroValue, seqOp, combOp) { @@ -1240,25 +1240,25 @@ setMethod("aggregateRDD", Reduce(combOp, partitionList, zeroValue) }) -#' Pipes elements to a forked external process. -#' -#' The same as 'pipe()' in Spark. -#' -#' @param x The RDD whose elements are piped to the forked external process. -#' @param command The command to fork an external process. -#' @param env A named list to set environment variables of the external process. -#' @return A new RDD created by piping all elements to a forked external process. -#' @rdname pipeRDD -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' collect(pipeRDD(rdd, "more") -#' Output: c("1", "2", ..., "10") -#'} -#' @rdname pipeRDD -#' @aliases pipeRDD,RDD,character-method +# Pipes elements to a forked external process. +# +# The same as 'pipe()' in Spark. +# +# @param x The RDD whose elements are piped to the forked external process. +# @param command The command to fork an external process. +# @param env A named list to set environment variables of the external process. +# @return A new RDD created by piping all elements to a forked external process. +# @rdname pipeRDD +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# collect(pipeRDD(rdd, "more") +# Output: c("1", "2", ..., "10") +#} +# @rdname pipeRDD +# @aliases pipeRDD,RDD,character-method setMethod("pipeRDD", signature(x = "RDD", command = "character"), function(x, command, env = list()) { @@ -1274,41 +1274,41 @@ setMethod("pipeRDD", }) # TODO: Consider caching the name in the RDD's environment -#' Return an RDD's name. -#' -#' @param x The RDD whose name is returned. -#' @rdname name -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1,2,3)) -#' name(rdd) # NULL (if not set before) -#'} -#' @rdname name -#' @aliases name,RDD +# Return an RDD's name. +# +# @param x The RDD whose name is returned. +# @rdname name +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1,2,3)) +# name(rdd) # NULL (if not set before) +#} +# @rdname name +# @aliases name,RDD setMethod("name", signature(x = "RDD"), function(x) { callJMethod(getJRDD(x), "name") }) -#' Set an RDD's name. -#' -#' @param x The RDD whose name is to be set. -#' @param name The RDD name to be set. -#' @return a new RDD renamed. -#' @rdname setName -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(1,2,3)) -#' setName(rdd, "myRDD") -#' name(rdd) # "myRDD" -#'} -#' @rdname setName -#' @aliases setName,RDD +# Set an RDD's name. +# +# @param x The RDD whose name is to be set. +# @param name The RDD name to be set. +# @return a new RDD renamed. 
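A sketch expanding the aggregateRDD example above: the seqOp/combOp pair accumulates a (sum, count) pair per partition, so the mean can be derived locally afterwards:

  rdd <- parallelize(sc, list(1, 2, 3, 4))
  seqOp  <- function(acc, v) { list(acc[[1]] + v, acc[[2]] + 1) }
  combOp <- function(a, b) { list(a[[1]] + b[[1]], a[[2]] + b[[2]]) }
  res <- aggregateRDD(rdd, list(0, 0), seqOp, combOp)  # list(10, 4)
  res[[1]] / res[[2]]                                  # mean: 2.5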
+# @rdname setName +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(1,2,3)) +# setName(rdd, "myRDD") +# name(rdd) # "myRDD" +#} +# @rdname setName +# @aliases setName,RDD setMethod("setName", signature(x = "RDD", name = "character"), function(x, name) { @@ -1316,25 +1316,25 @@ setMethod("setName", x }) -#' Zip an RDD with generated unique Long IDs. -#' -#' Items in the kth partition will get ids k, n+k, 2*n+k, ..., where -#' n is the number of partitions. So there may exist gaps, but this -#' method won't trigger a spark job, which is different from -#' zipWithIndex. -#' -#' @param x An RDD to be zipped. -#' @return An RDD with zipped items. -#' @seealso zipWithIndex -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithUniqueId(rdd)) -#' # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) -#'} -#' @rdname zipWithUniqueId -#' @aliases zipWithUniqueId,RDD +# Zip an RDD with generated unique Long IDs. +# +# Items in the kth partition will get ids k, n+k, 2*n+k, ..., where +# n is the number of partitions. So there may exist gaps, but this +# method won't trigger a spark job, which is different from +# zipWithIndex. +# +# @param x An RDD to be zipped. +# @return An RDD with zipped items. +# @seealso zipWithIndex +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) +# collect(zipWithUniqueId(rdd)) +# # list(list("a", 0), list("b", 3), list("c", 1), list("d", 4), list("e", 2)) +#} +# @rdname zipWithUniqueId +# @aliases zipWithUniqueId,RDD setMethod("zipWithUniqueId", signature(x = "RDD"), function(x) { @@ -1353,28 +1353,28 @@ setMethod("zipWithUniqueId", lapplyPartitionsWithIndex(x, partitionFunc) }) -#' Zip an RDD with its element indices. -#' -#' The ordering is first based on the partition index and then the -#' ordering of items within each partition. So the first item in -#' the first partition gets index 0, and the last item in the last -#' partition receives the largest index. -#' -#' This method needs to trigger a Spark job when this RDD contains -#' more than one partition. -#' -#' @param x An RDD to be zipped. -#' @return An RDD with zipped items. -#' @seealso zipWithUniqueId -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) -#' collect(zipWithIndex(rdd)) -#' # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) -#'} -#' @rdname zipWithIndex -#' @aliases zipWithIndex,RDD +# Zip an RDD with its element indices. +# +# The ordering is first based on the partition index and then the +# ordering of items within each partition. So the first item in +# the first partition gets index 0, and the last item in the last +# partition receives the largest index. +# +# This method needs to trigger a Spark job when this RDD contains +# more than one partition. +# +# @param x An RDD to be zipped. +# @return An RDD with zipped items. 
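A sketch contrasting the two id-zipping helpers described above: zipWithIndex assigns consecutive indices (and may trigger a job), while zipWithUniqueId assigns per-partition ids without one, possibly leaving gaps:

  rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L)
  collect(zipWithIndex(rdd))     # indices 0, 1, 2, 3, 4 in order
  collect(zipWithUniqueId(rdd))  # unique ids, not necessarily consecutive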
+# @seealso zipWithUniqueId +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) +# collect(zipWithIndex(rdd)) +# # list(list("a", 0), list("b", 1), list("c", 2), list("d", 3), list("e", 4)) +#} +# @rdname zipWithIndex +# @aliases zipWithIndex,RDD setMethod("zipWithIndex", signature(x = "RDD"), function(x) { @@ -1406,20 +1406,20 @@ setMethod("zipWithIndex", lapplyPartitionsWithIndex(x, partitionFunc) }) -#' Coalesce all elements within each partition of an RDD into a list. -#' -#' @param x An RDD. -#' @return An RDD created by coalescing all elements within -#' each partition into a list. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, as.list(1:4), 2L) -#' collect(glom(rdd)) -#' # list(list(1, 2), list(3, 4)) -#'} -#' @rdname glom -#' @aliases glom,RDD +# Coalesce all elements within each partition of an RDD into a list. +# +# @param x An RDD. +# @return An RDD created by coalescing all elements within +# each partition into a list. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, as.list(1:4), 2L) +# collect(glom(rdd)) +# # list(list(1, 2), list(3, 4)) +#} +# @rdname glom +# @aliases glom,RDD setMethod("glom", signature(x = "RDD"), function(x) { @@ -1432,21 +1432,21 @@ setMethod("glom", ############ Binary Functions ############# -#' Return the union RDD of two RDDs. -#' The same as union() in Spark. -#' -#' @param x An RDD. -#' @param y An RDD. -#' @return a new RDD created by performing the simple union (witout removing -#' duplicates) of two input RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3) -#' unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3 -#'} -#' @rdname unionRDD -#' @aliases unionRDD,RDD,RDD-method +# Return the union RDD of two RDDs. +# The same as union() in Spark. +# +# @param x An RDD. +# @param y An RDD. +# @return a new RDD created by performing the simple union (witout removing +# duplicates) of two input RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3) +# unionRDD(rdd, rdd) # 1, 2, 3, 1, 2, 3 +#} +# @rdname unionRDD +# @aliases unionRDD,RDD,RDD-method setMethod("unionRDD", signature(x = "RDD", y = "RDD"), function(x, y) { @@ -1463,27 +1463,27 @@ setMethod("unionRDD", union.rdd }) -#' Zip an RDD with another RDD. -#' -#' Zips this RDD with another one, returning key-value pairs with the -#' first element in each RDD second element in each RDD, etc. Assumes -#' that the two RDDs have the same number of partitions and the same -#' number of elements in each partition (e.g. one was made through -#' a map on the other). -#' -#' @param x An RDD to be zipped. -#' @param other Another RDD to be zipped. -#' @return An RDD zipped from the two RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, 0:4) -#' rdd2 <- parallelize(sc, 1000:1004) -#' collect(zipRDD(rdd1, rdd2)) -#' # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004)) -#'} -#' @rdname zipRDD -#' @aliases zipRDD,RDD +# Zip an RDD with another RDD. +# +# Zips this RDD with another one, returning key-value pairs with the +# first element in each RDD second element in each RDD, etc. Assumes +# that the two RDDs have the same number of partitions and the same +# number of elements in each partition (e.g. one was made through +# a map on the other). +# +# @param x An RDD to be zipped. +# @param other Another RDD to be zipped. +# @return An RDD zipped from the two RDDs. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, 0:4) +# rdd2 <- parallelize(sc, 1000:1004) +# collect(zipRDD(rdd1, rdd2)) +# # list(list(0, 1000), list(1, 1001), list(2, 1002), list(3, 1003), list(4, 1004)) +#} +# @rdname zipRDD +# @aliases zipRDD,RDD setMethod("zipRDD", signature(x = "RDD", other = "RDD"), function(x, other) { @@ -1502,24 +1502,24 @@ setMethod("zipRDD", mergePartitions(rdd, TRUE) }) -#' Cartesian product of this RDD and another one. -#' -#' Return the Cartesian product of this RDD and another one, -#' that is, the RDD of all pairs of elements (a, b) where a -#' is in this and b is in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @return A new RDD which is the Cartesian product of these two RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:2) -#' sortByKey(cartesian(rdd, rdd)) -#' # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2)) -#'} -#' @rdname cartesian -#' @aliases cartesian,RDD,RDD-method +# Cartesian product of this RDD and another one. +# +# Return the Cartesian product of this RDD and another one, +# that is, the RDD of all pairs of elements (a, b) where a +# is in this and b is in other. +# +# @param x An RDD. +# @param other An RDD. +# @return A new RDD which is the Cartesian product of these two RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:2) +# sortByKey(cartesian(rdd, rdd)) +# # list(list(1, 1), list(1, 2), list(2, 1), list(2, 2)) +#} +# @rdname cartesian +# @aliases cartesian,RDD,RDD-method setMethod("cartesian", signature(x = "RDD", other = "RDD"), function(x, other) { @@ -1532,58 +1532,58 @@ setMethod("cartesian", mergePartitions(rdd, FALSE) }) -#' Subtract an RDD with another RDD. -#' -#' Return an RDD with the elements from this that are not in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions Number of the partitions in the result RDD. -#' @return An RDD with the elements from this that are not in other. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) -#' rdd2 <- parallelize(sc, list(2, 4)) -#' collect(subtract(rdd1, rdd2)) -#' # list(1, 1, 3) -#'} -#' @rdname subtract -#' @aliases subtract,RDD +# Subtract an RDD with another RDD. +# +# Return an RDD with the elements from this that are not in other. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions Number of the partitions in the result RDD. +# @return An RDD with the elements from this that are not in other. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4)) +# rdd2 <- parallelize(sc, list(2, 4)) +# collect(subtract(rdd1, rdd2)) +# # list(1, 1, 3) +#} +# @rdname subtract +# @aliases subtract,RDD setMethod("subtract", signature(x = "RDD", other = "RDD"), - function(x, other, numPartitions = SparkR::numPartitions(x)) { + function(x, other, numPartitions = SparkR:::numPartitions(x)) { mapFunction <- function(e) { list(e, NA) } rdd1 <- map(x, mapFunction) rdd2 <- map(other, mapFunction) keys(subtractByKey(rdd1, rdd2, numPartitions)) }) -#' Intersection of this RDD and another one. -#' -#' Return the intersection of this RDD and another one. -#' The output will not contain any duplicate elements, -#' even if the input RDDs did. Performs a hash partition -#' across the cluster. -#' Note that this method performs a shuffle internally. -#' -#' @param x An RDD. -#' @param other An RDD. 
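A sketch of subtract from the comments above; duplicates on the left side are kept as long as they have no match on the right:

  rdd1 <- parallelize(sc, list(1, 1, 2, 2, 3, 4))
  rdd2 <- parallelize(sc, list(2, 4))
  collect(subtract(rdd1, rdd2))  # list(1, 1, 3)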
-#' @param numPartitions The number of partitions in the result RDD. -#' @return An RDD which is the intersection of these two RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) -#' rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) -#' collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) -#' # list(1, 2, 3) -#'} -#' @rdname intersection -#' @aliases intersection,RDD +# Intersection of this RDD and another one. +# +# Return the intersection of this RDD and another one. +# The output will not contain any duplicate elements, +# even if the input RDDs did. Performs a hash partition +# across the cluster. +# Note that this method performs a shuffle internally. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions The number of partitions in the result RDD. +# @return An RDD which is the intersection of these two RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5)) +# rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8)) +# collect(sortBy(intersection(rdd1, rdd2), function(x) { x })) +# # list(1, 2, 3) +#} +# @rdname intersection +# @aliases intersection,RDD setMethod("intersection", signature(x = "RDD", other = "RDD"), - function(x, other, numPartitions = SparkR::numPartitions(x)) { + function(x, other, numPartitions = SparkR:::numPartitions(x)) { rdd1 <- map(x, function(v) { list(v, NA) }) rdd2 <- map(other, function(v) { list(v, NA) }) @@ -1596,26 +1596,26 @@ setMethod("intersection", keys(filterRDD(cogroup(rdd1, rdd2, numPartitions = numPartitions), filterFunction)) }) -#' Zips an RDD's partitions with one (or more) RDD(s). -#' Same as zipPartitions in Spark. -#' -#' @param ... RDDs to be zipped. -#' @param func A function to transform zipped partitions. -#' @return A new RDD by applying a function to the zipped partitions. -#' Assumes that all the RDDs have the *same number of partitions*, but -#' does *not* require them to have the same number of elements in each partition. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 -#' rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 -#' rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 -#' collect(zipPartitions(rdd1, rdd2, rdd3, -#' func = function(x, y, z) { list(list(x, y, z))} )) -#' # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) -#'} -#' @rdname zipRDD -#' @aliases zipPartitions,RDD +# Zips an RDD's partitions with one (or more) RDD(s). +# Same as zipPartitions in Spark. +# +# @param ... RDDs to be zipped. +# @param func A function to transform zipped partitions. +# @return A new RDD by applying a function to the zipped partitions. +# Assumes that all the RDDs have the *same number of partitions*, but +# does *not* require them to have the same number of elements in each partition. 
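A companion sketch for intersection, which (unlike subtract) also removes duplicates from its output:

  rdd1 <- parallelize(sc, list(1, 10, 2, 3, 4, 5))
  rdd2 <- parallelize(sc, list(1, 6, 2, 3, 7, 8))
  collect(sortBy(intersection(rdd1, rdd2), function(x) { x }))  # list(1, 2, 3)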
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 +# rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 +# rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 +# collect(zipPartitions(rdd1, rdd2, rdd3, +# func = function(x, y, z) { list(list(x, y, z))} )) +# # list(list(1, c(1,2), c(1,2,3)), list(2, c(3,4), c(4,5,6))) +#} +# @rdname zipRDD +# @aliases zipPartitions,RDD setMethod("zipPartitions", "RDD", function(..., func) { diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 4f05ba524a01a..88e1a508f37c4 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -69,7 +69,7 @@ infer_type <- function(x) { #' #' Converts an RDD to a DataFrame by infer the types. #' -#' @param sqlCtx A SQLContext +#' @param sqlContext A SQLContext #' @param data An RDD or list or data.frame #' @param schema a list of column names or named list (StructType), optional #' @return an DataFrame @@ -77,13 +77,13 @@ infer_type <- function(x) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) -#' df <- createDataFrame(sqlCtx, rdd) +#' df <- createDataFrame(sqlContext, rdd) #' } # TODO(davies): support sampling and infer type from NA -createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) { +createDataFrame <- function(sqlContext, data, schema = NULL, samplingRatio = 1.0) { if (is.data.frame(data)) { # get the names of columns, they will be put into RDD schema <- names(data) @@ -102,7 +102,7 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) { }) } if (is.list(data)) { - sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sqlCtx) + sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sqlContext) rdd <- parallelize(sc, data) } else if (inherits(data, "RDD")) { rdd <- data @@ -146,38 +146,38 @@ createDataFrame <- function(sqlCtx, data, schema = NULL, samplingRatio = 1.0) { jrdd <- getJRDD(lapply(rdd, function(x) x), "row") srdd <- callJMethod(jrdd, "rdd") sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "createDF", - srdd, schema$jobj, sqlCtx) + srdd, schema$jobj, sqlContext) dataFrame(sdf) } -#' toDF -#' -#' Converts an RDD to a DataFrame by infer the types. -#' -#' @param x An RDD -#' -#' @rdname DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) -#' df <- toDF(rdd) -#' } +# toDF +# +# Converts an RDD to a DataFrame by infer the types. +# +# @param x An RDD +# +# @rdname DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlContext <- sparkRSQL.init(sc) +# rdd <- lapply(parallelize(sc, 1:10), function(x) list(a=x, b=as.character(x))) +# df <- toDF(rdd) +# } setGeneric("toDF", function(x, ...) { standardGeneric("toDF") }) setMethod("toDF", signature(x = "RDD"), function(x, ...) { - sqlCtx <- if (exists(".sparkRHivesc", envir = .sparkREnv)) { + sqlContext <- if (exists(".sparkRHivesc", envir = .sparkREnv)) { get(".sparkRHivesc", envir = .sparkREnv) } else if (exists(".sparkRSQLsc", envir = .sparkREnv)) { get(".sparkRSQLsc", envir = .sparkREnv) } else { stop("no SQL context available") } - createDataFrame(sqlCtx, x, ...) + createDataFrame(sqlContext, x, ...) }) #' Create a DataFrame from a JSON file. 
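A sketch of the renamed sqlContext argument in use, following the createDataFrame example above; toDF resolves whichever SQL (or Hive) context has been initialised in the SparkR environment:

  sc <- sparkR.init()
  sqlContext <- sparkRSQL.init(sc)
  rdd <- lapply(parallelize(sc, 1:10), function(x) list(a = x, b = as.character(x)))
  df  <- createDataFrame(sqlContext, rdd)
  df2 <- toDF(rdd)  # equivalent; the context is looked up implicitly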
@@ -185,51 +185,51 @@ setMethod("toDF", signature(x = "RDD"), #' Loads a JSON file (one object per line), returning the result as a DataFrame #' It goes through the entire dataset once to determine the schema. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param path Path of file to read. A vector of multiple paths is allowed. #' @return DataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' } -jsonFile <- function(sqlCtx, path) { +jsonFile <- function(sqlContext, path) { # Allow the user to have a more flexible definiton of the text file path path <- normalizePath(path) # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") - sdf <- callJMethod(sqlCtx, "jsonFile", path) + sdf <- callJMethod(sqlContext, "jsonFile", path) dataFrame(sdf) } -#' JSON RDD -#' -#' Loads an RDD storing one JSON object per string as a DataFrame. -#' -#' @param sqlCtx SQLContext to use -#' @param rdd An RDD of JSON string -#' @param schema A StructType object to use as schema -#' @param samplingRatio The ratio of simpling used to infer the schema -#' @return A DataFrame -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' rdd <- texFile(sc, "path/to/json") -#' df <- jsonRDD(sqlCtx, rdd) -#' } +# JSON RDD +# +# Loads an RDD storing one JSON object per string as a DataFrame. +# +# @param sqlContext SQLContext to use +# @param rdd An RDD of JSON string +# @param schema A StructType object to use as schema +# @param samplingRatio The ratio of simpling used to infer the schema +# @return A DataFrame +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# sqlContext <- sparkRSQL.init(sc) +# rdd <- texFile(sc, "path/to/json") +# df <- jsonRDD(sqlContext, rdd) +# } # TODO: support schema -jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) { +jsonRDD <- function(sqlContext, rdd, schema = NULL, samplingRatio = 1.0) { rdd <- serializeToString(rdd) if (is.null(schema)) { - sdf <- callJMethod(sqlCtx, "jsonRDD", callJMethod(getJRDD(rdd), "rdd"), samplingRatio) + sdf <- callJMethod(sqlContext, "jsonRDD", callJMethod(getJRDD(rdd), "rdd"), samplingRatio) dataFrame(sdf) } else { stop("not implemented") @@ -241,64 +241,63 @@ jsonRDD <- function(sqlCtx, rdd, schema = NULL, samplingRatio = 1.0) { #' #' Loads a Parquet file, returning the result as a DataFrame. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param ... Path(s) of parquet file(s) to read. #' @return DataFrame #' @export # TODO: Implement saveasParquetFile and write examples for both -parquetFile <- function(sqlCtx, ...) { +parquetFile <- function(sqlContext, ...) { # Allow the user to have a more flexible definiton of the text file path paths <- lapply(list(...), normalizePath) - sdf <- callJMethod(sqlCtx, "parquetFile", paths) + sdf <- callJMethod(sqlContext, "parquetFile", paths) dataFrame(sdf) } #' SQL Query -#' +#' #' Executes a SQL query using Spark, returning the result as a DataFrame. 
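A sketch tying the JSON reader to a SQL query, along the lines of the examples above; "path/to/file.json" is a placeholder path:

  sc <- sparkR.init()
  sqlContext <- sparkRSQL.init(sc)
  df <- jsonFile(sqlContext, "path/to/file.json")
  registerTempTable(df, "table")
  new_df <- sql(sqlContext, "SELECT * FROM table")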
#' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param sqlQuery A character vector containing the SQL query #' @return DataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' registerTempTable(df, "table") -#' new_df <- sql(sqlCtx, "SELECT * FROM table") +#' new_df <- sql(sqlContext, "SELECT * FROM table") #' } -sql <- function(sqlCtx, sqlQuery) { - sdf <- callJMethod(sqlCtx, "sql", sqlQuery) - dataFrame(sdf) +sql <- function(sqlContext, sqlQuery) { + sdf <- callJMethod(sqlContext, "sql", sqlQuery) + dataFrame(sdf) } - #' Create a DataFrame from a SparkSQL Table #' #' Returns the specified Table as a DataFrame. The Table must have already been registered #' in the SQLContext. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param tableName The SparkSQL Table to convert to a DataFrame. #' @return DataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' registerTempTable(df, "table") -#' new_df <- table(sqlCtx, "table") +#' new_df <- table(sqlContext, "table") #' } -table <- function(sqlCtx, tableName) { - sdf <- callJMethod(sqlCtx, "table", tableName) +table <- function(sqlContext, tableName) { + sdf <- callJMethod(sqlContext, "table", tableName) dataFrame(sdf) } @@ -307,22 +306,22 @@ table <- function(sqlCtx, tableName) { #' #' Returns a DataFrame containing names of tables in the given database. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param databaseName name of the database #' @return a DataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' tables(sqlCtx, "hive") +#' sqlContext <- sparkRSQL.init(sc) +#' tables(sqlContext, "hive") #' } -tables <- function(sqlCtx, databaseName = NULL) { +tables <- function(sqlContext, databaseName = NULL) { jdf <- if (is.null(databaseName)) { - callJMethod(sqlCtx, "tables") + callJMethod(sqlContext, "tables") } else { - callJMethod(sqlCtx, "tables", databaseName) + callJMethod(sqlContext, "tables", databaseName) } dataFrame(jdf) } @@ -332,22 +331,22 @@ tables <- function(sqlCtx, databaseName = NULL) { #' #' Returns the names of tables in the given database as an array. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param databaseName name of the database #' @return a list of table names #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' tableNames(sqlCtx, "hive") +#' sqlContext <- sparkRSQL.init(sc) +#' tableNames(sqlContext, "hive") #' } -tableNames <- function(sqlCtx, databaseName = NULL) { +tableNames <- function(sqlContext, databaseName = NULL) { if (is.null(databaseName)) { - callJMethod(sqlCtx, "tableNames") + callJMethod(sqlContext, "tableNames") } else { - callJMethod(sqlCtx, "tableNames", databaseName) + callJMethod(sqlContext, "tableNames", databaseName) } } @@ -356,58 +355,58 @@ tableNames <- function(sqlCtx, databaseName = NULL) { #' #' Caches the specified table in-memory. 
#' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param tableName The name of the table being cached #' @return DataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' registerTempTable(df, "table") -#' cacheTable(sqlCtx, "table") +#' cacheTable(sqlContext, "table") #' } -cacheTable <- function(sqlCtx, tableName) { - callJMethod(sqlCtx, "cacheTable", tableName) +cacheTable <- function(sqlContext, tableName) { + callJMethod(sqlContext, "cacheTable", tableName) } #' Uncache Table #' #' Removes the specified table from the in-memory cache. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param tableName The name of the table being uncached #' @return DataFrame #' @export #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #' path <- "path/to/file.json" -#' df <- jsonFile(sqlCtx, path) +#' df <- jsonFile(sqlContext, path) #' registerTempTable(df, "table") -#' uncacheTable(sqlCtx, "table") +#' uncacheTable(sqlContext, "table") #' } -uncacheTable <- function(sqlCtx, tableName) { - callJMethod(sqlCtx, "uncacheTable", tableName) +uncacheTable <- function(sqlContext, tableName) { + callJMethod(sqlContext, "uncacheTable", tableName) } #' Clear Cache #' #' Removes all cached tables from the in-memory cache. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @examples #' \dontrun{ -#' clearCache(sqlCtx) +#' clearCache(sqlContext) #' } -clearCache <- function(sqlCtx) { - callJMethod(sqlCtx, "clearCache") +clearCache <- function(sqlContext) { + callJMethod(sqlContext, "clearCache") } #' Drop Temporary Table @@ -415,22 +414,22 @@ clearCache <- function(sqlCtx) { #' Drops the temporary table with the given table name in the catalog. #' If the table has been cached/persisted before, it's also unpersisted. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param tableName The name of the SparkSQL table to be dropped. #' @examples #' \dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df <- loadDF(sqlCtx, path, "parquet") +#' sqlContext <- sparkRSQL.init(sc) +#' df <- read.df(sqlContext, path, "parquet") #' registerTempTable(df, "table") -#' dropTempTable(sqlCtx, "table") +#' dropTempTable(sqlContext, "table") #' } -dropTempTable <- function(sqlCtx, tableName) { +dropTempTable <- function(sqlContext, tableName) { if (class(tableName) != "character") { stop("tableName must be a string.") } - callJMethod(sqlCtx, "dropTempTable", tableName) + callJMethod(sqlContext, "dropTempTable", tableName) } #' Load an DataFrame @@ -441,7 +440,7 @@ dropTempTable <- function(sqlCtx, tableName) { #' If `source` is not specified, the default data source configured by #' "spark.sql.sources.default" will be used. 
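A sketch of the cache lifecycle for a registered table, using the helpers documented above and the "table" registered in the previous sketch:

  cacheTable(sqlContext, "table")     # pin the table in memory
  uncacheTable(sqlContext, "table")   # release it again
  dropTempTable(sqlContext, "table")  # remove it from the catalog
  # clearCache(sqlContext) would instead evict every cached table at once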
#' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param path The path of files to load #' @param source the name of external data source #' @return DataFrame @@ -449,19 +448,31 @@ dropTempTable <- function(sqlCtx, tableName) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df <- load(sqlCtx, "path/to/file.json", source = "json") +#' sqlContext <- sparkRSQL.init(sc) +#' df <- read.df(sqlContext, "path/to/file.json", source = "json") #' } -loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) { +read.df <- function(sqlContext, path = NULL, source = NULL, ...) { options <- varargsToEnv(...) if (!is.null(path)) { options[['path']] <- path } - sdf <- callJMethod(sqlCtx, "load", source, options) + if (is.null(source)) { + sqlContext <- get(".sparkRSQLsc", envir = .sparkREnv) + source <- callJMethod(sqlContext, "getConf", "spark.sql.sources.default", + "org.apache.spark.sql.parquet") + } + sdf <- callJMethod(sqlContext, "load", source, options) dataFrame(sdf) } +#' @aliases loadDF +#' @export + +loadDF <- function(sqlContext, path = NULL, source = NULL, ...) { + read.df(sqlContext, path, source, ...) +} + #' Create an external table #' #' Creates an external table based on the dataset in a data source, @@ -471,7 +482,7 @@ loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) { #' If `source` is not specified, the default data source configured by #' "spark.sql.sources.default" will be used. #' -#' @param sqlCtx SQLContext to use +#' @param sqlContext SQLContext to use #' @param tableName A name of the table #' @param path The path of files to load #' @param source the name of external data source @@ -480,15 +491,15 @@ loadDF <- function(sqlCtx, path = NULL, source = NULL, ...) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) -#' df <- sparkRSQL.createExternalTable(sqlCtx, "myjson", path="path/to/json", source="json") +#' sqlContext <- sparkRSQL.init(sc) +#' df <- sparkRSQL.createExternalTable(sqlContext, "myjson", path="path/to/json", source="json") #' } -createExternalTable <- function(sqlCtx, tableName, path = NULL, source = NULL, ...) { +createExternalTable <- function(sqlContext, tableName, path = NULL, source = NULL, ...) { options <- varargsToEnv(...) if (!is.null(path)) { options[['path']] <- path } - sdf <- callJMethod(sqlCtx, "createExternalTable", tableName, source, options) + sdf <- callJMethod(sqlContext, "createExternalTable", tableName, source, options) dataFrame(sdf) } diff --git a/R/pkg/R/broadcast.R b/R/pkg/R/broadcast.R index 583fa2e7fdcfd..23dc38780716e 100644 --- a/R/pkg/R/broadcast.R +++ b/R/pkg/R/broadcast.R @@ -23,21 +23,21 @@ .broadcastValues <- new.env() .broadcastIdToName <- new.env() -#' @title S4 class that represents a Broadcast variable -#' @description Broadcast variables can be created using the broadcast -#' function from a \code{SparkContext}. -#' @rdname broadcast-class -#' @seealso broadcast -#' -#' @param id Id of the backing Spark broadcast variable -#' @export +# @title S4 class that represents a Broadcast variable +# @description Broadcast variables can be created using the broadcast +# function from a \code{SparkContext}. 
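A sketch of the new read.df entry point and its loadDF alias; when source is omitted, the code above falls back to the spark.sql.sources.default setting (Parquet by default). Paths are placeholders:

  df1 <- read.df(sqlContext, "path/to/file.json", source = "json")
  df2 <- read.df(sqlContext, "path/to/data.parquet")      # default source
  df3 <- loadDF(sqlContext, "path/to/file.json", "json")  # backwards-compatible alias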
+# @rdname broadcast-class +# @seealso broadcast +# +# @param id Id of the backing Spark broadcast variable +# @export setClass("Broadcast", slots = list(id = "character")) -#' @rdname broadcast-class -#' @param value Value of the broadcast variable -#' @param jBroadcastRef reference to the backing Java broadcast object -#' @param objName name of broadcasted object -#' @export +# @rdname broadcast-class +# @param value Value of the broadcast variable +# @param jBroadcastRef reference to the backing Java broadcast object +# @param objName name of broadcasted object +# @export Broadcast <- function(id, value, jBroadcastRef, objName) { .broadcastValues[[id]] <- value .broadcastNames[[as.character(objName)]] <- jBroadcastRef @@ -45,13 +45,13 @@ Broadcast <- function(id, value, jBroadcastRef, objName) { new("Broadcast", id = id) } -#' @description -#' \code{value} can be used to get the value of a broadcast variable inside -#' a distributed function. -#' -#' @param bcast The broadcast variable to get -#' @rdname broadcast -#' @aliases value,Broadcast-method +# @description +# \code{value} can be used to get the value of a broadcast variable inside +# a distributed function. +# +# @param bcast The broadcast variable to get +# @rdname broadcast +# @aliases value,Broadcast-method setMethod("value", signature(bcast = "Broadcast"), function(bcast) { @@ -62,24 +62,24 @@ setMethod("value", } }) -#' Internal function to set values of a broadcast variable. -#' -#' This function is used internally by Spark to set the value of a broadcast -#' variable on workers. Not intended for use outside the package. -#' -#' @rdname broadcast-internal -#' @seealso broadcast, value +# Internal function to set values of a broadcast variable. +# +# This function is used internally by Spark to set the value of a broadcast +# variable on workers. Not intended for use outside the package. +# +# @rdname broadcast-internal +# @seealso broadcast, value -#' @param bcastId The id of broadcast variable to set -#' @param value The value to be set -#' @export +# @param bcastId The id of broadcast variable to set +# @param value The value to be set +# @export setBroadcastValue <- function(bcastId, value) { bcastIdStr <- as.character(bcastId) .broadcastValues[[bcastIdStr]] <- value } -#' Helper function to clear the list of broadcast variables we know about -#' Should be called when the SparkR JVM backend is shutdown +# Helper function to clear the list of broadcast variables we know about +# Should be called when the SparkR JVM backend is shutdown clearBroadcastVariables <- function() { bcasts <- ls(.broadcastNames) rm(list = bcasts, envir = .broadcastNames) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 95fb9ff0887b6..80e92d3105a36 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -55,12 +55,17 @@ operators <- list( "+" = "plus", "-" = "minus", "*" = "multiply", "/" = "divide", "%%" = "mod", "==" = "equalTo", ">" = "gt", "<" = "lt", "!=" = "notEqual", "<=" = "leq", ">=" = "geq", # we can not override `&&` and `||`, so use `&` and `|` instead - "&" = "and", "|" = "or" #, "!" = "unary_$bang" + "&" = "and", "|" = "or", #, "!" 
= "unary_$bang" + "^" = "pow" ) column_functions1 <- c("asc", "desc", "isNull", "isNotNull") column_functions2 <- c("like", "rlike", "startsWith", "endsWith", "getField", "getItem", "contains") functions <- c("min", "max", "sum", "avg", "mean", "count", "abs", "sqrt", - "first", "last", "lower", "upper", "sumDistinct") + "first", "last", "lower", "upper", "sumDistinct", + "acos", "asin", "atan", "cbrt", "ceiling", "cos", "cosh", "exp", + "expm1", "floor", "log", "log10", "log1p", "rint", "sign", + "sin", "sinh", "tan", "tanh", "toDegrees", "toRadians") +binary_mathfunctions<- c("atan2", "hypot") createOperator <- function(op) { setMethod(op, @@ -76,7 +81,11 @@ createOperator <- function(op) { if (class(e2) == "Column") { e2 <- e2@jc } - callJMethod(e1@jc, operators[[op]], e2) + if (op == "^") { + jc <- callJStatic("org.apache.spark.sql.functions", operators[[op]], e1@jc, e2) + } else { + callJMethod(e1@jc, operators[[op]], e2) + } } column(jc) }) @@ -106,11 +115,29 @@ createStaticFunction <- function(name) { setMethod(name, signature(x = "Column"), function(x) { + if (name == "ceiling") { + name <- "ceil" + } + if (name == "sign") { + name <- "signum" + } jc <- callJStatic("org.apache.spark.sql.functions", name, x@jc) column(jc) }) } +createBinaryMathfunctions <- function(name) { + setMethod(name, + signature(y = "Column"), + function(y, x) { + if (class(x) == "Column") { + x <- x@jc + } + jc <- callJStatic("org.apache.spark.sql.functions", name, y@jc, x) + column(jc) + }) +} + createMethods <- function() { for (op in names(operators)) { createOperator(op) @@ -124,6 +151,9 @@ createMethods <- function() { for (x in functions) { createStaticFunction(x) } + for (name in binary_mathfunctions) { + createBinaryMathfunctions(name) + } } createMethods() @@ -131,6 +161,8 @@ createMethods() #' alias #' #' Set a new name for a column + +#' @rdname column setMethod("alias", signature(object = "Column"), function(object, data) { @@ -141,8 +173,12 @@ setMethod("alias", } }) +#' substr +#' #' An expression that returns a substring. #' +#' @rdname column +#' #' @param start starting position #' @param stop ending position setMethod("substr", signature(x = "Column"), @@ -152,6 +188,9 @@ setMethod("substr", signature(x = "Column"), }) #' Casts the column to a different data type. +#' +#' @rdname column +#' #' @examples #' \dontrun{ #' cast(df$age, "string") @@ -173,8 +212,8 @@ setMethod("cast", #' Approx Count Distinct #' -#' Returns the approximate number of distinct items in a group. -#' +#' @rdname column +#' @return the approximate number of distinct items in a group. setMethod("approxCountDistinct", signature(x = "Column"), function(x, rsd = 0.95) { @@ -184,8 +223,8 @@ setMethod("approxCountDistinct", #' Count Distinct #' -#' returns the number of distinct items in a group. -#' +#' @rdname column +#' @return the number of distinct items in a group. setMethod("countDistinct", signature(x = "Column"), function(x, ...) { @@ -197,3 +236,18 @@ setMethod("countDistinct", column(jc) }) +#' @rdname column +#' @aliases countDistinct +setMethod("n_distinct", + signature(x = "Column"), + function(x, ...) { + countDistinct(x, ...) 
+ }) + +#' @rdname column +#' @aliases count +setMethod("n", + signature(x = "Column"), + function(x) { + count(x) + }) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index b4845b6948997..43be9c904fdf6 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -25,27 +25,27 @@ getMinPartitions <- function(sc, minPartitions) { as.integer(minPartitions) } -#' Create an RDD from a text file. -#' -#' This function reads a text file from HDFS, a local file system (available on all -#' nodes), or any Hadoop-supported file system URI, and creates an -#' RDD of strings from it. -#' -#' @param sc SparkContext to use -#' @param path Path of file to read. A vector of multiple paths is allowed. -#' @param minPartitions Minimum number of partitions to be created. If NULL, the default -#' value is chosen based on available parallelism. -#' @return RDD where each item is of type \code{character} -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' lines <- textFile(sc, "myfile.txt") -#'} +# Create an RDD from a text file. +# +# This function reads a text file from HDFS, a local file system (available on all +# nodes), or any Hadoop-supported file system URI, and creates an +# RDD of strings from it. +# +# @param sc SparkContext to use +# @param path Path of file to read. A vector of multiple paths is allowed. +# @param minPartitions Minimum number of partitions to be created. If NULL, the default +# value is chosen based on available parallelism. +# @return RDD where each item is of type \code{character} +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# lines <- textFile(sc, "myfile.txt") +#} textFile <- function(sc, path, minPartitions = NULL) { # Allow the user to have a more flexible definiton of the text file path path <- suppressWarnings(normalizePath(path)) - #' Convert a string vector of paths to a string containing comma separated paths + # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") jrdd <- callJMethod(sc, "textFile", path, getMinPartitions(sc, minPartitions)) @@ -53,27 +53,27 @@ textFile <- function(sc, path, minPartitions = NULL) { RDD(jrdd, "string") } -#' Load an RDD saved as a SequenceFile containing serialized objects. -#' -#' The file to be loaded should be one that was previously generated by calling -#' saveAsObjectFile() of the RDD class. -#' -#' @param sc SparkContext to use -#' @param path Path of file to read. A vector of multiple paths is allowed. -#' @param minPartitions Minimum number of partitions to be created. If NULL, the default -#' value is chosen based on available parallelism. -#' @return RDD containing serialized R objects. -#' @seealso saveAsObjectFile -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- objectFile(sc, "myfile") -#'} +# Load an RDD saved as a SequenceFile containing serialized objects. +# +# The file to be loaded should be one that was previously generated by calling +# saveAsObjectFile() of the RDD class. +# +# @param sc SparkContext to use +# @param path Path of file to read. A vector of multiple paths is allowed. +# @param minPartitions Minimum number of partitions to be created. If NULL, the default +# value is chosen based on available parallelism. +# @return RDD containing serialized R objects. 
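Looking back at the Column helpers added in column.R above (the ^ operator mapped to pow, the unary math wrappers, the binary atan2/hypot, and the n/n_distinct aliases), a sketch assuming a DataFrame df with numeric columns x and y (hypothetical names); each expression returns a Column rather than a value:

  df$x ^ 2           # routed to functions.pow on the JVM side
  ceiling(df$x)      # mapped to ceil
  sign(df$y)         # mapped to signum
  atan2(df$y, df$x)  # binary math function
  n_distinct(df$x)   # alias for countDistinct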
+# @seealso saveAsObjectFile +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- objectFile(sc, "myfile") +#} objectFile <- function(sc, path, minPartitions = NULL) { # Allow the user to have a more flexible definiton of the text file path path <- suppressWarnings(normalizePath(path)) - #' Convert a string vector of paths to a string containing comma separated paths + # Convert a string vector of paths to a string containing comma separated paths path <- paste(path, collapse = ",") jrdd <- callJMethod(sc, "objectFile", path, getMinPartitions(sc, minPartitions)) @@ -81,24 +81,24 @@ objectFile <- function(sc, path, minPartitions = NULL) { RDD(jrdd, "byte") } -#' Create an RDD from a homogeneous list or vector. -#' -#' This function creates an RDD from a local homogeneous list in R. The elements -#' in the list are split into \code{numSlices} slices and distributed to nodes -#' in the cluster. -#' -#' @param sc SparkContext to use -#' @param coll collection to parallelize -#' @param numSlices number of partitions to create in the RDD -#' @return an RDD created from this collection -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10, 2) -#' # The RDD should contain 10 elements -#' length(rdd) -#'} +# Create an RDD from a homogeneous list or vector. +# +# This function creates an RDD from a local homogeneous list in R. The elements +# in the list are split into \code{numSlices} slices and distributed to nodes +# in the cluster. +# +# @param sc SparkContext to use +# @param coll collection to parallelize +# @param numSlices number of partitions to create in the RDD +# @return an RDD created from this collection +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10, 2) +# # The RDD should contain 10 elements +# length(rdd) +#} parallelize <- function(sc, coll, numSlices = 1) { # TODO: bound/safeguard numSlices # TODO: unit tests for if the split works for all primitives @@ -133,33 +133,33 @@ parallelize <- function(sc, coll, numSlices = 1) { RDD(jrdd, "byte") } -#' Include this specified package on all workers -#' -#' This function can be used to include a package on all workers before the -#' user's code is executed. This is useful in scenarios where other R package -#' functions are used in a function passed to functions like \code{lapply}. -#' NOTE: The package is assumed to be installed on every node in the Spark -#' cluster. -#' -#' @param sc SparkContext to use -#' @param pkg Package name -#' -#' @export -#' @examples -#'\dontrun{ -#' library(Matrix) -#' -#' sc <- sparkR.init() -#' # Include the matrix library we will be using -#' includePackage(sc, Matrix) -#' -#' generateSparse <- function(x) { -#' sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3)) -#' } -#' -#' rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse) -#' collect(rdd) -#'} +# Include this specified package on all workers +# +# This function can be used to include a package on all workers before the +# user's code is executed. This is useful in scenarios where other R package +# functions are used in a function passed to functions like \code{lapply}. +# NOTE: The package is assumed to be installed on every node in the Spark +# cluster. 
+# +# @param sc SparkContext to use +# @param pkg Package name +# +# @export +# @examples +#\dontrun{ +# library(Matrix) +# +# sc <- sparkR.init() +# # Include the matrix library we will be using +# includePackage(sc, Matrix) +# +# generateSparse <- function(x) { +# sparseMatrix(i=c(1, 2, 3), j=c(1, 2, 3), x=c(1, 2, 3)) +# } +# +# rdd <- lapplyPartition(parallelize(sc, 1:2, 2L), generateSparse) +# collect(rdd) +#} includePackage <- function(sc, pkg) { pkg <- as.character(substitute(pkg)) if (exists(".packages", .sparkREnv)) { @@ -171,30 +171,30 @@ includePackage <- function(sc, pkg) { .sparkREnv$.packages <- packages } -#' @title Broadcast a variable to all workers -#' -#' @description -#' Broadcast a read-only variable to the cluster, returning a \code{Broadcast} -#' object for reading it in distributed functions. -#' -#' @param sc Spark Context to use -#' @param object Object to be broadcast -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:2, 2L) -#' -#' # Large Matrix object that we want to broadcast -#' randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000)) -#' randomMatBr <- broadcast(sc, randomMat) -#' -#' # Use the broadcast variable inside the function -#' useBroadcast <- function(x) { -#' sum(value(randomMatBr) * x) -#' } -#' sumRDD <- lapply(rdd, useBroadcast) -#'} +# @title Broadcast a variable to all workers +# +# @description +# Broadcast a read-only variable to the cluster, returning a \code{Broadcast} +# object for reading it in distributed functions. +# +# @param sc Spark Context to use +# @param object Object to be broadcast +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:2, 2L) +# +# # Large Matrix object that we want to broadcast +# randomMat <- matrix(nrow=100, ncol=10, data=rnorm(1000)) +# randomMatBr <- broadcast(sc, randomMat) +# +# # Use the broadcast variable inside the function +# useBroadcast <- function(x) { +# sum(value(randomMatBr) * x) +# } +# sumRDD <- lapply(rdd, useBroadcast) +#} broadcast <- function(sc, object) { objName <- as.character(substitute(object)) serializedObj <- serialize(object, connection = NULL) @@ -205,21 +205,21 @@ broadcast <- function(sc, object) { Broadcast(id, object, jBroadcast, objName) } -#' @title Set the checkpoint directory -#' -#' Set the directory under which RDDs are going to be checkpointed. The -#' directory must be a HDFS path if running on a cluster. -#' -#' @param sc Spark Context to use -#' @param dirName Directory path -#' @export -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' setCheckpointDir(sc, "~/checkpoint") -#' rdd <- parallelize(sc, 1:2, 2L) -#' checkpoint(rdd) -#'} +# @title Set the checkpoint directory +# +# Set the directory under which RDDs are going to be checkpointed. The +# directory must be a HDFS path if running on a cluster. 
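A condensed sketch of the broadcast example above: a read-only matrix is shipped to the workers once and read back with value() inside a distributed function:

  sc <- sparkR.init()
  randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100))
  randomMatBr <- broadcast(sc, randomMat)
  useBroadcast <- function(x) { sum(value(randomMatBr) * x) }
  collect(lapply(parallelize(sc, 1:2, 2L), useBroadcast))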
+# +# @param sc Spark Context to use +# @param dirName Directory path +# @export +# @examples +#\dontrun{ +# sc <- sparkR.init() +# setCheckpointDir(sc, "~/checkpoint") +# rdd <- parallelize(sc, 1:2, 2L) +# checkpoint(rdd) +#} setCheckpointDir <- function(sc, dirName) { invisible(callJMethod(sc, "setCheckpointDir", suppressWarnings(normalizePath(dirName)))) } diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index e88729387ef95..12e09176c9f92 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -17,353 +17,353 @@ ############ RDD Actions and Transformations ############ -#' @rdname aggregateRDD -#' @seealso reduce -#' @export +# @rdname aggregateRDD +# @seealso reduce +# @export setGeneric("aggregateRDD", function(x, zeroValue, seqOp, combOp) { standardGeneric("aggregateRDD") }) -#' @rdname cache-methods -#' @export +# @rdname cache-methods +# @export setGeneric("cache", function(x) { standardGeneric("cache") }) -#' @rdname coalesce -#' @seealso repartition -#' @export +# @rdname coalesce +# @seealso repartition +# @export setGeneric("coalesce", function(x, numPartitions, ...) { standardGeneric("coalesce") }) -#' @rdname checkpoint-methods -#' @export +# @rdname checkpoint-methods +# @export setGeneric("checkpoint", function(x) { standardGeneric("checkpoint") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collect", function(x, ...) { standardGeneric("collect") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collectAsMap", function(x) { standardGeneric("collectAsMap") }) -#' @rdname collect-methods -#' @export +# @rdname collect-methods +# @export setGeneric("collectPartition", function(x, partitionId) { standardGeneric("collectPartition") }) -#' @rdname count -#' @export +# @rdname count +# @export setGeneric("count", function(x) { standardGeneric("count") }) -#' @rdname countByValue -#' @export +# @rdname countByValue +# @export setGeneric("countByValue", function(x) { standardGeneric("countByValue") }) -#' @rdname distinct -#' @export +# @rdname distinct +# @export setGeneric("distinct", function(x, numPartitions = 1) { standardGeneric("distinct") }) -#' @rdname filterRDD -#' @export +# @rdname filterRDD +# @export setGeneric("filterRDD", function(x, f) { standardGeneric("filterRDD") }) -#' @rdname first -#' @export +# @rdname first +# @export setGeneric("first", function(x) { standardGeneric("first") }) -#' @rdname flatMap -#' @export +# @rdname flatMap +# @export setGeneric("flatMap", function(X, FUN) { standardGeneric("flatMap") }) -#' @rdname fold -#' @seealso reduce -#' @export +# @rdname fold +# @seealso reduce +# @export setGeneric("fold", function(x, zeroValue, op) { standardGeneric("fold") }) -#' @rdname foreach -#' @export +# @rdname foreach +# @export setGeneric("foreach", function(x, func) { standardGeneric("foreach") }) -#' @rdname foreach -#' @export +# @rdname foreach +# @export setGeneric("foreachPartition", function(x, func) { standardGeneric("foreachPartition") }) # The jrdd accessor function. setGeneric("getJRDD", function(rdd, ...) 
{ standardGeneric("getJRDD") }) -#' @rdname glom -#' @export +# @rdname glom +# @export setGeneric("glom", function(x) { standardGeneric("glom") }) -#' @rdname keyBy -#' @export +# @rdname keyBy +# @export setGeneric("keyBy", function(x, func) { standardGeneric("keyBy") }) -#' @rdname lapplyPartition -#' @export +# @rdname lapplyPartition +# @export setGeneric("lapplyPartition", function(X, FUN) { standardGeneric("lapplyPartition") }) -#' @rdname lapplyPartitionsWithIndex -#' @export +# @rdname lapplyPartitionsWithIndex +# @export setGeneric("lapplyPartitionsWithIndex", function(X, FUN) { standardGeneric("lapplyPartitionsWithIndex") }) -#' @rdname lapply -#' @export +# @rdname lapply +# @export setGeneric("map", function(X, FUN) { standardGeneric("map") }) -#' @rdname lapplyPartition -#' @export +# @rdname lapplyPartition +# @export setGeneric("mapPartitions", function(X, FUN) { standardGeneric("mapPartitions") }) -#' @rdname lapplyPartitionsWithIndex -#' @export +# @rdname lapplyPartitionsWithIndex +# @export setGeneric("mapPartitionsWithIndex", function(X, FUN) { standardGeneric("mapPartitionsWithIndex") }) -#' @rdname maximum -#' @export +# @rdname maximum +# @export setGeneric("maximum", function(x) { standardGeneric("maximum") }) -#' @rdname minimum -#' @export +# @rdname minimum +# @export setGeneric("minimum", function(x) { standardGeneric("minimum") }) -#' @rdname sumRDD -#' @export +# @rdname sumRDD +# @export setGeneric("sumRDD", function(x) { standardGeneric("sumRDD") }) -#' @rdname name -#' @export +# @rdname name +# @export setGeneric("name", function(x) { standardGeneric("name") }) -#' @rdname numPartitions -#' @export +# @rdname numPartitions +# @export setGeneric("numPartitions", function(x) { standardGeneric("numPartitions") }) -#' @rdname persist -#' @export +# @rdname persist +# @export setGeneric("persist", function(x, newLevel) { standardGeneric("persist") }) -#' @rdname pipeRDD -#' @export +# @rdname pipeRDD +# @export setGeneric("pipeRDD", function(x, command, env = list()) { standardGeneric("pipeRDD")}) -#' @rdname reduce -#' @export +# @rdname reduce +# @export setGeneric("reduce", function(x, func) { standardGeneric("reduce") }) -#' @rdname repartition -#' @seealso coalesce -#' @export +# @rdname repartition +# @seealso coalesce +# @export setGeneric("repartition", function(x, numPartitions) { standardGeneric("repartition") }) -#' @rdname sampleRDD -#' @export +# @rdname sampleRDD +# @export setGeneric("sampleRDD", function(x, withReplacement, fraction, seed) { standardGeneric("sampleRDD") }) -#' @rdname saveAsObjectFile -#' @seealso objectFile -#' @export +# @rdname saveAsObjectFile +# @seealso objectFile +# @export setGeneric("saveAsObjectFile", function(x, path) { standardGeneric("saveAsObjectFile") }) -#' @rdname saveAsTextFile -#' @export +# @rdname saveAsTextFile +# @export setGeneric("saveAsTextFile", function(x, path) { standardGeneric("saveAsTextFile") }) -#' @rdname setName -#' @export +# @rdname setName +# @export setGeneric("setName", function(x, name) { standardGeneric("setName") }) -#' @rdname sortBy -#' @export +# @rdname sortBy +# @export setGeneric("sortBy", function(x, func, ascending = TRUE, numPartitions = 1) { standardGeneric("sortBy") }) -#' @rdname take -#' @export +# @rdname take +# @export setGeneric("take", function(x, num) { standardGeneric("take") }) -#' @rdname takeOrdered -#' @export +# @rdname takeOrdered +# @export setGeneric("takeOrdered", function(x, num) { standardGeneric("takeOrdered") }) -#' @rdname takeSample -#' @export +# 
@rdname takeSample +# @export setGeneric("takeSample", function(x, withReplacement, num, seed) { standardGeneric("takeSample") }) -#' @rdname top -#' @export +# @rdname top +# @export setGeneric("top", function(x, num) { standardGeneric("top") }) -#' @rdname unionRDD -#' @export +# @rdname unionRDD +# @export setGeneric("unionRDD", function(x, y) { standardGeneric("unionRDD") }) -#' @rdname unpersist-methods -#' @export +# @rdname unpersist-methods +# @export setGeneric("unpersist", function(x, ...) { standardGeneric("unpersist") }) -#' @rdname zipRDD -#' @export +# @rdname zipRDD +# @export setGeneric("zipRDD", function(x, other) { standardGeneric("zipRDD") }) -#' @rdname zipRDD -#' @export +# @rdname zipRDD +# @export setGeneric("zipPartitions", function(..., func) { standardGeneric("zipPartitions") }, signature = "...") -#' @rdname zipWithIndex -#' @seealso zipWithUniqueId -#' @export +# @rdname zipWithIndex +# @seealso zipWithUniqueId +# @export setGeneric("zipWithIndex", function(x) { standardGeneric("zipWithIndex") }) -#' @rdname zipWithUniqueId -#' @seealso zipWithIndex -#' @export +# @rdname zipWithUniqueId +# @seealso zipWithIndex +# @export setGeneric("zipWithUniqueId", function(x) { standardGeneric("zipWithUniqueId") }) ############ Binary Functions ############# -#' @rdname cartesian -#' @export +# @rdname cartesian +# @export setGeneric("cartesian", function(x, other) { standardGeneric("cartesian") }) -#' @rdname countByKey -#' @export +# @rdname countByKey +# @export setGeneric("countByKey", function(x) { standardGeneric("countByKey") }) -#' @rdname flatMapValues -#' @export +# @rdname flatMapValues +# @export setGeneric("flatMapValues", function(X, FUN) { standardGeneric("flatMapValues") }) -#' @rdname intersection -#' @export +# @rdname intersection +# @export setGeneric("intersection", function(x, other, numPartitions = 1) { standardGeneric("intersection") }) -#' @rdname keys -#' @export +# @rdname keys +# @export setGeneric("keys", function(x) { standardGeneric("keys") }) -#' @rdname lookup -#' @export +# @rdname lookup +# @export setGeneric("lookup", function(x, key) { standardGeneric("lookup") }) -#' @rdname mapValues -#' @export +# @rdname mapValues +# @export setGeneric("mapValues", function(X, FUN) { standardGeneric("mapValues") }) -#' @rdname sampleByKey -#' @export +# @rdname sampleByKey +# @export setGeneric("sampleByKey", function(x, withReplacement, fractions, seed) { standardGeneric("sampleByKey") }) -#' @rdname values -#' @export +# @rdname values +# @export setGeneric("values", function(x) { standardGeneric("values") }) ############ Shuffle Functions ############ -#' @rdname aggregateByKey -#' @seealso foldByKey, combineByKey -#' @export +# @rdname aggregateByKey +# @seealso foldByKey, combineByKey +# @export setGeneric("aggregateByKey", function(x, zeroValue, seqOp, combOp, numPartitions) { standardGeneric("aggregateByKey") }) -#' @rdname cogroup -#' @export +# @rdname cogroup +# @export setGeneric("cogroup", function(..., numPartitions) { standardGeneric("cogroup") }, signature = "...") -#' @rdname combineByKey -#' @seealso groupByKey, reduceByKey -#' @export +# @rdname combineByKey +# @seealso groupByKey, reduceByKey +# @export setGeneric("combineByKey", function(x, createCombiner, mergeValue, mergeCombiners, numPartitions) { standardGeneric("combineByKey") }) -#' @rdname foldByKey -#' @seealso aggregateByKey, combineByKey -#' @export +# @rdname foldByKey +# @seealso aggregateByKey, combineByKey +# @export setGeneric("foldByKey", function(x, zeroValue, 
func, numPartitions) { standardGeneric("foldByKey") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("fullOuterJoin", function(x, y, numPartitions) { standardGeneric("fullOuterJoin") }) -#' @rdname groupByKey -#' @seealso reduceByKey -#' @export +# @rdname groupByKey +# @seealso reduceByKey +# @export setGeneric("groupByKey", function(x, numPartitions) { standardGeneric("groupByKey") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("join", function(x, y, ...) { standardGeneric("join") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("leftOuterJoin", function(x, y, numPartitions) { standardGeneric("leftOuterJoin") }) -#' @rdname partitionBy -#' @export +# @rdname partitionBy +# @export setGeneric("partitionBy", function(x, numPartitions, ...) { standardGeneric("partitionBy") }) -#' @rdname reduceByKey -#' @seealso groupByKey -#' @export +# @rdname reduceByKey +# @seealso groupByKey +# @export setGeneric("reduceByKey", function(x, combineFunc, numPartitions) { standardGeneric("reduceByKey")}) -#' @rdname reduceByKeyLocally -#' @seealso reduceByKey -#' @export +# @rdname reduceByKeyLocally +# @seealso reduceByKey +# @export setGeneric("reduceByKeyLocally", function(x, combineFunc) { standardGeneric("reduceByKeyLocally") }) -#' @rdname join-methods -#' @export +# @rdname join-methods +# @export setGeneric("rightOuterJoin", function(x, y, numPartitions) { standardGeneric("rightOuterJoin") }) -#' @rdname sortByKey -#' @export +# @rdname sortByKey +# @export setGeneric("sortByKey", function(x, ascending = TRUE, numPartitions = 1) { standardGeneric("sortByKey") }) -#' @rdname subtract -#' @export +# @rdname subtract +# @export setGeneric("subtract", function(x, other, numPartitions = 1) { standardGeneric("subtract") }) -#' @rdname subtractByKey -#' @export +# @rdname subtractByKey +# @export setGeneric("subtractByKey", function(x, other, numPartitions = 1) { standardGeneric("subtractByKey") @@ -372,18 +372,44 @@ setGeneric("subtractByKey", ################### Broadcast Variable Methods ################# -#' @rdname broadcast -#' @export +# @rdname broadcast +# @export setGeneric("value", function(bcast) { standardGeneric("value") }) #################### DataFrame Methods ######################## +#' @rdname agg +#' @export +setGeneric("agg", function (x, ...) { standardGeneric("agg") }) + +#' @rdname arrange +#' @export +setGeneric("arrange", function(x, col, ...) { standardGeneric("arrange") }) + #' @rdname schema #' @export setGeneric("columns", function(x) {standardGeneric("columns") }) +#' @rdname describe +#' @export +setGeneric("describe", function(x, col, ...) { standardGeneric("describe") }) + +#' @rdname nafunctions +#' @export +setGeneric("dropna", + function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) { + standardGeneric("dropna") + }) + +#' @rdname nafunctions +#' @export +setGeneric("na.omit", + function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) { + standardGeneric("na.omit") + }) + #' @rdname schema #' @export setGeneric("dtypes", function(x) { standardGeneric("dtypes") }) @@ -396,10 +422,18 @@ setGeneric("explain", function(x, ...) 
{ standardGeneric("explain") }) #' @export setGeneric("except", function(x, y) { standardGeneric("except") }) +#' @rdname nafunctions +#' @export +setGeneric("fillna", function(x, value, cols = NULL) { standardGeneric("fillna") }) + #' @rdname filter #' @export setGeneric("filter", function(x, condition) { standardGeneric("filter") }) +#' @rdname groupBy +#' @export +setGeneric("group_by", function(x, ...) { standardGeneric("group_by") }) + #' @rdname DataFrame #' @export setGeneric("groupBy", function(x, ...) { standardGeneric("groupBy") }) @@ -420,7 +454,11 @@ setGeneric("isLocal", function(x) { standardGeneric("isLocal") }) #' @export setGeneric("limit", function(x, num) {standardGeneric("limit") }) -#' @rdname sortDF +#' @rdname withColumn +#' @export +setGeneric("mutate", function(x, ...) {standardGeneric("mutate") }) + +#' @rdname arrange #' @export setGeneric("orderBy", function(x, col) { standardGeneric("orderBy") }) @@ -428,16 +466,27 @@ setGeneric("orderBy", function(x, col) { standardGeneric("orderBy") }) #' @export setGeneric("printSchema", function(x) { standardGeneric("printSchema") }) +#' @rdname withColumnRenamed +#' @export +setGeneric("rename", function(x, ...) { standardGeneric("rename") }) + #' @rdname registerTempTable #' @export setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") }) -#' @rdname sampleDF +#' @rdname sample +#' @export +setGeneric("sample", + function(x, withReplacement, fraction, seed) { + standardGeneric("sample") + }) + +#' @rdname sample #' @export -setGeneric("sampleDF", +setGeneric("sample_frac", function(x, withReplacement, fraction, seed) { - standardGeneric("sampleDF") - }) + standardGeneric("sample_frac") + }) #' @rdname saveAsParquetFile #' @export @@ -449,9 +498,13 @@ setGeneric("saveAsTable", function(df, tableName, source, mode, ...) { standardGeneric("saveAsTable") }) -#' @rdname saveAsTable +#' @rdname write.df +#' @export +setGeneric("write.df", function(df, path, ...) { standardGeneric("write.df") }) + +#' @rdname write.df #' @export -setGeneric("saveDF", function(df, path, source, mode, ...) { standardGeneric("saveDF") }) +setGeneric("saveDF", function(df, path, ...) { standardGeneric("saveDF") }) #' @rdname schema #' @export @@ -469,12 +522,12 @@ setGeneric("selectExpr", function(x, expr, ...) { standardGeneric("selectExpr") #' @export setGeneric("showDF", function(x,...) { standardGeneric("showDF") }) -#' @rdname sortDF +#' @rdname agg #' @export -setGeneric("sortDF", function(x, col, ...) { standardGeneric("sortDF") }) +setGeneric("summarize", function(x,...) { standardGeneric("summarize") }) -#' @rdname tojson -#' @export +# @rdname tojson +# @export setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) #' @rdname DataFrame @@ -517,6 +570,10 @@ setGeneric("avg", function(x, ...) { standardGeneric("avg") }) #' @export setGeneric("cast", function(x, dataType) { standardGeneric("cast") }) +#' @rdname column +#' @export +setGeneric("cbrt", function(x) { standardGeneric("cbrt") }) + #' @rdname column #' @export setGeneric("contains", function(x, ...) { standardGeneric("contains") }) @@ -540,6 +597,10 @@ setGeneric("getField", function(x, ...) { standardGeneric("getField") }) #' @export setGeneric("getItem", function(x, ...) 
{ standardGeneric("getItem") }) +#' @rdname column +#' @export +setGeneric("hypot", function(y, x) { standardGeneric("hypot") }) + #' @rdname column #' @export setGeneric("isNull", function(x) { standardGeneric("isNull") }) @@ -560,6 +621,18 @@ setGeneric("like", function(x, ...) { standardGeneric("like") }) #' @export setGeneric("lower", function(x) { standardGeneric("lower") }) +#' @rdname column +#' @export +setGeneric("n", function(x) { standardGeneric("n") }) + +#' @rdname column +#' @export +setGeneric("n_distinct", function(x, ...) { standardGeneric("n_distinct") }) + +#' @rdname column +#' @export +setGeneric("rint", function(x, ...) { standardGeneric("rint") }) + #' @rdname column #' @export setGeneric("rlike", function(x, ...) { standardGeneric("rlike") }) @@ -572,6 +645,14 @@ setGeneric("startsWith", function(x, ...) { standardGeneric("startsWith") }) #' @export setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) +#' @rdname column +#' @export +setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") }) + +#' @rdname column +#' @export +setGeneric("toRadians", function(x) { standardGeneric("toRadians") }) + #' @rdname column #' @export setGeneric("upper", function(x) { standardGeneric("upper") }) diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 02237b3672d6b..b758481997574 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -56,6 +56,7 @@ setMethod("show", "GroupedData", #' #' @param x a GroupedData #' @return a DataFrame +#' @rdname agg #' @export #' @examples #' \dontrun{ @@ -83,8 +84,6 @@ setMethod("count", #' df2 <- agg(df, age = "sum") # new column name will be created as 'SUM(age#0)' #' df2 <- agg(df, ageSum = sum(df$age)) # Creates a new column named ageSum #' } -setGeneric("agg", function (x, ...) { standardGeneric("agg") }) - setMethod("agg", signature(x = "GroupedData"), function(x, ...) { @@ -103,15 +102,20 @@ setMethod("agg", } } jcols <- lapply(cols, function(c) { c@jc }) - # the GroupedData.agg(col, cols*) API does not contain grouping Column - sdf <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "aggWithGrouping", - x@sgd, listToSeq(jcols)) + sdf <- callJMethod(x@sgd, "agg", jcols[[1]], listToSeq(jcols[-1])) } else { stop("agg can only support Column or character") } dataFrame(sdf) }) +#' @rdname agg +#' @aliases agg +setMethod("summarize", + signature(x = "GroupedData"), + function(x, ...) { + agg(x, ...) + }) # sum/mean/avg/min/max methods <- c("sum", "mean", "avg", "min", "max") diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 9791e55791bae..1e24286dbcae2 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -21,23 +21,23 @@ NULL ############ Actions and Transformations ############ -#' Look up elements of a key in an RDD -#' -#' @description -#' \code{lookup} returns a list of values in this RDD for key key. -#' -#' @param x The RDD to collect -#' @param key The key to look up for -#' @return a list of values in this RDD for key key -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(c(1, 1), c(2, 2), c(1, 3)) -#' rdd <- parallelize(sc, pairs) -#' lookup(rdd, 1) # list(1, 3) -#'} -#' @rdname lookup -#' @aliases lookup,RDD-method +# Look up elements of a key in an RDD +# +# @description +# \code{lookup} returns a list of values in this RDD for key key. 
+# +# @param x The RDD to collect +# @param key The key to look up for +# @return a list of values in this RDD for key key +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(c(1, 1), c(2, 2), c(1, 3)) +# rdd <- parallelize(sc, pairs) +# lookup(rdd, 1) # list(1, 3) +#} +# @rdname lookup +# @aliases lookup,RDD-method setMethod("lookup", signature(x = "RDD", key = "ANY"), function(x, key) { @@ -49,21 +49,21 @@ setMethod("lookup", collect(valsRDD) }) -#' Count the number of elements for each key, and return the result to the -#' master as lists of (key, count) pairs. -#' -#' Same as countByKey in Spark. -#' -#' @param x The RDD to count keys. -#' @return list of (key, count) pairs, where count is number of each key in rdd. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(c("a", 1), c("b", 1), c("a", 1))) -#' countByKey(rdd) # ("a", 2L), ("b", 1L) -#'} -#' @rdname countByKey -#' @aliases countByKey,RDD-method +# Count the number of elements for each key, and return the result to the +# master as lists of (key, count) pairs. +# +# Same as countByKey in Spark. +# +# @param x The RDD to count keys. +# @return list of (key, count) pairs, where count is number of each key in rdd. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(c("a", 1), c("b", 1), c("a", 1))) +# countByKey(rdd) # ("a", 2L), ("b", 1L) +#} +# @rdname countByKey +# @aliases countByKey,RDD-method setMethod("countByKey", signature(x = "RDD"), function(x) { @@ -71,17 +71,17 @@ setMethod("countByKey", countByValue(keys) }) -#' Return an RDD with the keys of each tuple. -#' -#' @param x The RDD from which the keys of each tuple is returned. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) -#' collect(keys(rdd)) # list(1, 3) -#'} -#' @rdname keys -#' @aliases keys,RDD +# Return an RDD with the keys of each tuple. +# +# @param x The RDD from which the keys of each tuple is returned. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) +# collect(keys(rdd)) # list(1, 3) +#} +# @rdname keys +# @aliases keys,RDD setMethod("keys", signature(x = "RDD"), function(x) { @@ -91,17 +91,17 @@ setMethod("keys", lapply(x, func) }) -#' Return an RDD with the values of each tuple. -#' -#' @param x The RDD from which the values of each tuple is returned. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) -#' collect(values(rdd)) # list(2, 4) -#'} -#' @rdname values -#' @aliases values,RDD +# Return an RDD with the values of each tuple. +# +# @param x The RDD from which the values of each tuple is returned. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) +# collect(values(rdd)) # list(2, 4) +#} +# @rdname values +# @aliases values,RDD setMethod("values", signature(x = "RDD"), function(x) { @@ -111,23 +111,23 @@ setMethod("values", lapply(x, func) }) -#' Applies a function to all values of the elements, without modifying the keys. -#' -#' The same as `mapValues()' in Spark. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on the value of each element. -#' @return a new RDD created by the transformation. 
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:10) -#' makePairs <- lapply(rdd, function(x) { list(x, x) }) -#' collect(mapValues(makePairs, function(x) { x * 2) }) -#' Output: list(list(1,2), list(2,4), list(3,6), ...) -#'} -#' @rdname mapValues -#' @aliases mapValues,RDD,function-method +# Applies a function to all values of the elements, without modifying the keys. +# +# The same as `mapValues()' in Spark. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on the value of each element. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:10) +# makePairs <- lapply(rdd, function(x) { list(x, x) }) +# collect(mapValues(makePairs, function(x) { x * 2) }) +# Output: list(list(1,2), list(2,4), list(3,6), ...) +#} +# @rdname mapValues +# @aliases mapValues,RDD,function-method setMethod("mapValues", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -137,23 +137,23 @@ setMethod("mapValues", lapply(X, func) }) -#' Pass each value in the key-value pair RDD through a flatMap function without -#' changing the keys; this also retains the original RDD's partitioning. -#' -#' The same as 'flatMapValues()' in Spark. -#' -#' @param X The RDD to apply the transformation. -#' @param FUN the transformation to apply on the value of each element. -#' @return a new RDD created by the transformation. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4)))) -#' collect(flatMapValues(rdd, function(x) { x })) -#' Output: list(list(1,1), list(1,2), list(2,3), list(2,4)) -#'} -#' @rdname flatMapValues -#' @aliases flatMapValues,RDD,function-method +# Pass each value in the key-value pair RDD through a flatMap function without +# changing the keys; this also retains the original RDD's partitioning. +# +# The same as 'flatMapValues()' in Spark. +# +# @param X The RDD to apply the transformation. +# @param FUN the transformation to apply on the value of each element. +# @return a new RDD created by the transformation. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, c(1,2)), list(2, c(3,4)))) +# collect(flatMapValues(rdd, function(x) { x })) +# Output: list(list(1,1), list(1,2), list(2,3), list(2,4)) +#} +# @rdname flatMapValues +# @aliases flatMapValues,RDD,function-method setMethod("flatMapValues", signature(X = "RDD", FUN = "function"), function(X, FUN) { @@ -165,30 +165,30 @@ setMethod("flatMapValues", ############ Shuffle Functions ############ -#' Partition an RDD by key -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' For each element of this RDD, the partitioner is used to compute a hash -#' function and the RDD is partitioned using this hash value. -#' -#' @param x The RDD to partition. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param numPartitions Number of partitions to create. -#' @param ... Other optional arguments to partitionBy. -#' -#' @param partitionFunc The partition function to use. Uses a default hashCode -#' function if not provided -#' @return An RDD partitioned using the specified partitioner. 
-#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- partitionBy(rdd, 2L) -#' collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4) -#'} -#' @rdname partitionBy -#' @aliases partitionBy,RDD,integer-method +# Partition an RDD by key +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# For each element of this RDD, the partitioner is used to compute a hash +# function and the RDD is partitioned using this hash value. +# +# @param x The RDD to partition. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param numPartitions Number of partitions to create. +# @param ... Other optional arguments to partitionBy. +# +# @param partitionFunc The partition function to use. Uses a default hashCode +# function if not provided +# @return An RDD partitioned using the specified partitioner. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- partitionBy(rdd, 2L) +# collectPartition(parts, 0L) # First partition should contain list(1, 2) and list(1, 4) +#} +# @rdname partitionBy +# @aliases partitionBy,RDD,integer-method setMethod("partitionBy", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions, partitionFunc = hashCode) { @@ -234,27 +234,27 @@ setMethod("partitionBy", RDD(r, serializedMode = "byte") }) -#' Group values by key -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' and group values for each key in the RDD into a single sequence. -#' -#' @param x The RDD to group. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param numPartitions Number of partitions to create. -#' @return An RDD where each element is list(K, list(V)) -#' @seealso reduceByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- groupByKey(rdd, 2L) -#' grouped <- collect(parts) -#' grouped[[1]] # Should be a list(1, list(2, 4)) -#'} -#' @rdname groupByKey -#' @aliases groupByKey,RDD,integer-method +# Group values by key +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# and group values for each key in the RDD into a single sequence. +# +# @param x The RDD to group. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param numPartitions Number of partitions to create. +# @return An RDD where each element is list(K, list(V)) +# @seealso reduceByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- groupByKey(rdd, 2L) +# grouped <- collect(parts) +# grouped[[1]] # Should be a list(1, list(2, 4)) +#} +# @rdname groupByKey +# @aliases groupByKey,RDD,integer-method setMethod("groupByKey", signature(x = "RDD", numPartitions = "numeric"), function(x, numPartitions) { @@ -292,28 +292,28 @@ setMethod("groupByKey", lapplyPartition(shuffled, groupVals) }) -#' Merge values by key -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' and merges the values for each key using an associative reduce function. -#' -#' @param x The RDD to reduce by key. Should be an RDD where each element is -#' list(K, V) or c(K, V). 
-#' @param combineFunc The associative reduce function to use. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where each element is list(K, V') where V' is the merged -#' value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- reduceByKey(rdd, "+", 2L) -#' reduced <- collect(parts) -#' reduced[[1]] # Should be a list(1, 6) -#'} -#' @rdname reduceByKey -#' @aliases reduceByKey,RDD,integer-method +# Merge values by key +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# and merges the values for each key using an associative reduce function. +# +# @param x The RDD to reduce by key. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param combineFunc The associative reduce function to use. +# @param numPartitions Number of partitions to create. +# @return An RDD where each element is list(K, V') where V' is the merged +# value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- reduceByKey(rdd, "+", 2L) +# reduced <- collect(parts) +# reduced[[1]] # Should be a list(1, 6) +#} +# @rdname reduceByKey +# @aliases reduceByKey,RDD,integer-method setMethod("reduceByKey", signature(x = "RDD", combineFunc = "ANY", numPartitions = "numeric"), function(x, combineFunc, numPartitions) { @@ -329,31 +329,31 @@ setMethod("reduceByKey", convertEnvsToList(keys, vals) } locallyReduced <- lapplyPartition(x, reduceVals) - shuffled <- partitionBy(locallyReduced, numPartitions) + shuffled <- partitionBy(locallyReduced, numToInt(numPartitions)) lapplyPartition(shuffled, reduceVals) }) -#' Merge values by key locally -#' -#' This function operates on RDDs where every element is of the form list(K, V) or c(K, V). -#' and merges the values for each key using an associative reduce function, but return the -#' results immediately to the driver as an R list. -#' -#' @param x The RDD to reduce by key. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param combineFunc The associative reduce function to use. -#' @return A list of elements of type list(K, V') where V' is the merged value for each key -#' @seealso reduceByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' reduced <- reduceByKeyLocally(rdd, "+") -#' reduced # list(list(1, 6), list(1.1, 3)) -#'} -#' @rdname reduceByKeyLocally -#' @aliases reduceByKeyLocally,RDD,integer-method +# Merge values by key locally +# +# This function operates on RDDs where every element is of the form list(K, V) or c(K, V). +# and merges the values for each key using an associative reduce function, but return the +# results immediately to the driver as an R list. +# +# @param x The RDD to reduce by key. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param combineFunc The associative reduce function to use. 
+# @return A list of elements of type list(K, V') where V' is the merged value for each key +# @seealso reduceByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# reduced <- reduceByKeyLocally(rdd, "+") +# reduced # list(list(1, 6), list(1.1, 3)) +#} +# @rdname reduceByKeyLocally +# @aliases reduceByKeyLocally,RDD,integer-method setMethod("reduceByKeyLocally", signature(x = "RDD", combineFunc = "ANY"), function(x, combineFunc) { @@ -385,41 +385,41 @@ setMethod("reduceByKeyLocally", convertEnvsToList(merged[[1]], merged[[2]]) }) -#' Combine values by key -#' -#' Generic function to combine the elements for each key using a custom set of -#' aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], -#' for a "combined type" C. Note that V and C can be different -- for example, one -#' might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]). - -#' Users provide three functions: -#' \itemize{ -#' \item createCombiner, which turns a V into a C (e.g., creates a one-element list) -#' \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) - -#' \item mergeCombiners, to combine two C's into a single one (e.g., concatentates -#' two lists). -#' } -#' -#' @param x The RDD to combine. Should be an RDD where each element is -#' list(K, V) or c(K, V). -#' @param createCombiner Create a combiner (C) given a value (V) -#' @param mergeValue Merge the given value (V) with an existing combiner (C) -#' @param mergeCombiners Merge two combiners and return a new combiner -#' @param numPartitions Number of partitions to create. -#' @return An RDD where each element is list(K, C) where C is the combined type -#' -#' @seealso groupByKey, reduceByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) -#' rdd <- parallelize(sc, pairs) -#' parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L) -#' combined <- collect(parts) -#' combined[[1]] # Should be a list(1, 6) -#'} -#' @rdname combineByKey -#' @aliases combineByKey,RDD,ANY,ANY,ANY,integer-method +# Combine values by key +# +# Generic function to combine the elements for each key using a custom set of +# aggregation functions. Turns an RDD[(K, V)] into a result of type RDD[(K, C)], +# for a "combined type" C. Note that V and C can be different -- for example, one +# might group an RDD of type (Int, Int) into an RDD of type (Int, Seq[Int]). + +# Users provide three functions: +# \itemize{ +# \item createCombiner, which turns a V into a C (e.g., creates a one-element list) +# \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) - +# \item mergeCombiners, to combine two C's into a single one (e.g., concatentates +# two lists). +# } +# +# @param x The RDD to combine. Should be an RDD where each element is +# list(K, V) or c(K, V). +# @param createCombiner Create a combiner (C) given a value (V) +# @param mergeValue Merge the given value (V) with an existing combiner (C) +# @param mergeCombiners Merge two combiners and return a new combiner +# @param numPartitions Number of partitions to create. 
+# @return An RDD where each element is list(K, C) where C is the combined type +# +# @seealso groupByKey, reduceByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# pairs <- list(list(1, 2), list(1.1, 3), list(1, 4)) +# rdd <- parallelize(sc, pairs) +# parts <- combineByKey(rdd, function(x) { x }, "+", "+", 2L) +# combined <- collect(parts) +# combined[[1]] # Should be a list(1, 6) +#} +# @rdname combineByKey +# @aliases combineByKey,RDD,ANY,ANY,ANY,integer-method setMethod("combineByKey", signature(x = "RDD", createCombiner = "ANY", mergeValue = "ANY", mergeCombiners = "ANY", numPartitions = "numeric"), @@ -436,7 +436,7 @@ setMethod("combineByKey", convertEnvsToList(keys, combiners) } locallyCombined <- lapplyPartition(x, combineLocally) - shuffled <- partitionBy(locallyCombined, numPartitions) + shuffled <- partitionBy(locallyCombined, numToInt(numPartitions)) mergeAfterShuffle <- function(part) { combiners <- new.env() keys <- new.env() @@ -451,36 +451,36 @@ setMethod("combineByKey", lapplyPartition(shuffled, mergeAfterShuffle) }) -#' Aggregate a pair RDD by each key. -#' -#' Aggregate the values of each key in an RDD, using given combine functions -#' and a neutral "zero value". This function can return a different result type, -#' U, than the type of the values in this RDD, V. Thus, we need one operation -#' for merging a V into a U and one operation for merging two U's, The former -#' operation is used for merging values within a partition, and the latter is -#' used for merging values between partitions. To avoid memory allocation, both -#' of these functions are allowed to modify and return their first argument -#' instead of creating a new U. -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param seqOp A function to aggregate the values of each key. It may return -#' a different result type from the type of the values. -#' @param combOp A function to aggregate results of seqOp. -#' @return An RDD containing the aggregation result. -#' @seealso foldByKey, combineByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) -#' zeroValue <- list(0, 0) -#' seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } -#' combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } -#' aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) -#' # list(list(1, list(3, 2)), list(2, list(7, 2))) -#'} -#' @rdname aggregateByKey -#' @aliases aggregateByKey,RDD,ANY,ANY,ANY,integer-method +# Aggregate a pair RDD by each key. +# +# Aggregate the values of each key in an RDD, using given combine functions +# and a neutral "zero value". This function can return a different result type, +# U, than the type of the values in this RDD, V. Thus, we need one operation +# for merging a V into a U and one operation for merging two U's, The former +# operation is used for merging values within a partition, and the latter is +# used for merging values between partitions. To avoid memory allocation, both +# of these functions are allowed to modify and return their first argument +# instead of creating a new U. +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param seqOp A function to aggregate the values of each key. It may return +# a different result type from the type of the values. +# @param combOp A function to aggregate results of seqOp. +# @return An RDD containing the aggregation result. 
+# @seealso foldByKey, combineByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) +# zeroValue <- list(0, 0) +# seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } +# combOp <- function(x, y) { list(x[[1]] + y[[1]], x[[2]] + y[[2]]) } +# aggregateByKey(rdd, zeroValue, seqOp, combOp, 2L) +# # list(list(1, list(3, 2)), list(2, list(7, 2))) +#} +# @rdname aggregateByKey +# @aliases aggregateByKey,RDD,ANY,ANY,ANY,integer-method setMethod("aggregateByKey", signature(x = "RDD", zeroValue = "ANY", seqOp = "ANY", combOp = "ANY", numPartitions = "numeric"), @@ -492,26 +492,26 @@ setMethod("aggregateByKey", combineByKey(x, createCombiner, seqOp, combOp, numPartitions) }) -#' Fold a pair RDD by each key. -#' -#' Aggregate the values of each key in an RDD, using an associative function "func" -#' and a neutral "zero value" which may be added to the result an arbitrary -#' number of times, and must not change the result (e.g., 0 for addition, or -#' 1 for multiplication.). -#' -#' @param x An RDD. -#' @param zeroValue A neutral "zero value". -#' @param func An associative function for folding values of each key. -#' @return An RDD containing the aggregation result. -#' @seealso aggregateByKey, combineByKey -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) -#' foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7)) -#'} -#' @rdname foldByKey -#' @aliases foldByKey,RDD,ANY,ANY,integer-method +# Fold a pair RDD by each key. +# +# Aggregate the values of each key in an RDD, using an associative function "func" +# and a neutral "zero value" which may be added to the result an arbitrary +# number of times, and must not change the result (e.g., 0 for addition, or +# 1 for multiplication.). +# +# @param x An RDD. +# @param zeroValue A neutral "zero value". +# @param func An associative function for folding values of each key. +# @return An RDD containing the aggregation result. +# @seealso aggregateByKey, combineByKey +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) +# foldByKey(rdd, 0, "+", 2L) # list(list(1, 3), list(2, 7)) +#} +# @rdname foldByKey +# @aliases foldByKey,RDD,ANY,ANY,integer-method setMethod("foldByKey", signature(x = "RDD", zeroValue = "ANY", func = "ANY", numPartitions = "numeric"), @@ -521,28 +521,28 @@ setMethod("foldByKey", ############ Binary Functions ############# -#' Join two RDDs -#' -#' @description -#' \code{join} This function joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return a new RDD containing all pairs of elements with matching keys in -#' two input RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)) -#'} -#' @rdname join-methods -#' @aliases join,RDD,RDD-method +# Join two RDDs +# +# @description +# \code{join} This function joins two RDDs where every element is of the form list(K, V). 
+# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return a new RDD containing all pairs of elements with matching keys in +# two input RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# join(rdd1, rdd2, 2L) # list(list(1, list(1, 2)), list(1, list(1, 3)) +#} +# @rdname join-methods +# @aliases join,RDD,RDD-method setMethod("join", signature(x = "RDD", y = "RDD"), function(x, y, numPartitions) { @@ -557,30 +557,30 @@ setMethod("join", doJoin) }) -#' Left outer join two RDDs -#' -#' @description -#' \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return For each element (k, v) in x, the resulting RDD will either contain -#' all pairs (k, (v, w)) for (k, w) in rdd2, or the pair (k, (v, NULL)) -#' if no elements in rdd2 have key k. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' leftOuterJoin(rdd1, rdd2, 2L) -#' # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL))) -#'} -#' @rdname join-methods -#' @aliases leftOuterJoin,RDD,RDD-method +# Left outer join two RDDs +# +# @description +# \code{leftouterjoin} This function left-outer-joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return For each element (k, v) in x, the resulting RDD will either contain +# all pairs (k, (v, w)) for (k, w) in rdd2, or the pair (k, (v, NULL)) +# if no elements in rdd2 have key k. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# leftOuterJoin(rdd1, rdd2, 2L) +# # list(list(1, list(1, 2)), list(1, list(1, 3)), list(2, list(4, NULL))) +#} +# @rdname join-methods +# @aliases leftOuterJoin,RDD,RDD-method setMethod("leftOuterJoin", signature(x = "RDD", y = "RDD", numPartitions = "numeric"), function(x, y, numPartitions) { @@ -594,30 +594,30 @@ setMethod("leftOuterJoin", joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions), doJoin) }) -#' Right outer join two RDDs -#' -#' @description -#' \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. 
-#' @return For each element (k, w) in y, the resulting RDD will either contain -#' all pairs (k, (v, w)) for (k, v) in x, or the pair (k, (NULL, w)) -#' if no elements in x have key k. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rightOuterJoin(rdd1, rdd2, 2L) -#' # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4))) -#'} -#' @rdname join-methods -#' @aliases rightOuterJoin,RDD,RDD-method +# Right outer join two RDDs +# +# @description +# \code{rightouterjoin} This function right-outer-joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return For each element (k, w) in y, the resulting RDD will either contain +# all pairs (k, (v, w)) for (k, v) in x, or the pair (k, (NULL, w)) +# if no elements in x have key k. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rightOuterJoin(rdd1, rdd2, 2L) +# # list(list(1, list(2, 1)), list(1, list(3, 1)), list(2, list(NULL, 4))) +#} +# @rdname join-methods +# @aliases rightOuterJoin,RDD,RDD-method setMethod("rightOuterJoin", signature(x = "RDD", y = "RDD", numPartitions = "numeric"), function(x, y, numPartitions) { @@ -631,33 +631,33 @@ setMethod("rightOuterJoin", joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions), doJoin) }) -#' Full outer join two RDDs -#' -#' @description -#' \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V). -#' The key types of the two RDDs should be the same. -#' -#' @param x An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param y An RDD to be joined. Should be an RDD where each element is -#' list(K, V). -#' @param numPartitions Number of partitions to create. -#' @return For each element (k, v) in x and (k, w) in y, the resulting RDD -#' will contain all pairs (k, (v, w)) for both (k, v) in x and -#' (k, w) in y, or the pair (k, (NULL, w))/(k, (v, NULL)) if no elements -#' in x/y have key k. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) -#' rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)), -#' # list(1, list(3, 1)), -#' # list(2, list(NULL, 4))) -#' # list(3, list(3, NULL)), -#'} -#' @rdname join-methods -#' @aliases fullOuterJoin,RDD,RDD-method +# Full outer join two RDDs +# +# @description +# \code{fullouterjoin} This function full-outer-joins two RDDs where every element is of the form list(K, V). +# The key types of the two RDDs should be the same. +# +# @param x An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param y An RDD to be joined. Should be an RDD where each element is +# list(K, V). +# @param numPartitions Number of partitions to create. +# @return For each element (k, v) in x and (k, w) in y, the resulting RDD +# will contain all pairs (k, (v, w)) for both (k, v) in x and +# (k, w) in y, or the pair (k, (NULL, w))/(k, (v, NULL)) if no elements +# in x/y have key k. 
+# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) +# rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# fullOuterJoin(rdd1, rdd2, 2L) # list(list(1, list(2, 1)), +# # list(1, list(3, 1)), +# # list(2, list(NULL, 4))) +# # list(3, list(3, NULL)), +#} +# @rdname join-methods +# @aliases fullOuterJoin,RDD,RDD-method setMethod("fullOuterJoin", signature(x = "RDD", y = "RDD", numPartitions = "numeric"), function(x, y, numPartitions) { @@ -671,23 +671,23 @@ setMethod("fullOuterJoin", joined <- flatMapValues(groupByKey(unionRDD(xTagged, yTagged), numPartitions), doJoin) }) -#' For each key k in several RDDs, return a resulting RDD that -#' whose values are a list of values for the key in all RDDs. -#' -#' @param ... Several RDDs. -#' @param numPartitions Number of partitions to create. -#' @return a new RDD containing all pairs of elements with values in a list -#' in all RDDs. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) -#' rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) -#' cogroup(rdd1, rdd2, numPartitions = 2L) -#' # list(list(1, list(1, list(2, 3))), list(2, list(list(4), list())) -#'} -#' @rdname cogroup -#' @aliases cogroup,RDD-method +# For each key k in several RDDs, return a resulting RDD that +# whose values are a list of values for the key in all RDDs. +# +# @param ... Several RDDs. +# @param numPartitions Number of partitions to create. +# @return a new RDD containing all pairs of elements with values in a list +# in all RDDs. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) +# rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) +# cogroup(rdd1, rdd2, numPartitions = 2L) +# # list(list(1, list(1, list(2, 3))), list(2, list(list(4), list())) +#} +# @rdname cogroup +# @aliases cogroup,RDD-method setMethod("cogroup", "RDD", function(..., numPartitions) { @@ -723,23 +723,23 @@ setMethod("cogroup", group.func) }) -#' Sort a (k, v) pair RDD by k. -#' -#' @param x A (k, v) pair RDD to be sorted. -#' @param ascending A flag to indicate whether the sorting is ascending or descending. -#' @param numPartitions Number of partitions to create. -#' @return An RDD where all (k, v) pair elements are sorted. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3))) -#' collect(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1)) -#'} -#' @rdname sortByKey -#' @aliases sortByKey,RDD,RDD-method +# Sort a (k, v) pair RDD by k. +# +# @param x A (k, v) pair RDD to be sorted. +# @param ascending A flag to indicate whether the sorting is ascending or descending. +# @param numPartitions Number of partitions to create. +# @return An RDD where all (k, v) pair elements are sorted. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, list(list(3, 1), list(2, 2), list(1, 3))) +# collect(sortByKey(rdd)) # list (list(1, 3), list(2, 2), list(3, 1)) +#} +# @rdname sortByKey +# @aliases sortByKey,RDD,RDD-method setMethod("sortByKey", signature(x = "RDD"), - function(x, ascending = TRUE, numPartitions = SparkR::numPartitions(x)) { + function(x, ascending = TRUE, numPartitions = SparkR:::numPartitions(x)) { rangeBounds <- list() if (numPartitions > 1) { @@ -785,28 +785,28 @@ setMethod("sortByKey", lapplyPartition(newRDD, partitionFunc) }) -#' Subtract a pair RDD with another pair RDD. 
-#' -#' Return an RDD with the pairs from x whose keys are not in other. -#' -#' @param x An RDD. -#' @param other An RDD. -#' @param numPartitions Number of the partitions in the result RDD. -#' @return An RDD with the pairs from x whose keys are not in other. -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4), -#' list("b", 5), list("a", 2))) -#' rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1))) -#' collect(subtractByKey(rdd1, rdd2)) -#' # list(list("b", 4), list("b", 5)) -#'} -#' @rdname subtractByKey -#' @aliases subtractByKey,RDD +# Subtract a pair RDD with another pair RDD. +# +# Return an RDD with the pairs from x whose keys are not in other. +# +# @param x An RDD. +# @param other An RDD. +# @param numPartitions Number of the partitions in the result RDD. +# @return An RDD with the pairs from x whose keys are not in other. +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd1 <- parallelize(sc, list(list("a", 1), list("b", 4), +# list("b", 5), list("a", 2))) +# rdd2 <- parallelize(sc, list(list("a", 3), list("c", 1))) +# collect(subtractByKey(rdd1, rdd2)) +# # list(list("b", 4), list("b", 5)) +#} +# @rdname subtractByKey +# @aliases subtractByKey,RDD setMethod("subtractByKey", signature(x = "RDD", other = "RDD"), - function(x, other, numPartitions = SparkR::numPartitions(x)) { + function(x, other, numPartitions = SparkR:::numPartitions(x)) { filterFunction <- function(elem) { iters <- elem[[2]] (length(iters[[1]]) > 0) && (length(iters[[2]]) == 0) @@ -819,41 +819,41 @@ setMethod("subtractByKey", function (v) { v[[1]] }) }) -#' Return a subset of this RDD sampled by key. -#' -#' @description -#' \code{sampleByKey} Create a sample of this RDD using variable sampling rates -#' for different keys as specified by fractions, a key to sampling rate map. -#' -#' @param x The RDD to sample elements by key, where each element is -#' list(K, V) or c(K, V). -#' @param withReplacement Sampling with replacement or not -#' @param fraction The (rough) sample target fraction -#' @param seed Randomness seed value -#' @examples -#'\dontrun{ -#' sc <- sparkR.init() -#' rdd <- parallelize(sc, 1:3000) -#' pairs <- lapply(rdd, function(x) { if (x %% 3 == 0) list("a", x) -#' else { if (x %% 3 == 1) list("b", x) else list("c", x) }}) -#' fractions <- list(a = 0.2, b = 0.1, c = 0.3) -#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) -#' 100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE -#' 50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE -#' 200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE -#' lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE -#' lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 2000 # TRUE -#' lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE -#' lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 2000 # TRUE -#' lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE -#' lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 2000 # TRUE -#' fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4) -#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored -#' fractions <- list(a = 0.2, b = 0.1) -#' sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c" -#'} -#' @rdname sampleByKey -#' @aliases sampleByKey,RDD-method +# Return a subset of this RDD sampled by key. 
+# +# @description +# \code{sampleByKey} Create a sample of this RDD using variable sampling rates +# for different keys as specified by fractions, a key to sampling rate map. +# +# @param x The RDD to sample elements by key, where each element is +# list(K, V) or c(K, V). +# @param withReplacement Sampling with replacement or not +# @param fraction The (rough) sample target fraction +# @param seed Randomness seed value +# @examples +#\dontrun{ +# sc <- sparkR.init() +# rdd <- parallelize(sc, 1:3000) +# pairs <- lapply(rdd, function(x) { if (x %% 3 == 0) list("a", x) +# else { if (x %% 3 == 1) list("b", x) else list("c", x) }}) +# fractions <- list(a = 0.2, b = 0.1, c = 0.3) +# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) +# 100 < length(lookup(sample, "a")) && 300 > length(lookup(sample, "a")) # TRUE +# 50 < length(lookup(sample, "b")) && 150 > length(lookup(sample, "b")) # TRUE +# 200 < length(lookup(sample, "c")) && 400 > length(lookup(sample, "c")) # TRUE +# lookup(sample, "a")[which.min(lookup(sample, "a"))] >= 0 # TRUE +# lookup(sample, "a")[which.max(lookup(sample, "a"))] <= 2000 # TRUE +# lookup(sample, "b")[which.min(lookup(sample, "b"))] >= 0 # TRUE +# lookup(sample, "b")[which.max(lookup(sample, "b"))] <= 2000 # TRUE +# lookup(sample, "c")[which.min(lookup(sample, "c"))] >= 0 # TRUE +# lookup(sample, "c")[which.max(lookup(sample, "c"))] <= 2000 # TRUE +# fractions <- list(a = 0.2, b = 0.1, c = 0.3, d = 0.4) +# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # Key "d" will be ignored +# fractions <- list(a = 0.2, b = 0.1) +# sample <- sampleByKey(pairs, FALSE, fractions, 1618L) # KeyError: "c" +#} +# @rdname sampleByKey +# @aliases sampleByKey,RDD-method setMethod("sampleByKey", signature(x = "RDD", withReplacement = "logical", fractions = "vector", seed = "integer"), diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index c53d0a961016f..2081786e6f833 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -160,6 +160,14 @@ writeList <- function(con, arr) { } } +# Used to pass arrays where the elements can be of different types +writeGenericList <- function(con, list) { + writeInt(con, length(list)) + for (elem in list) { + writeObject(con, elem) + } +} + # Used to pass in hash maps required on Java side. writeEnv <- function(con, env) { len <- length(env) @@ -168,7 +176,7 @@ writeEnv <- function(con, env) { if (len > 0) { writeList(con, as.list(ls(env))) vals <- lapply(ls(env), function(x) { env[[x]] }) - writeList(con, as.list(vals)) + writeGenericList(con, as.list(vals)) } } diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index bc82df01f0fff..5ced7c688f98a 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -222,19 +222,26 @@ sparkR.init <- function( #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRSQL.init(sc) +#' sqlContext <- sparkRSQL.init(sc) #'} -sparkRSQL.init <- function(jsc) { +sparkRSQL.init <- function(jsc = NULL) { if (exists(".sparkRSQLsc", envir = .sparkREnv)) { return(get(".sparkRSQLsc", envir = .sparkREnv)) } - sqlCtx <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", - "createSQLContext", - jsc) - assign(".sparkRSQLsc", sqlCtx, envir = .sparkREnv) - sqlCtx + # If jsc is NULL, create a Spark Context + sc <- if (is.null(jsc)) { + sparkR.init() + } else { + jsc + } + + sqlContext <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", + "createSQLContext", + sc) + assign(".sparkRSQLsc", sqlContext, envir = .sparkREnv) + sqlContext } #' Initialize a new HiveContext. 
@@ -246,15 +253,22 @@ sparkRSQL.init <- function(jsc) { #' @examples #'\dontrun{ #' sc <- sparkR.init() -#' sqlCtx <- sparkRHive.init(sc) +#' sqlContext <- sparkRHive.init(sc) #'} -sparkRHive.init <- function(jsc) { +sparkRHive.init <- function(jsc = NULL) { if (exists(".sparkRHivesc", envir = .sparkREnv)) { return(get(".sparkRHivesc", envir = .sparkREnv)) } - ssc <- callJMethod(jsc, "sc") + # If jsc is NULL, create a Spark Context + sc <- if (is.null(jsc)) { + sparkR.init() + } else { + jsc + } + + ssc <- callJMethod(sc, "sc") hiveCtx <- tryCatch({ newJObject("org.apache.spark.sql.hive.HiveContext", ssc) }, error = function(err) { diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 0e7b7bd5a5b34..69b2700191c9a 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -122,13 +122,49 @@ hashCode <- function(key) { intBits <- packBits(rawToBits(rawVec), "integer") as.integer(bitwXor(intBits[2], intBits[1])) } else if (class(key) == "character") { - .Call("stringHashCode", key) + # TODO: SPARK-7839 means we might not have the native library available + if (is.loaded("stringHashCode")) { + .Call("stringHashCode", key) + } else { + n <- nchar(key) + if (n == 0) { + 0L + } else { + asciiVals <- sapply(charToRaw(key), function(x) { strtoi(x, 16L) }) + hashC <- 0 + for (k in 1:length(asciiVals)) { + hashC <- mult31AndAdd(hashC, asciiVals[k]) + } + as.integer(hashC) + } + } } else { warning(paste("Could not hash object, returning 0", sep = "")) as.integer(0) } } +# Helper function used to wrap a 'numeric' value to integer bounds. +# Useful for implementing C-like integer arithmetic +wrapInt <- function(value) { + if (value > .Machine$integer.max) { + value <- value - 2 * .Machine$integer.max - 2 + } else if (value < -1 * .Machine$integer.max) { + value <- 2 * .Machine$integer.max + value + 2 + } + value +} + +# Multiply `val` by 31 and add `addVal` to the result. Ensures that +# integer-overflows are handled at every step. +mult31AndAdd <- function(val, addVal) { + vec <- c(bitwShiftL(val, c(4,3,2,1,0)), addVal) + Reduce(function(a, b) { + wrapInt(as.numeric(a) + as.numeric(b)) + }, + vec) +} + # Create a new RDD with serializedMode == "byte". # Return itself if already in "byte" format. 
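As a point of reference for the pure-R fallback for stringHashCode above: it reproduces the familiar h = 31 * h + value recurrence over the key's bytes, with wrapInt() standing in for 32-bit overflow, which R's doubles do not do on their own. Below is a minimal, illustrative Java sketch of the same recurrence (the class name StringHashSketch is hypothetical and not part of this patch); Java int arithmetic wraps natively, so no explicit wrapping helper is needed.

    import java.nio.charset.StandardCharsets;

    // Same recurrence as the R fallback above: h = 31 * h + byteValue, accumulated in a 32-bit int.
    public final class StringHashSketch {
      static int hash(String key) {
        int h = 0;
        for (byte b : key.getBytes(StandardCharsets.US_ASCII)) {
          h = 31 * h + b;  // overflow simply wraps, which is what wrapInt()/mult31AndAdd emulate in R
        }
        return h;
      }

      public static void main(String[] args) {
        System.out.println(hash("spark"));
      }
    }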
serializeToBytes <- function(rdd) { diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index 7a7f2031152a0..ca94f1d4e7fd5 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -20,12 +20,14 @@ .libPaths(c(file.path(home, "R", "lib"), .libPaths())) Sys.setenv(NOAWT=1) - library(utils) - library(SparkR) - sc <- sparkR.init(Sys.getenv("MASTER", unset = "")) + # Make sure SparkR package is the last loaded one + old <- getOption("defaultPackages") + options(defaultPackages = c(old, "SparkR")) + + sc <- SparkR::sparkR.init(Sys.getenv("MASTER", unset = "")) assign("sc", sc, envir=.GlobalEnv) - sqlCtx <- sparkRSQL.init(sc) - assign("sqlCtx", sqlCtx, envir=.GlobalEnv) + sqlContext <- SparkR::sparkRSQL.init(sc) + assign("sqlContext", sqlContext, envir=.GlobalEnv) cat("\n Welcome to SparkR!") - cat("\n Spark context is available as sc, SQL context is available as sqlCtx\n") + cat("\n Spark context is available as sc, SQL context is available as sqlContext\n") } diff --git a/R/pkg/inst/tests/test_broadcast.R b/R/pkg/inst/tests/test_broadcast.R index fee91a427d6d5..bb86a5c922bde 100644 --- a/R/pkg/inst/tests/test_broadcast.R +++ b/R/pkg/inst/tests/test_broadcast.R @@ -29,7 +29,7 @@ test_that("using broadcast variable", { randomMatBr <- broadcast(sc, randomMat) useBroadcast <- function(x) { - sum(value(randomMatBr) * x) + sum(SparkR:::value(randomMatBr) * x) } actual <- collect(lapply(rrdd, useBroadcast)) expected <- list(sum(randomMat) * 1, sum(randomMat) * 2) diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index af7a6c582047a..d2d82e791e876 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -23,7 +23,7 @@ context("SparkSQL functions") sc <- sparkR.init() -sqlCtx <- sparkRSQL.init(sc) +sqlContext <- sparkRSQL.init(sc) mockLines <- c("{\"name\":\"Michael\"}", "{\"name\":\"Andy\", \"age\":30}", @@ -32,6 +32,15 @@ jsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp") parquetPath <- tempfile(pattern="sparkr-test", fileext=".parquet") writeLines(mockLines, jsonPath) +# For test nafunctions, like dropna(), fillna(),... 
+mockLinesNa <- c("{\"name\":\"Bob\",\"age\":16,\"height\":176.5}", + "{\"name\":\"Alice\",\"age\":null,\"height\":164.3}", + "{\"name\":\"David\",\"age\":60,\"height\":null}", + "{\"name\":\"Amy\",\"age\":null,\"height\":null}", + "{\"name\":null,\"age\":null,\"height\":null}") +jsonPathNa <- tempfile(pattern="sparkr-test", fileext=".tmp") +writeLines(mockLinesNa, jsonPathNa) + test_that("infer types", { expect_equal(infer_type(1L), "integer") expect_equal(infer_type(1.0), "double") @@ -67,25 +76,25 @@ test_that("structType and structField", { test_that("create DataFrame from RDD", { rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) - df <- createDataFrame(sqlCtx, rdd, list("a", "b")) + df <- createDataFrame(sqlContext, rdd, list("a", "b")) expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 10) expect_equal(columns(df), c("a", "b")) expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) - df <- createDataFrame(sqlCtx, rdd) + df <- createDataFrame(sqlContext, rdd) expect_true(inherits(df, "DataFrame")) expect_equal(columns(df), c("_1", "_2")) schema <- structType(structField(x = "a", type = "integer", nullable = TRUE), structField(x = "b", type = "string", nullable = TRUE)) - df <- createDataFrame(sqlCtx, rdd, schema) + df <- createDataFrame(sqlContext, rdd, schema) expect_true(inherits(df, "DataFrame")) expect_equal(columns(df), c("a", "b")) expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) rdd <- lapply(parallelize(sc, 1:10), function(x) { list(a = x, b = as.character(x)) }) - df <- createDataFrame(sqlCtx, rdd) + df <- createDataFrame(sqlContext, rdd) expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 10) expect_equal(columns(df), c("a", "b")) @@ -121,17 +130,17 @@ test_that("toDF", { test_that("create DataFrame from list or data.frame", { l <- list(list(1, 2), list(3, 4)) - df <- createDataFrame(sqlCtx, l, c("a", "b")) + df <- createDataFrame(sqlContext, l, c("a", "b")) expect_equal(columns(df), c("a", "b")) l <- list(list(a=1, b=2), list(a=3, b=4)) - df <- createDataFrame(sqlCtx, l) + df <- createDataFrame(sqlContext, l) expect_equal(columns(df), c("a", "b")) a <- 1:3 b <- c("a", "b", "c") ldf <- data.frame(a, b) - df <- createDataFrame(sqlCtx, ldf) + df <- createDataFrame(sqlContext, ldf) expect_equal(columns(df), c("a", "b")) expect_equal(dtypes(df), list(c("a", "int"), c("b", "string"))) expect_equal(count(df), 3) @@ -142,7 +151,7 @@ test_that("create DataFrame from list or data.frame", { test_that("create DataFrame with different data types", { l <- list(a = 1L, b = 2, c = TRUE, d = "ss", e = as.Date("2012-12-13"), f = as.POSIXct("2015-03-15 12:13:14.056")) - df <- createDataFrame(sqlCtx, list(l)) + df <- createDataFrame(sqlContext, list(l)) expect_equal(dtypes(df), list(c("a", "int"), c("b", "double"), c("c", "boolean"), c("d", "string"), c("e", "date"), c("f", "timestamp"))) expect_equal(count(df), 1) @@ -154,7 +163,7 @@ test_that("create DataFrame with different data types", { # e <- new.env() # assign("n", 3L, envir = e) # l <- list(1:10, list("a", "b"), e, list(a="aa", b=3L)) -# df <- createDataFrame(sqlCtx, list(l), c("a", "b", "c", "d")) +# df <- createDataFrame(sqlContext, list(l), c("a", "b", "c", "d")) # expect_equal(dtypes(df), list(c("a", "array"), c("b", "array"), # c("c", "map"), c("d", "struct"))) # expect_equal(count(df), 1) @@ -163,7 +172,7 @@ test_that("create DataFrame with different data types", { #}) test_that("jsonFile() on a local file returns a DataFrame", { - df <- 
jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 3) }) @@ -171,88 +180,88 @@ test_that("jsonFile() on a local file returns a DataFrame", { test_that("jsonRDD() on a RDD with json string", { rdd <- parallelize(sc, mockLines) expect_true(count(rdd) == 3) - df <- jsonRDD(sqlCtx, rdd) + df <- jsonRDD(sqlContext, rdd) expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 3) rdd2 <- flatMap(rdd, function(x) c(x, x)) - df <- jsonRDD(sqlCtx, rdd2) + df <- jsonRDD(sqlContext, rdd2) expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 6) }) test_that("test cache, uncache and clearCache", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) registerTempTable(df, "table1") - cacheTable(sqlCtx, "table1") - uncacheTable(sqlCtx, "table1") - clearCache(sqlCtx) - dropTempTable(sqlCtx, "table1") + cacheTable(sqlContext, "table1") + uncacheTable(sqlContext, "table1") + clearCache(sqlContext) + dropTempTable(sqlContext, "table1") }) test_that("test tableNames and tables", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) registerTempTable(df, "table1") - expect_true(length(tableNames(sqlCtx)) == 1) - df <- tables(sqlCtx) + expect_true(length(tableNames(sqlContext)) == 1) + df <- tables(sqlContext) expect_true(count(df) == 1) - dropTempTable(sqlCtx, "table1") + dropTempTable(sqlContext, "table1") }) test_that("registerTempTable() results in a queryable table and sql() results in a new DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) registerTempTable(df, "table1") - newdf <- sql(sqlCtx, "SELECT * FROM table1 where name = 'Michael'") + newdf <- sql(sqlContext, "SELECT * FROM table1 where name = 'Michael'") expect_true(inherits(newdf, "DataFrame")) expect_true(count(newdf) == 1) - dropTempTable(sqlCtx, "table1") + dropTempTable(sqlContext, "table1") }) test_that("insertInto() on a registered table", { - df <- loadDF(sqlCtx, jsonPath, "json") - saveDF(df, parquetPath, "parquet", "overwrite") - dfParquet <- loadDF(sqlCtx, parquetPath, "parquet") + df <- read.df(sqlContext, jsonPath, "json") + write.df(df, parquetPath, "parquet", "overwrite") + dfParquet <- read.df(sqlContext, parquetPath, "parquet") lines <- c("{\"name\":\"Bob\", \"age\":24}", "{\"name\":\"James\", \"age\":35}") jsonPath2 <- tempfile(pattern="jsonPath2", fileext=".tmp") parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") writeLines(lines, jsonPath2) - df2 <- loadDF(sqlCtx, jsonPath2, "json") - saveDF(df2, parquetPath2, "parquet", "overwrite") - dfParquet2 <- loadDF(sqlCtx, parquetPath2, "parquet") + df2 <- read.df(sqlContext, jsonPath2, "json") + write.df(df2, parquetPath2, "parquet", "overwrite") + dfParquet2 <- read.df(sqlContext, parquetPath2, "parquet") registerTempTable(dfParquet, "table1") insertInto(dfParquet2, "table1") - expect_true(count(sql(sqlCtx, "select * from table1")) == 5) - expect_true(first(sql(sqlCtx, "select * from table1 order by age"))$name == "Michael") - dropTempTable(sqlCtx, "table1") + expect_true(count(sql(sqlContext, "select * from table1")) == 5) + expect_true(first(sql(sqlContext, "select * from table1 order by age"))$name == "Michael") + dropTempTable(sqlContext, "table1") registerTempTable(dfParquet, "table1") insertInto(dfParquet2, "table1", overwrite = TRUE) - expect_true(count(sql(sqlCtx, "select * from table1")) == 2) - expect_true(first(sql(sqlCtx, "select * from table1 order by age"))$name == "Bob") - 
dropTempTable(sqlCtx, "table1") + expect_true(count(sql(sqlContext, "select * from table1")) == 2) + expect_true(first(sql(sqlContext, "select * from table1 order by age"))$name == "Bob") + dropTempTable(sqlContext, "table1") }) test_that("table() returns a new DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) registerTempTable(df, "table1") - tabledf <- table(sqlCtx, "table1") + tabledf <- table(sqlContext, "table1") expect_true(inherits(tabledf, "DataFrame")) expect_true(count(tabledf) == 3) - dropTempTable(sqlCtx, "table1") + dropTempTable(sqlContext, "table1") }) test_that("toRDD() returns an RRDD", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) testRDD <- toRDD(df) expect_true(inherits(testRDD, "RDD")) expect_true(count(testRDD) == 3) }) test_that("union on two RDDs created from DataFrames returns an RRDD", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) RDD1 <- toRDD(df) RDD2 <- toRDD(df) unioned <- unionRDD(RDD1, RDD2) @@ -274,7 +283,7 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", { writeLines(textLines, textPath) textRDD <- textFile(sc, textPath) - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) dfRDD <- toRDD(df) unionByte <- unionRDD(rdd, dfRDD) @@ -292,7 +301,7 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", { test_that("objectFile() works with row serialization", { objectPath <- tempfile(pattern="spark-test", fileext=".tmp") - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) dfRDD <- toRDD(df) saveAsObjectFile(coalesce(dfRDD, 1L), objectPath) objectIn <- objectFile(sc, objectPath) @@ -303,7 +312,7 @@ test_that("objectFile() works with row serialization", { }) test_that("lapply() on a DataFrame returns an RDD with the correct columns", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) testRDD <- lapply(df, function(row) { row$newCol <- row$age + 5 row @@ -315,7 +324,7 @@ test_that("lapply() on a DataFrame returns an RDD with the correct columns", { }) test_that("collect() returns a data.frame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) rdf <- collect(df) expect_true(is.data.frame(rdf)) expect_true(names(rdf)[1] == "age") @@ -324,20 +333,20 @@ test_that("collect() returns a data.frame", { }) test_that("limit() returns DataFrame with the correct number of rows", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) dfLimited <- limit(df, 2) expect_true(inherits(dfLimited, "DataFrame")) expect_true(count(dfLimited) == 2) }) test_that("collect() and take() on a DataFrame return the same number of rows and columns", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) expect_true(nrow(collect(df)) == nrow(take(df, 10))) expect_true(ncol(collect(df)) == ncol(take(df, 10))) }) test_that("multiple pipeline transformations starting with a DataFrame result in an RDD with the correct values", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) first <- lapply(df, function(row) { row$age <- row$age + 5 row @@ -354,7 +363,7 @@ test_that("multiple pipeline transformations starting with a DataFrame result in }) test_that("cache(), persist(), and unpersist() on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) expect_false(df@env$isCached) cache(df) expect_true(df@env$isCached) @@ -373,7 +382,7 @@ test_that("cache(), 
persist(), and unpersist() on a DataFrame", { }) test_that("schema(), dtypes(), columns(), names() return the correct values/format", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) testSchema <- schema(df) expect_true(length(testSchema$fields()) == 2) expect_true(testSchema$fields()[[1]]$dataType.toString() == "LongType") @@ -394,7 +403,7 @@ test_that("schema(), dtypes(), columns(), names() return the correct values/form }) test_that("head() and first() return the correct data", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) testHead <- head(df) expect_true(nrow(testHead) == 3) expect_true(ncol(testHead) == 2) @@ -415,23 +424,27 @@ test_that("distinct() on DataFrames", { jsonPathWithDup <- tempfile(pattern="sparkr-test", fileext=".tmp") writeLines(lines, jsonPathWithDup) - df <- jsonFile(sqlCtx, jsonPathWithDup) + df <- jsonFile(sqlContext, jsonPathWithDup) uniques <- distinct(df) expect_true(inherits(uniques, "DataFrame")) expect_true(count(uniques) == 3) }) -test_that("sampleDF on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) - sampled <- sampleDF(df, FALSE, 1.0) +test_that("sample on a DataFrame", { + df <- jsonFile(sqlContext, jsonPath) + sampled <- sample(df, FALSE, 1.0) expect_equal(nrow(collect(sampled)), count(df)) expect_true(inherits(sampled, "DataFrame")) - sampled2 <- sampleDF(df, FALSE, 0.1) + sampled2 <- sample(df, FALSE, 0.1) expect_true(count(sampled2) < 3) + + # Also test sample_frac + sampled3 <- sample_frac(df, FALSE, 0.1) + expect_true(count(sampled3) < 3) }) test_that("select operators", { - df <- select(jsonFile(sqlCtx, jsonPath), "name", "age") + df <- select(jsonFile(sqlContext, jsonPath), "name", "age") expect_true(inherits(df$name, "Column")) expect_true(inherits(df[[2]], "Column")) expect_true(inherits(df[["age"]], "Column")) @@ -457,7 +470,7 @@ test_that("select operators", { }) test_that("select with column", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) df1 <- select(df, "name") expect_true(columns(df1) == c("name")) expect_true(count(df1) == 3) @@ -468,7 +481,7 @@ test_that("select with column", { }) test_that("selectExpr() on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) selected <- selectExpr(df, "age * 2") expect_true(names(selected) == "(age * 2)") expect_equal(collect(selected), collect(select(df, df$age * 2L))) @@ -479,7 +492,7 @@ test_that("selectExpr() on a DataFrame", { }) test_that("column calculation", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) d <- collect(select(df, alias(df$age + 1, "age2"))) expect_true(names(d) == c("age2")) df2 <- select(df, lower(df$name), abs(df$age)) @@ -487,16 +500,16 @@ test_that("column calculation", { expect_true(count(df2) == 3) }) -test_that("load() from json file", { - df <- loadDF(sqlCtx, jsonPath, "json") +test_that("read.df() from json file", { + df <- read.df(sqlContext, jsonPath, "json") expect_true(inherits(df, "DataFrame")) expect_true(count(df) == 3) }) -test_that("save() as parquet file", { - df <- loadDF(sqlCtx, jsonPath, "json") - saveDF(df, parquetPath, "parquet", mode="overwrite") - df2 <- loadDF(sqlCtx, parquetPath, "parquet") +test_that("write.df() as parquet file", { + df <- read.df(sqlContext, jsonPath, "json") + write.df(df, parquetPath, "parquet", mode="overwrite") + df2 <- read.df(sqlContext, parquetPath, "parquet") expect_true(inherits(df2, "DataFrame")) expect_true(count(df2) == 3) }) @@ -526,6 +539,7 @@ test_that("column 
operators", { c2 <- (- c + 1 - 2) * 3 / 4.0 c3 <- (c + c2 - c2) * c2 %% c2 c4 <- (c > c2) & (c2 <= c3) | (c == c2) & (c2 != c3) + c5 <- c2 ^ c3 ^ c4 }) test_that("column functions", { @@ -533,10 +547,34 @@ test_that("column functions", { c2 <- min(c) + max(c) + sum(c) + avg(c) + count(c) + abs(c) + sqrt(c) c3 <- lower(c) + upper(c) + first(c) + last(c) c4 <- approxCountDistinct(c) + countDistinct(c) + cast(c, "string") + c5 <- n(c) + n_distinct(c) + c5 <- acos(c) + asin(c) + atan(c) + cbrt(c) + c6 <- ceiling(c) + cos(c) + cosh(c) + exp(c) + expm1(c) + c7 <- floor(c) + log(c) + log10(c) + log1p(c) + rint(c) + c8 <- sign(c) + sin(c) + sinh(c) + tan(c) + tanh(c) + c9 <- toDegrees(c) + toRadians(c) +}) + +test_that("column binary mathfunctions", { + lines <- c("{\"a\":1, \"b\":5}", + "{\"a\":2, \"b\":6}", + "{\"a\":3, \"b\":7}", + "{\"a\":4, \"b\":8}") + jsonPathWithDup <- tempfile(pattern="sparkr-test", fileext=".tmp") + writeLines(lines, jsonPathWithDup) + df <- jsonFile(sqlContext, jsonPathWithDup) + expect_equal(collect(select(df, atan2(df$a, df$b)))[1, "ATAN2(a, b)"], atan2(1, 5)) + expect_equal(collect(select(df, atan2(df$a, df$b)))[2, "ATAN2(a, b)"], atan2(2, 6)) + expect_equal(collect(select(df, atan2(df$a, df$b)))[3, "ATAN2(a, b)"], atan2(3, 7)) + expect_equal(collect(select(df, atan2(df$a, df$b)))[4, "ATAN2(a, b)"], atan2(4, 8)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[1, "HYPOT(a, b)"], sqrt(1^2 + 5^2)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[2, "HYPOT(a, b)"], sqrt(2^2 + 6^2)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[3, "HYPOT(a, b)"], sqrt(3^2 + 7^2)) + expect_equal(collect(select(df, hypot(df$a, df$b)))[4, "HYPOT(a, b)"], sqrt(4^2 + 8^2)) }) test_that("string operators", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) expect_equal(count(where(df, like(df$name, "A%"))), 1) expect_equal(count(where(df, startsWith(df$name, "A"))), 1) expect_equal(first(select(df, substr(df$name, 1, 2)))[[1]], "Mi") @@ -544,7 +582,7 @@ test_that("string operators", { }) test_that("group by", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) df1 <- agg(df, name = "max", age = "sum") expect_true(1 == count(df1)) df1 <- agg(df, age2 = max(df$age)) @@ -557,6 +595,13 @@ test_that("group by", { expect_true(inherits(df2, "DataFrame")) expect_true(3 == count(df2)) + # Also test group_by, summarize, mean + gd1 <- group_by(df, "name") + expect_true(inherits(gd1, "GroupedData")) + df_summarized <- summarize(gd, mean_age = mean(df$age)) + expect_true(inherits(df_summarized, "DataFrame")) + expect_true(3 == count(df_summarized)) + df3 <- agg(gd, age = "sum") expect_true(inherits(df3, "DataFrame")) expect_true(3 == count(df3)) @@ -573,12 +618,12 @@ test_that("group by", { expect_true(3 == count(max(gd, "age"))) }) -test_that("sortDF() and orderBy() on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) - sorted <- sortDF(df, df$age) +test_that("arrange() and orderBy() on a DataFrame", { + df <- jsonFile(sqlContext, jsonPath) + sorted <- arrange(df, df$age) expect_true(collect(sorted)[1,2] == "Michael") - sorted2 <- sortDF(df, "name") + sorted2 <- arrange(df, "name") expect_true(collect(sorted2)[2,"age"] == 19) sorted3 <- orderBy(df, asc(df$age)) @@ -591,7 +636,7 @@ test_that("sortDF() and orderBy() on a DataFrame", { }) test_that("filter() on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) filtered <- filter(df, "age > 20") expect_true(count(filtered) == 1) 
expect_true(collect(filtered)$name == "Andy") @@ -601,7 +646,7 @@ test_that("filter() on a DataFrame", { }) test_that("join() on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) mockLines2 <- c("{\"name\":\"Michael\", \"test\": \"yes\"}", "{\"name\":\"Andy\", \"test\": \"no\"}", @@ -609,7 +654,7 @@ test_that("join() on a DataFrame", { "{\"name\":\"Bob\", \"test\": \"yes\"}") jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") writeLines(mockLines2, jsonPath2) - df2 <- jsonFile(sqlCtx, jsonPath2) + df2 <- jsonFile(sqlContext, jsonPath2) joined <- join(df, df2) expect_equal(names(joined), c("age", "name", "name", "test")) @@ -632,7 +677,7 @@ test_that("join() on a DataFrame", { }) test_that("toJSON() returns an RDD of the correct values", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) testRDD <- toJSON(df) expect_true(inherits(testRDD, "RDD")) expect_true(SparkR:::getSerializedMode(testRDD) == "string") @@ -640,43 +685,44 @@ test_that("toJSON() returns an RDD of the correct values", { }) test_that("showDF()", { - df <- jsonFile(sqlCtx, jsonPath) - expect_output(showDF(df), "age name \nnull Michael\n30 Andy \n19 Justin ") + df <- jsonFile(sqlContext, jsonPath) + s <- capture.output(showDF(df)) + expect_output(s , "+----+-------+\n| age| name|\n+----+-------+\n|null|Michael|\n| 30| Andy|\n| 19| Justin|\n+----+-------+\n") }) test_that("isLocal()", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) expect_false(isLocal(df)) }) test_that("unionAll(), except(), and intersect() on a DataFrame", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) lines <- c("{\"name\":\"Bob\", \"age\":24}", "{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"James\", \"age\":35}") jsonPath2 <- tempfile(pattern="sparkr-test", fileext=".tmp") writeLines(lines, jsonPath2) - df2 <- loadDF(sqlCtx, jsonPath2, "json") + df2 <- read.df(sqlContext, jsonPath2, "json") - unioned <- sortDF(unionAll(df, df2), df$age) + unioned <- arrange(unionAll(df, df2), df$age) expect_true(inherits(unioned, "DataFrame")) expect_true(count(unioned) == 6) expect_true(first(unioned)$name == "Michael") - excepted <- sortDF(except(df, df2), desc(df$age)) + excepted <- arrange(except(df, df2), desc(df$age)) expect_true(inherits(unioned, "DataFrame")) expect_true(count(excepted) == 2) expect_true(first(excepted)$name == "Justin") - intersected <- sortDF(intersect(df, df2), df$age) + intersected <- arrange(intersect(df, df2), df$age) expect_true(inherits(unioned, "DataFrame")) expect_true(count(intersected) == 1) expect_true(first(intersected)$name == "Andy") }) test_that("withColumn() and withColumnRenamed()", { - df <- jsonFile(sqlCtx, jsonPath) + df <- jsonFile(sqlContext, jsonPath) newDF <- withColumn(df, "newAge", df$age + 2) expect_true(length(columns(newDF)) == 3) expect_true(columns(newDF)[3] == "newAge") @@ -687,23 +733,146 @@ test_that("withColumn() and withColumnRenamed()", { expect_true(columns(newDF2)[1] == "newerAge") }) -test_that("saveDF() on DataFrame and works with parquetFile", { - df <- jsonFile(sqlCtx, jsonPath) - saveDF(df, parquetPath, "parquet", mode="overwrite") - parquetDF <- parquetFile(sqlCtx, parquetPath) +test_that("mutate() and rename()", { + df <- jsonFile(sqlContext, jsonPath) + newDF <- mutate(df, newAge = df$age + 2) + expect_true(length(columns(newDF)) == 3) + expect_true(columns(newDF)[3] == "newAge") + expect_true(first(filter(newDF, df$name != "Michael"))$newAge == 32) + + newDF2 <- 
rename(df, newerAge = df$age) + expect_true(length(columns(newDF2)) == 2) + expect_true(columns(newDF2)[1] == "newerAge") +}) + +test_that("write.df() on DataFrame and works with parquetFile", { + df <- jsonFile(sqlContext, jsonPath) + write.df(df, parquetPath, "parquet", mode="overwrite") + parquetDF <- parquetFile(sqlContext, parquetPath) expect_true(inherits(parquetDF, "DataFrame")) expect_equal(count(df), count(parquetDF)) }) test_that("parquetFile works with multiple input paths", { - df <- jsonFile(sqlCtx, jsonPath) - saveDF(df, parquetPath, "parquet", mode="overwrite") + df <- jsonFile(sqlContext, jsonPath) + write.df(df, parquetPath, "parquet", mode="overwrite") parquetPath2 <- tempfile(pattern = "parquetPath2", fileext = ".parquet") - saveDF(df, parquetPath2, "parquet", mode="overwrite") - parquetDF <- parquetFile(sqlCtx, parquetPath, parquetPath2) + write.df(df, parquetPath2, "parquet", mode="overwrite") + parquetDF <- parquetFile(sqlContext, parquetPath, parquetPath2) expect_true(inherits(parquetDF, "DataFrame")) expect_true(count(parquetDF) == count(df)*2) }) +test_that("describe() on a DataFrame", { + df <- jsonFile(sqlContext, jsonPath) + stats <- describe(df, "age") + expect_equal(collect(stats)[1, "summary"], "count") + expect_equal(collect(stats)[2, "age"], "24.5") + expect_equal(collect(stats)[3, "age"], "5.5") + stats <- describe(df) + expect_equal(collect(stats)[4, "name"], "Andy") + expect_equal(collect(stats)[5, "age"], "30") +}) + +test_that("dropna() on a DataFrame", { + df <- jsonFile(sqlContext, jsonPathNa) + rows <- collect(df) + + # drop with columns + + expected <- rows[!is.na(rows$name),] + actual <- collect(dropna(df, cols = "name")) + expect_true(identical(expected, actual)) + + expected <- rows[!is.na(rows$age),] + actual <- collect(dropna(df, cols = "age")) + row.names(expected) <- row.names(actual) + # identical on two dataframes does not work here. Don't know why. + # use identical on all columns as a workaround. 
+ expect_true(identical(expected$age, actual$age)) + expect_true(identical(expected$height, actual$height)) + expect_true(identical(expected$name, actual$name)) + + expected <- rows[!is.na(rows$age) & !is.na(rows$height),] + actual <- collect(dropna(df, cols = c("age", "height"))) + expect_true(identical(expected, actual)) + + expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),] + actual <- collect(dropna(df)) + expect_true(identical(expected, actual)) + + # drop with how + + expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),] + actual <- collect(dropna(df)) + expect_true(identical(expected, actual)) + + expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),] + actual <- collect(dropna(df, "all")) + expect_true(identical(expected, actual)) + + expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),] + actual <- collect(dropna(df, "any")) + expect_true(identical(expected, actual)) + + expected <- rows[!is.na(rows$age) & !is.na(rows$height),] + actual <- collect(dropna(df, "any", cols = c("age", "height"))) + expect_true(identical(expected, actual)) + + expected <- rows[!is.na(rows$age) | !is.na(rows$height),] + actual <- collect(dropna(df, "all", cols = c("age", "height"))) + expect_true(identical(expected, actual)) + + # drop with threshold + + expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2,] + actual <- collect(dropna(df, minNonNulls = 2, cols = c("age", "height"))) + expect_true(identical(expected, actual)) + + expected <- rows[as.integer(!is.na(rows$age)) + + as.integer(!is.na(rows$height)) + + as.integer(!is.na(rows$name)) >= 3,] + actual <- collect(dropna(df, minNonNulls = 3, cols = c("name", "age", "height"))) + expect_true(identical(expected, actual)) +}) + +test_that("fillna() on a DataFrame", { + df <- jsonFile(sqlContext, jsonPathNa) + rows <- collect(df) + + # fill with value + + expected <- rows + expected$age[is.na(expected$age)] <- 50 + expected$height[is.na(expected$height)] <- 50.6 + actual <- collect(fillna(df, 50.6)) + expect_true(identical(expected, actual)) + + expected <- rows + expected$name[is.na(expected$name)] <- "unknown" + actual <- collect(fillna(df, "unknown")) + expect_true(identical(expected, actual)) + + expected <- rows + expected$age[is.na(expected$age)] <- 50 + actual <- collect(fillna(df, 50.6, "age")) + expect_true(identical(expected, actual)) + + expected <- rows + expected$name[is.na(expected$name)] <- "unknown" + actual <- collect(fillna(df, "unknown", c("age", "name"))) + expect_true(identical(expected, actual)) + + # fill with named list + + expected <- rows + expected$age[is.na(expected$age)] <- 50 + expected$height[is.na(expected$height)] <- 50.6 + expected$name[is.na(expected$name)] <- "unknown" + actual <- collect(fillna(df, list("age" = 50, "height" = 50.6, "name" = "unknown"))) + expect_true(identical(expected, actual)) +}) + unlink(parquetPath) unlink(jsonPath) +unlink(jsonPathNa) diff --git a/R/pkg/inst/tests/test_utils.R b/R/pkg/inst/tests/test_utils.R index 9c5bb427932b4..539e3a3c19df3 100644 --- a/R/pkg/inst/tests/test_utils.R +++ b/R/pkg/inst/tests/test_utils.R @@ -92,7 +92,10 @@ test_that("cleanClosure on R functions", { } newF <- cleanClosure(f) env <- environment(newF) - expect_equal(length(ls(env)), 3) # Only "g", "l" and "f". No "base", "field" or "defUse". + # TODO(shivaram): length(ls(env)) is 4 here for some reason and `lapply` is included in `env`. + # Disabling this test till we debug this. 
+ # + # expect_equal(length(ls(env)), 3) # Only "g", "l" and "f". No "base", "field" or "defUse". expect_true("g" %in% ls(env)) expect_true("l" %in% ls(env)) expect_true("f" %in% ls(env)) diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index 014bf7bd7b3fe..7e3b5fc403b25 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -72,7 +72,7 @@ if (numBroadcastVars > 0) { for (bcast in seq(1:numBroadcastVars)) { bcastId <- SparkR:::readInt(inputCon) value <- unserialize(SparkR:::readRaw(inputCon)) - setBroadcastValue(bcastId, value) + SparkR:::setBroadcastValue(bcastId, value) } } diff --git a/R/pkg/src/Makefile b/R/pkg/src-native/Makefile similarity index 100% rename from R/pkg/src/Makefile rename to R/pkg/src-native/Makefile diff --git a/R/pkg/src/Makefile.win b/R/pkg/src-native/Makefile.win similarity index 100% rename from R/pkg/src/Makefile.win rename to R/pkg/src-native/Makefile.win diff --git a/R/pkg/src/string_hash_code.c b/R/pkg/src-native/string_hash_code.c similarity index 100% rename from R/pkg/src/string_hash_code.c rename to R/pkg/src-native/string_hash_code.c diff --git a/assembly/pom.xml b/assembly/pom.xml index 2b4d0a990bf22..b8a821dcf6a6c 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -92,6 +92,27 @@ true + + + org.apache.maven.plugins + maven-antrun-plugin + + + package + + run + + + + + + + + + + + + org.apache.maven.plugins diff --git a/bagel/pom.xml b/bagel/pom.xml index 1f3dec91314f2..c1aa32bd435df 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml diff --git a/bin/pyspark b/bin/pyspark index 8acad6113797d..7cb19c51b43a2 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -90,11 +90,7 @@ if [[ -n "$SPARK_TESTING" ]]; then unset YARN_CONF_DIR unset HADOOP_CONF_DIR export PYTHONHASHSEED=0 - if [[ -n "$PYSPARK_DOC_TEST" ]]; then - exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1 - else - exec "$PYSPARK_DRIVER_PYTHON" $1 - fi + exec "$PYSPARK_DRIVER_PYTHON" -m $1 exit fi diff --git a/core/pom.xml b/core/pom.xml index 2dfb00d7ecf26..31b61520ef07c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -228,6 +228,14 @@ json4s-jackson_${scala.binary.version} 3.2.10 + + com.sun.jersey + jersey-server + + + com.sun.jersey + jersey-core + org.apache.mesos mesos @@ -273,7 +281,6 @@ org.apache.ivy ivy - ${ivy.version} oro @@ -354,15 +361,31 @@ junit test + + org.hamcrest + hamcrest-core + test + + + org.hamcrest + hamcrest-library + test + com.novocode junit-interface test - org.spark-project + net.razorvine pyrolite - 2.0.1 + 4.4 + + + net.razorvine + serpent + + net.sf.py4j @@ -374,35 +397,6 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - org.apache.maven.plugins - maven-antrun-plugin - - - generate-resources - - run - - - - - - - - - - - maven-clean-plugin - - - - ${basedir}/../python/build - - - true - - org.apache.maven.plugins maven-dependency-plugin @@ -431,24 +425,6 @@ - - - - src/main/resources - - - ../python - - pyspark/*.py - - - - ../python/build - - py4j/*.py - - - @@ -499,6 +475,29 @@ + + sparkr-docs + + + + org.codehaus.mojo + exec-maven-plugin + + + sparkr-pkg-docs + compile + + exec + + + + + ..${path.separator}R${path.separator}create-docs${script.extension} + + + + + diff --git 
a/core/src/main/java/org/apache/spark/JobExecutionStatus.java b/core/src/main/java/org/apache/spark/JobExecutionStatus.java index 6e161313702bb..0287fb79f8dd2 100644 --- a/core/src/main/java/org/apache/spark/JobExecutionStatus.java +++ b/core/src/main/java/org/apache/spark/JobExecutionStatus.java @@ -17,9 +17,15 @@ package org.apache.spark; +import org.apache.spark.util.EnumUtil; + public enum JobExecutionStatus { RUNNING, SUCCEEDED, FAILED, - UNKNOWN + UNKNOWN; + + public static JobExecutionStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(JobExecutionStatus.class, str); + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java new file mode 100644 index 0000000000000..3f746b886bc9b --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/DummySerializerInstance.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; + +import scala.reflect.ClassTag; + +import org.apache.spark.serializer.DeserializationStream; +import org.apache.spark.serializer.SerializationStream; +import org.apache.spark.serializer.SerializerInstance; +import org.apache.spark.unsafe.PlatformDependent; + +/** + * Unfortunately, we need a serializer instance in order to construct a DiskBlockObjectWriter. + * Our shuffle write path doesn't actually use this serializer (since we end up calling the + * `write() OutputStream methods), but DiskBlockObjectWriter still calls some methods on it. To work + * around this, we pass a dummy no-op serializer. 
+ */ +final class DummySerializerInstance extends SerializerInstance { + + public static final DummySerializerInstance INSTANCE = new DummySerializerInstance(); + + private DummySerializerInstance() { } + + @Override + public SerializationStream serializeStream(final OutputStream s) { + return new SerializationStream() { + @Override + public void flush() { + // Need to implement this because DiskObjectWriter uses it to flush the compression stream + try { + s.flush(); + } catch (IOException e) { + PlatformDependent.throwException(e); + } + } + + @Override + public SerializationStream writeObject(T t, ClassTag ev1) { + throw new UnsupportedOperationException(); + } + + @Override + public void close() { + // Need to implement this because DiskObjectWriter uses it to close the compression stream + try { + s.close(); + } catch (IOException e) { + PlatformDependent.throwException(e); + } + } + }; + } + + @Override + public ByteBuffer serialize(T t, ClassTag ev1) { + throw new UnsupportedOperationException(); + } + + @Override + public DeserializationStream deserializeStream(InputStream s) { + throw new UnsupportedOperationException(); + } + + @Override + public T deserialize(ByteBuffer bytes, ClassLoader loader, ClassTag ev1) { + throw new UnsupportedOperationException(); + } + + @Override + public T deserialize(ByteBuffer bytes, ClassTag ev1) { + throw new UnsupportedOperationException(); + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java new file mode 100644 index 0000000000000..4ee6a82c0423e --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/PackedRecordPointer.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +/** + * Wrapper around an 8-byte word that holds a 24-bit partition number and 40-bit record pointer. + *

+ * Within the long, the data is laid out as follows:
+ *
+ *   [24 bit partition number][13 bit memory page number][27 bit offset in page]
+ *
+ * This implies that the maximum addressable page size is 2^27 bytes = 128 megabytes, assuming that
+ * our offsets in pages are not 8-byte-word-aligned. Since we have 2^13 pages (based off the
+ * 13-bit page numbers assigned by {@link org.apache.spark.unsafe.memory.TaskMemoryManager}), this
+ * implies that we can address 2^13 * 128 megabytes = 1 terabyte of RAM per task.
+ *
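A standalone sketch of the packing arithmetic for the layout above may help when reading the class body further down. The real PackedRecordPointer packs an already-encoded TaskMemoryManager pointer rather than separate page/offset arguments, so the signature and class name here (PackedPointerSketch) are illustrative only.

    // Pack and unpack the [24-bit partition][13-bit page][27-bit offset] word described above.
    final class PackedPointerSketch {
      static long pack(int partitionId, int pageNumber, int offsetInPage) {
        return ((long) partitionId << 40) | ((long) pageNumber << 27) | (long) offsetInPage;
      }

      static int partitionId(long packed)  { return (int) (packed >>> 40); }             // top 24 bits
      static int pageNumber(long packed)   { return (int) ((packed >>> 27) & 0x1FFF); }  // next 13 bits
      static int offsetInPage(long packed) { return (int) (packed & ((1 << 27) - 1)); }  // low 27 bits

      public static void main(String[] args) {
        long packed = pack(42, 7, 12345);
        // Prints "42 7 12345": each field survives the round trip.
        System.out.println(partitionId(packed) + " " + pageNumber(packed) + " " + offsetInPage(packed));
      }
    }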
+ * Assuming word-alignment would allow for a 1 gigabyte maximum page size, but we leave this + * optimization to future work as it will require more careful design to ensure that addresses are + * properly aligned (e.g. by padding records). + */ +final class PackedRecordPointer { + + static final int MAXIMUM_PAGE_SIZE_BYTES = 1 << 27; // 128 megabytes + + /** + * The maximum partition identifier that can be encoded. Note that partition ids start from 0. + */ + static final int MAXIMUM_PARTITION_ID = (1 << 24) - 1; // 16777215 + + /** Bit mask for the lower 40 bits of a long. */ + private static final long MASK_LONG_LOWER_40_BITS = (1L << 40) - 1; + + /** Bit mask for the upper 24 bits of a long */ + private static final long MASK_LONG_UPPER_24_BITS = ~MASK_LONG_LOWER_40_BITS; + + /** Bit mask for the lower 27 bits of a long. */ + private static final long MASK_LONG_LOWER_27_BITS = (1L << 27) - 1; + + /** Bit mask for the lower 51 bits of a long. */ + private static final long MASK_LONG_LOWER_51_BITS = (1L << 51) - 1; + + /** Bit mask for the upper 13 bits of a long */ + private static final long MASK_LONG_UPPER_13_BITS = ~MASK_LONG_LOWER_51_BITS; + + /** + * Pack a record address and partition id into a single word. + * + * @param recordPointer a record pointer encoded by TaskMemoryManager. + * @param partitionId a shuffle partition id (maximum value of 2^24). + * @return a packed pointer that can be decoded using the {@link PackedRecordPointer} class. + */ + public static long packPointer(long recordPointer, int partitionId) { + assert (partitionId <= MAXIMUM_PARTITION_ID); + // Note that without word alignment we can address 2^27 bytes = 128 megabytes per page. + // Also note that this relies on some internals of how TaskMemoryManager encodes its addresses. + final long pageNumber = (recordPointer & MASK_LONG_UPPER_13_BITS) >>> 24; + final long compressedAddress = pageNumber | (recordPointer & MASK_LONG_LOWER_27_BITS); + return (((long) partitionId) << 40) | compressedAddress; + } + + private long packedRecordPointer; + + public void set(long packedRecordPointer) { + this.packedRecordPointer = packedRecordPointer; + } + + public int getPartitionId() { + return (int) ((packedRecordPointer & MASK_LONG_UPPER_24_BITS) >>> 40); + } + + public long getRecordPointer() { + final long pageNumber = (packedRecordPointer << 24) & MASK_LONG_UPPER_13_BITS; + final long offsetInPage = packedRecordPointer & MASK_LONG_LOWER_27_BITS; + return pageNumber | offsetInPage; + } + +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java new file mode 100644 index 0000000000000..7bac0dc0bbeb6 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/SpillInfo.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.File; + +import org.apache.spark.storage.TempShuffleBlockId; + +/** + * Metadata for a block of data written by {@link UnsafeShuffleExternalSorter}. + */ +final class SpillInfo { + final long[] partitionLengths; + final File file; + final TempShuffleBlockId blockId; + + public SpillInfo(int numPartitions, File file, TempShuffleBlockId blockId) { + this.partitionLengths = new long[numPartitions]; + this.file = file; + this.blockId = blockId; + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java new file mode 100644 index 0000000000000..9e9ed94b7890c --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java @@ -0,0 +1,422 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.File; +import java.io.IOException; +import java.util.LinkedList; + +import scala.Tuple2; + +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.spark.SparkConf; +import org.apache.spark.TaskContext; +import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.serializer.SerializerInstance; +import org.apache.spark.shuffle.ShuffleMemoryManager; +import org.apache.spark.storage.*; +import org.apache.spark.unsafe.PlatformDependent; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.TaskMemoryManager; +import org.apache.spark.util.Utils; + +/** + * An external sorter that is specialized for sort-based shuffle. + *

+ * Incoming records are appended to data pages. When all records have been inserted (or when the + * current thread's shuffle memory limit is reached), the in-memory records are sorted according to + * their partition ids (using a {@link UnsafeShuffleInMemorySorter}). The sorted records are then + * written to a single output file (or multiple files, if we've spilled). The format of the output + * files is the same as the format of the final output file written by + * {@link org.apache.spark.shuffle.sort.SortShuffleWriter}: each output partition's records are + * written as a single serialized, compressed stream that can be read with a new decompression and + * deserialization stream. + *

+ * Unlike {@link org.apache.spark.util.collection.ExternalSorter}, this sorter does not merge its + * spill files. Instead, this merging is performed in {@link UnsafeShuffleWriter}, which uses a + * specialized merge procedure that avoids extra serialization/deserialization. + */ +final class UnsafeShuffleExternalSorter { + + private final Logger logger = LoggerFactory.getLogger(UnsafeShuffleExternalSorter.class); + + private static final int PAGE_SIZE = PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES; + @VisibleForTesting + static final int DISK_WRITE_BUFFER_SIZE = 1024 * 1024; + @VisibleForTesting + static final int MAX_RECORD_SIZE = PAGE_SIZE - 4; + + private final int initialSize; + private final int numPartitions; + private final TaskMemoryManager memoryManager; + private final ShuffleMemoryManager shuffleMemoryManager; + private final BlockManager blockManager; + private final TaskContext taskContext; + private final ShuffleWriteMetrics writeMetrics; + + /** The buffer size to use when writing spills using DiskBlockObjectWriter */ + private final int fileBufferSizeBytes; + + /** + * Memory pages that hold the records being sorted. The pages in this list are freed when + * spilling, although in principle we could recycle these pages across spills (on the other hand, + * this might not be necessary if we maintained a pool of re-usable pages in the TaskMemoryManager + * itself). + */ + private final LinkedList allocatedPages = new LinkedList(); + + private final LinkedList spills = new LinkedList(); + + // These variables are reset after spilling: + private UnsafeShuffleInMemorySorter sorter; + private MemoryBlock currentPage = null; + private long currentPagePosition = -1; + private long freeSpaceInCurrentPage = 0; + + public UnsafeShuffleExternalSorter( + TaskMemoryManager memoryManager, + ShuffleMemoryManager shuffleMemoryManager, + BlockManager blockManager, + TaskContext taskContext, + int initialSize, + int numPartitions, + SparkConf conf, + ShuffleWriteMetrics writeMetrics) throws IOException { + this.memoryManager = memoryManager; + this.shuffleMemoryManager = shuffleMemoryManager; + this.blockManager = blockManager; + this.taskContext = taskContext; + this.initialSize = initialSize; + this.numPartitions = numPartitions; + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided + this.fileBufferSizeBytes = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; + + this.writeMetrics = writeMetrics; + initializeForWriting(); + } + + /** + * Allocates new sort data structures. Called when creating the sorter and after each spill. + */ + private void initializeForWriting() throws IOException { + // TODO: move this sizing calculation logic into a static method of sorter: + final long memoryRequested = initialSize * 8L; + final long memoryAcquired = shuffleMemoryManager.tryToAcquire(memoryRequested); + if (memoryAcquired != memoryRequested) { + shuffleMemoryManager.release(memoryAcquired); + throw new IOException("Could not acquire " + memoryRequested + " bytes of memory"); + } + + this.sorter = new UnsafeShuffleInMemorySorter(initialSize); + } + + /** + * Sorts the in-memory records and writes the sorted records to an on-disk file. + * This method does not free the sort data structures. + * + * @param isLastFile if true, this indicates that we're writing the final output file and that the + * bytes written should be counted towards shuffle spill metrics rather than + * shuffle write metrics. 
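The write path that follows is easier to read with the sorting idea in mind: because the partition id sits in the high bits of each packed pointer, an ordinary ascending sort of the packed longs groups records by partition, and each contiguous run can then be written out as one stream. A toy sketch under that assumption (SortThenWriteSketch and its simplified 40-bit pointers are illustrative, not part of the patch):

    import java.util.Arrays;

    final class SortThenWriteSketch {
      // Partition id in the upper 24 bits, a simplified record pointer in the lower 40 bits.
      static long pack(int partitionId, long recordPointer) {
        return ((long) partitionId << 40) | recordPointer;
      }

      public static void main(String[] args) {
        long[] packed = { pack(2, 100), pack(0, 200), pack(1, 300), pack(0, 400) };
        Arrays.sort(packed);  // orders by partition id, since it occupies the high bits
        int currentPartition = -1;
        for (long p : packed) {
          int partition = (int) (p >>> 40);
          if (partition != currentPartition) {  // partition boundary: start a new output run
            currentPartition = partition;
            System.out.println("-- partition " + partition);
          }
          System.out.println("   record pointer " + (p & ((1L << 40) - 1)));
        }
      }
    }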
+ */ + private void writeSortedFile(boolean isLastFile) throws IOException { + + final ShuffleWriteMetrics writeMetricsToUse; + + if (isLastFile) { + // We're writing the final non-spill file, so we _do_ want to count this as shuffle bytes. + writeMetricsToUse = writeMetrics; + } else { + // We're spilling, so bytes written should be counted towards spill rather than write. + // Create a dummy WriteMetrics object to absorb these metrics, since we don't want to count + // them towards shuffle bytes written. + writeMetricsToUse = new ShuffleWriteMetrics(); + } + + // This call performs the actual sort. + final UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator sortedRecords = + sorter.getSortedIterator(); + + // Currently, we need to open a new DiskBlockObjectWriter for each partition; we can avoid this + // after SPARK-5581 is fixed. + BlockObjectWriter writer; + + // Small writes to DiskBlockObjectWriter will be fairly inefficient. Since there doesn't seem to + // be an API to directly transfer bytes from managed memory to the disk writer, we buffer + // data through a byte array. This array does not need to be large enough to hold a single + // record; + final byte[] writeBuffer = new byte[DISK_WRITE_BUFFER_SIZE]; + + // Because this output will be read during shuffle, its compression codec must be controlled by + // spark.shuffle.compress instead of spark.shuffle.spill.compress, so we need to use + // createTempShuffleBlock here; see SPARK-3426 for more details. + final Tuple2 spilledFileInfo = + blockManager.diskBlockManager().createTempShuffleBlock(); + final File file = spilledFileInfo._2(); + final TempShuffleBlockId blockId = spilledFileInfo._1(); + final SpillInfo spillInfo = new SpillInfo(numPartitions, file, blockId); + + // Unfortunately, we need a serializer instance in order to construct a DiskBlockObjectWriter. + // Our write path doesn't actually use this serializer (since we end up calling the `write()` + // OutputStream methods), but DiskBlockObjectWriter still calls some methods on it. To work + // around this, we pass a dummy no-op serializer. 
+ final SerializerInstance ser = DummySerializerInstance.INSTANCE; + + writer = blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse); + + int currentPartition = -1; + while (sortedRecords.hasNext()) { + sortedRecords.loadNext(); + final int partition = sortedRecords.packedRecordPointer.getPartitionId(); + assert (partition >= currentPartition); + if (partition != currentPartition) { + // Switch to the new partition + if (currentPartition != -1) { + writer.commitAndClose(); + spillInfo.partitionLengths[currentPartition] = writer.fileSegment().length(); + } + currentPartition = partition; + writer = + blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse); + } + + final long recordPointer = sortedRecords.packedRecordPointer.getRecordPointer(); + final Object recordPage = memoryManager.getPage(recordPointer); + final long recordOffsetInPage = memoryManager.getOffsetInPage(recordPointer); + int dataRemaining = PlatformDependent.UNSAFE.getInt(recordPage, recordOffsetInPage); + long recordReadPosition = recordOffsetInPage + 4; // skip over record length + while (dataRemaining > 0) { + final int toTransfer = Math.min(DISK_WRITE_BUFFER_SIZE, dataRemaining); + PlatformDependent.copyMemory( + recordPage, + recordReadPosition, + writeBuffer, + PlatformDependent.BYTE_ARRAY_OFFSET, + toTransfer); + writer.write(writeBuffer, 0, toTransfer); + recordReadPosition += toTransfer; + dataRemaining -= toTransfer; + } + writer.recordWritten(); + } + + if (writer != null) { + writer.commitAndClose(); + // If `writeSortedFile()` was called from `closeAndGetSpills()` and no records were inserted, + // then the file might be empty. Note that it might be better to avoid calling + // writeSortedFile() in that case. + if (currentPartition != -1) { + spillInfo.partitionLengths[currentPartition] = writer.fileSegment().length(); + spills.add(spillInfo); + } + } + + if (!isLastFile) { // i.e. this is a spill file + // The current semantics of `shuffleRecordsWritten` seem to be that it's updated when records + // are written to disk, not when they enter the shuffle sorting code. DiskBlockObjectWriter + // relies on its `recordWritten()` method being called in order to trigger periodic updates to + // `shuffleBytesWritten`. If we were to remove the `recordWritten()` call and increment that + // counter at a higher-level, then the in-progress metrics for records written and bytes + // written would get out of sync. + // + // When writing the last file, we pass `writeMetrics` directly to the DiskBlockObjectWriter; + // in all other cases, we pass in a dummy write metrics to capture metrics, then copy those + // metrics to the true write metrics here. The reason for performing this copying is so that + // we can avoid reporting spilled bytes as shuffle write bytes. + // + // Note that we intentionally ignore the value of `writeMetricsToUse.shuffleWriteTime()`. + // Consistent with ExternalSorter, we do not count this IO towards shuffle write time. + // This means that this IO time is not accounted for anywhere; SPARK-3577 will fix this. + writeMetrics.incShuffleRecordsWritten(writeMetricsToUse.shuffleRecordsWritten()); + taskContext.taskMetrics().incDiskBytesSpilled(writeMetricsToUse.shuffleBytesWritten()); + } + } + + /** + * Sort and spill the current records in response to memory pressure. 
+ */ + @VisibleForTesting + void spill() throws IOException { + logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", + Thread.currentThread().getId(), + Utils.bytesToString(getMemoryUsage()), + spills.size(), + spills.size() > 1 ? " times" : " time"); + + writeSortedFile(false); + final long sorterMemoryUsage = sorter.getMemoryUsage(); + sorter = null; + shuffleMemoryManager.release(sorterMemoryUsage); + final long spillSize = freeMemory(); + taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); + + initializeForWriting(); + } + + private long getMemoryUsage() { + return sorter.getMemoryUsage() + (allocatedPages.size() * (long) PAGE_SIZE); + } + + private long freeMemory() { + long memoryFreed = 0; + for (MemoryBlock block : allocatedPages) { + memoryManager.freePage(block); + shuffleMemoryManager.release(block.size()); + memoryFreed += block.size(); + } + allocatedPages.clear(); + currentPage = null; + currentPagePosition = -1; + freeSpaceInCurrentPage = 0; + return memoryFreed; + } + + /** + * Force all memory and spill files to be deleted; called by shuffle error-handling code. + */ + public void cleanupAfterError() { + freeMemory(); + for (SpillInfo spill : spills) { + if (spill.file.exists() && !spill.file.delete()) { + logger.error("Unable to delete spill file {}", spill.file.getPath()); + } + } + if (sorter != null) { + shuffleMemoryManager.release(sorter.getMemoryUsage()); + sorter = null; + } + } + + /** + * Checks whether there is enough space to insert a new record into the sorter. + * + * @param requiredSpace the required space in the data page, in bytes, including space for storing + * the record size. + + * @return true if the record can be inserted without requiring more allocations, false otherwise. + */ + private boolean haveSpaceForRecord(int requiredSpace) { + assert (requiredSpace > 0); + return (sorter.hasSpaceForAnotherRecord() && (requiredSpace <= freeSpaceInCurrentPage)); + } + + /** + * Allocates more memory in order to insert an additional record. This will request additional + * memory from the {@link ShuffleMemoryManager} and spill if the requested memory can not be + * obtained. + * + * @param requiredSpace the required space in the data page, in bytes, including space for storing + * the record size. + */ + private void allocateSpaceForRecord(int requiredSpace) throws IOException { + if (!sorter.hasSpaceForAnotherRecord()) { + logger.debug("Attempting to expand sort pointer array"); + final long oldPointerArrayMemoryUsage = sorter.getMemoryUsage(); + final long memoryToGrowPointerArray = oldPointerArrayMemoryUsage * 2; + final long memoryAcquired = shuffleMemoryManager.tryToAcquire(memoryToGrowPointerArray); + if (memoryAcquired < memoryToGrowPointerArray) { + shuffleMemoryManager.release(memoryAcquired); + spill(); + } else { + sorter.expandPointerArray(); + shuffleMemoryManager.release(oldPointerArrayMemoryUsage); + } + } + if (requiredSpace > freeSpaceInCurrentPage) { + logger.trace("Required space {} is less than free space in current page ({})", requiredSpace, + freeSpaceInCurrentPage); + // TODO: we should track metrics on the amount of space wasted when we roll over to a new page + // without using the free space at the end of the current page. We should also do this for + // BytesToBytesMap. 
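The acquire-release-spill-retry dance above (used for growing the pointer array, and again below for new data pages) is worth isolating: request memory from the shared manager, and if the grant falls short, return it, spill to free space, then try once more before giving up. A self-contained sketch under hypothetical MemoryBudget and Spillable interfaces (stand-ins for ShuffleMemoryManager and the sorter's spill hook):

    import java.io.IOException;

    /** Hypothetical memory manager: grants up to the requested number of bytes. */
    interface MemoryBudget {
        long tryToAcquire(long bytes);
        void release(long bytes);
    }

    /** Hypothetical hook that frees memory by flushing buffered records to disk. */
    interface Spillable {
        void spill() throws IOException;
    }

    final class AcquireOrSpillSketch {
        /** Acquires exactly `bytes` from `budget`, spilling once if the first grant falls short. */
        static void acquireOrSpill(MemoryBudget budget, Spillable sorter, long bytes)
                throws IOException {
            long granted = budget.tryToAcquire(bytes);
            if (granted < bytes) {
                budget.release(granted);  // hand back the partial grant before spilling
                sorter.spill();
                granted = budget.tryToAcquire(bytes);
                if (granted < bytes) {
                    budget.release(granted);
                    throw new IOException("Unable to acquire " + bytes + " bytes of memory");
                }
            }
        }
    }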
+ if (requiredSpace > PAGE_SIZE) { + throw new IOException("Required space " + requiredSpace + " is greater than page size (" + + PAGE_SIZE + ")"); + } else { + final long memoryAcquired = shuffleMemoryManager.tryToAcquire(PAGE_SIZE); + if (memoryAcquired < PAGE_SIZE) { + shuffleMemoryManager.release(memoryAcquired); + spill(); + final long memoryAcquiredAfterSpilling = shuffleMemoryManager.tryToAcquire(PAGE_SIZE); + if (memoryAcquiredAfterSpilling != PAGE_SIZE) { + shuffleMemoryManager.release(memoryAcquiredAfterSpilling); + throw new IOException("Unable to acquire " + PAGE_SIZE + " bytes of memory"); + } + } + currentPage = memoryManager.allocatePage(PAGE_SIZE); + currentPagePosition = currentPage.getBaseOffset(); + freeSpaceInCurrentPage = PAGE_SIZE; + allocatedPages.add(currentPage); + } + } + } + + /** + * Write a record to the shuffle sorter. + */ + public void insertRecord( + Object recordBaseObject, + long recordBaseOffset, + int lengthInBytes, + int partitionId) throws IOException { + // Need 4 bytes to store the record length. + final int totalSpaceRequired = lengthInBytes + 4; + if (!haveSpaceForRecord(totalSpaceRequired)) { + allocateSpaceForRecord(totalSpaceRequired); + } + + final long recordAddress = + memoryManager.encodePageNumberAndOffset(currentPage, currentPagePosition); + final Object dataPageBaseObject = currentPage.getBaseObject(); + PlatformDependent.UNSAFE.putInt(dataPageBaseObject, currentPagePosition, lengthInBytes); + currentPagePosition += 4; + freeSpaceInCurrentPage -= 4; + PlatformDependent.copyMemory( + recordBaseObject, + recordBaseOffset, + dataPageBaseObject, + currentPagePosition, + lengthInBytes); + currentPagePosition += lengthInBytes; + freeSpaceInCurrentPage -= lengthInBytes; + sorter.insertRecord(recordAddress, partitionId); + } + + /** + * Close the sorter, causing any buffered data to be sorted and written out to disk. + * + * @return metadata for the spill files written by this sorter. If no records were ever inserted + * into this sorter, then this will return an empty array. + * @throws IOException + */ + public SpillInfo[] closeAndGetSpills() throws IOException { + try { + if (sorter != null) { + // Do not count the final file towards the spill count. + writeSortedFile(true); + freeMemory(); + } + return spills.toArray(new SpillInfo[spills.size()]); + } catch (IOException e) { + cleanupAfterError(); + throw e; + } + } + +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java new file mode 100644 index 0000000000000..5bab501da9364 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorter.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
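insertRecord above stores each record as a 4-byte length header followed by the serialized bytes, and keeps only an encoded address plus partition id in the sorter. A small ByteBuffer-based sketch of that length-prefixed page layout; the real code writes into task-managed pages through PlatformDependent.UNSAFE rather than a ByteBuffer, so this is an on-heap approximation only:

    import java.nio.ByteBuffer;

    final class LengthPrefixedRecords {
        private final ByteBuffer page;

        LengthPrefixedRecords(int pageSizeBytes) {
            this.page = ByteBuffer.allocate(pageSizeBytes);
        }

        /** Appends one record and returns its offset within the page (the "record pointer"). */
        int append(byte[] record) {
            final int recordOffset = page.position();
            page.putInt(record.length);  // 4-byte length header, mirroring the putInt in insertRecord
            page.put(record);            // followed by the raw serialized bytes
            return recordOffset;
        }

        /** Reads the record stored at `recordOffset` back out of the page. */
        byte[] read(int recordOffset) {
            final int length = page.getInt(recordOffset);
            final byte[] out = new byte[length];
            ByteBuffer view = page.duplicate();
            view.position(recordOffset + 4);  // skip over the length header
            view.get(out);
            return out;
        }
    }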
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.util.Comparator; + +import org.apache.spark.util.collection.Sorter; + +final class UnsafeShuffleInMemorySorter { + + private final Sorter sorter; + private static final class SortComparator implements Comparator { + @Override + public int compare(PackedRecordPointer left, PackedRecordPointer right) { + return left.getPartitionId() - right.getPartitionId(); + } + } + private static final SortComparator SORT_COMPARATOR = new SortComparator(); + + /** + * An array of record pointers and partition ids that have been encoded by + * {@link PackedRecordPointer}. The sort operates on this array instead of directly manipulating + * records. + */ + private long[] pointerArray; + + /** + * The position in the pointer array where new records can be inserted. + */ + private int pointerArrayInsertPosition = 0; + + public UnsafeShuffleInMemorySorter(int initialSize) { + assert (initialSize > 0); + this.pointerArray = new long[initialSize]; + this.sorter = new Sorter(UnsafeShuffleSortDataFormat.INSTANCE); + } + + public void expandPointerArray() { + final long[] oldArray = pointerArray; + // Guard against overflow: + final int newLength = oldArray.length * 2 > 0 ? (oldArray.length * 2) : Integer.MAX_VALUE; + pointerArray = new long[newLength]; + System.arraycopy(oldArray, 0, pointerArray, 0, oldArray.length); + } + + public boolean hasSpaceForAnotherRecord() { + return pointerArrayInsertPosition + 1 < pointerArray.length; + } + + public long getMemoryUsage() { + return pointerArray.length * 8L; + } + + /** + * Inserts a record to be sorted. + * + * @param recordPointer a pointer to the record, encoded by the task memory manager. Due to + * certain pointer compression techniques used by the sorter, the sort can + * only operate on pointers that point to locations in the first + * {@link PackedRecordPointer#MAXIMUM_PAGE_SIZE_BYTES} bytes of a data page. + * @param partitionId the partition id, which must be less than or equal to + * {@link PackedRecordPointer#MAXIMUM_PARTITION_ID}. + */ + public void insertRecord(long recordPointer, int partitionId) { + if (!hasSpaceForAnotherRecord()) { + if (pointerArray.length == Integer.MAX_VALUE) { + throw new IllegalStateException("Sort pointer array has reached maximum size"); + } else { + expandPointerArray(); + } + } + pointerArray[pointerArrayInsertPosition] = + PackedRecordPointer.packPointer(recordPointer, partitionId); + pointerArrayInsertPosition++; + } + + /** + * An iterator-like class that's used instead of Java's Iterator in order to facilitate inlining. + */ + public static final class UnsafeShuffleSorterIterator { + + private final long[] pointerArray; + private final int numRecords; + final PackedRecordPointer packedRecordPointer = new PackedRecordPointer(); + private int position = 0; + + public UnsafeShuffleSorterIterator(int numRecords, long[] pointerArray) { + this.numRecords = numRecords; + this.pointerArray = pointerArray; + } + + public boolean hasNext() { + return position < numRecords; + } + + public void loadNext() { + packedRecordPointer.set(pointerArray[position]); + position++; + } + } + + /** + * Return an iterator over record pointers in sorted order. 
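The sorter above never touches record bytes: each entry is a single long produced by PackedRecordPointer.packPointer(recordPointer, partitionId), and the comparator looks only at the partition id. The sketch below shows the general idea of packing a partition id and an address into one long; the bit split used here (24 bits of partition, 40 bits of pointer) is an illustrative assumption, not necessarily PackedRecordPointer's actual encoding:

    final class PackedPointerSketch {
        // Assumed layout for illustration: 24 high bits of partition id, 40 low bits of address.
        private static final int POINTER_BITS = 40;
        private static final long POINTER_MASK = (1L << POINTER_BITS) - 1;

        static long pack(long recordPointer, int partitionId) {
            return (((long) partitionId) << POINTER_BITS) | (recordPointer & POINTER_MASK);
        }

        static int partitionId(long packed) {
            return (int) (packed >>> POINTER_BITS);
        }

        static long recordPointer(long packed) {
            return packed & POINTER_MASK;
        }

        public static void main(String[] args) {
            long packed = pack(0x12345678L, 42);
            // The sort only ever reads partitionId(packed); the pointer rides along untouched.
            System.out.println(partitionId(packed) + " -> " + Long.toHexString(recordPointer(packed)));
        }
    }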
+ */ + public UnsafeShuffleSorterIterator getSortedIterator() { + sorter.sort(pointerArray, 0, pointerArrayInsertPosition, SORT_COMPARATOR); + return new UnsafeShuffleSorterIterator(pointerArrayInsertPosition, pointerArray); + } +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java new file mode 100644 index 0000000000000..a66d74ee44782 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleSortDataFormat.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import org.apache.spark.util.collection.SortDataFormat; + +final class UnsafeShuffleSortDataFormat extends SortDataFormat { + + public static final UnsafeShuffleSortDataFormat INSTANCE = new UnsafeShuffleSortDataFormat(); + + private UnsafeShuffleSortDataFormat() { } + + @Override + public PackedRecordPointer getKey(long[] data, int pos) { + // Since we re-use keys, this method shouldn't be called. + throw new UnsupportedOperationException(); + } + + @Override + public PackedRecordPointer newKey() { + return new PackedRecordPointer(); + } + + @Override + public PackedRecordPointer getKey(long[] data, int pos, PackedRecordPointer reuse) { + reuse.set(data[pos]); + return reuse; + } + + @Override + public void swap(long[] data, int pos0, int pos1) { + final long temp = data[pos0]; + data[pos0] = data[pos1]; + data[pos1] = temp; + } + + @Override + public void copyElement(long[] src, int srcPos, long[] dst, int dstPos) { + dst[dstPos] = src[srcPos]; + } + + @Override + public void copyRange(long[] src, int srcPos, long[] dst, int dstPos, int length) { + System.arraycopy(src, srcPos, dst, dstPos, length); + } + + @Override + public long[] allocate(int length) { + return new long[length]; + } + +} diff --git a/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java new file mode 100644 index 0000000000000..ad7eb04afcd8c --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriter.java @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
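A detail of UnsafeShuffleSortDataFormat above is that the allocating getKey(data, pos) deliberately throws and only the reuse variant is supported, so sorting millions of entries never allocates per-comparison key objects. The toy sort below (plain Java, not Spark's Sorter/SortDataFormat API, and using a simple insertion sort purely for brevity) shows the reuse pattern in isolation:

    import java.util.Arrays;

    final class ReusableKeySortSketch {
        /** Mutable key wrapper, reused across comparisons instead of allocated per element. */
        static final class Key {
            long packed;
            int partitionId() { return (int) (packed >>> 40); }  // same assumed layout as above
        }

        /** Sorts packed longs by partition id, decoding keys through two reused Key instances. */
        static void sortByPartition(long[] data, int length) {
            final Key left = new Key();
            final Key right = new Key();
            for (int i = 1; i < length; i++) {
                for (int j = i; j > 0; j--) {
                    left.packed = data[j - 1];
                    right.packed = data[j];
                    if (left.partitionId() <= right.partitionId()) {
                        break;
                    }
                    long tmp = data[j - 1];  // swap, as in UnsafeShuffleSortDataFormat.swap()
                    data[j - 1] = data[j];
                    data[j] = tmp;
                }
            }
        }

        public static void main(String[] args) {
            long[] data = {3L << 40, 1L << 40, 2L << 40};
            sortByPartition(data, data.length);
            System.out.println(Arrays.toString(data));
        }
    }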
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.io.*; +import java.nio.channels.FileChannel; +import java.util.Iterator; +import javax.annotation.Nullable; + +import scala.Option; +import scala.Product2; +import scala.collection.JavaConversions; +import scala.reflect.ClassTag; +import scala.reflect.ClassTag$; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.io.ByteStreams; +import com.google.common.io.Closeables; +import com.google.common.io.Files; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.spark.*; +import org.apache.spark.annotation.Private; +import org.apache.spark.io.CompressionCodec; +import org.apache.spark.io.CompressionCodec$; +import org.apache.spark.io.LZFCompressionCodec; +import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.network.util.LimitedInputStream; +import org.apache.spark.scheduler.MapStatus; +import org.apache.spark.scheduler.MapStatus$; +import org.apache.spark.serializer.SerializationStream; +import org.apache.spark.serializer.Serializer; +import org.apache.spark.serializer.SerializerInstance; +import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.ShuffleMemoryManager; +import org.apache.spark.shuffle.ShuffleWriter; +import org.apache.spark.storage.BlockManager; +import org.apache.spark.storage.TimeTrackingOutputStream; +import org.apache.spark.unsafe.PlatformDependent; +import org.apache.spark.unsafe.memory.TaskMemoryManager; + +@Private +public class UnsafeShuffleWriter extends ShuffleWriter { + + private final Logger logger = LoggerFactory.getLogger(UnsafeShuffleWriter.class); + + private static final ClassTag OBJECT_CLASS_TAG = ClassTag$.MODULE$.Object(); + + @VisibleForTesting + static final int INITIAL_SORT_BUFFER_SIZE = 4096; + + private final BlockManager blockManager; + private final IndexShuffleBlockResolver shuffleBlockResolver; + private final TaskMemoryManager memoryManager; + private final ShuffleMemoryManager shuffleMemoryManager; + private final SerializerInstance serializer; + private final Partitioner partitioner; + private final ShuffleWriteMetrics writeMetrics; + private final int shuffleId; + private final int mapId; + private final TaskContext taskContext; + private final SparkConf sparkConf; + private final boolean transferToEnabled; + + private MapStatus mapStatus = null; + private UnsafeShuffleExternalSorter sorter = null; + + /** Subclass of ByteArrayOutputStream that exposes `buf` directly. */ + private static final class MyByteArrayOutputStream extends ByteArrayOutputStream { + public MyByteArrayOutputStream(int size) { super(size); } + public byte[] getBuf() { return buf; } + } + + private MyByteArrayOutputStream serBuffer; + private SerializationStream serOutputStream; + + /** + * Are we in the process of stopping? Because map tasks can call stop() with success = true + * and then call stop() with success = false if they get an exception, we want to make sure + * we don't try deleting files, etc twice. 
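MyByteArrayOutputStream above exists for one reason: ByteArrayOutputStream.toByteArray() copies the buffer, and the writer wants to hand each serialized record to the sorter without a per-record copy, so it exposes the protected buf field and pairs it with size(). The same trick shown stand-alone (names here are illustrative):

    import java.io.ByteArrayOutputStream;
    import java.nio.charset.StandardCharsets;

    /** Exposes ByteArrayOutputStream's internal buffer so callers can skip toByteArray()'s copy. */
    final class ExposedByteArrayOutputStream extends ByteArrayOutputStream {
        ExposedByteArrayOutputStream(int size) { super(size); }
        byte[] getBuf() { return buf; }  // valid bytes are in the range [0, size())
    }

    final class BufferReuseDemo {
        public static void main(String[] args) {
            ExposedByteArrayOutputStream out = new ExposedByteArrayOutputStream(64);
            byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);
            out.write(payload, 0, payload.length);
            // Pass (getBuf(), size()) downstream instead of allocating out.toByteArray().
            System.out.println(new String(out.getBuf(), 0, out.size(), StandardCharsets.UTF_8));
        }
    }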
+ */ + private boolean stopping = false; + + public UnsafeShuffleWriter( + BlockManager blockManager, + IndexShuffleBlockResolver shuffleBlockResolver, + TaskMemoryManager memoryManager, + ShuffleMemoryManager shuffleMemoryManager, + UnsafeShuffleHandle handle, + int mapId, + TaskContext taskContext, + SparkConf sparkConf) throws IOException { + final int numPartitions = handle.dependency().partitioner().numPartitions(); + if (numPartitions > UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS()) { + throw new IllegalArgumentException( + "UnsafeShuffleWriter can only be used for shuffles with at most " + + UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS() + " reduce partitions"); + } + this.blockManager = blockManager; + this.shuffleBlockResolver = shuffleBlockResolver; + this.memoryManager = memoryManager; + this.shuffleMemoryManager = shuffleMemoryManager; + this.mapId = mapId; + final ShuffleDependency dep = handle.dependency(); + this.shuffleId = dep.shuffleId(); + this.serializer = Serializer.getSerializer(dep.serializer()).newInstance(); + this.partitioner = dep.partitioner(); + this.writeMetrics = new ShuffleWriteMetrics(); + taskContext.taskMetrics().shuffleWriteMetrics_$eq(Option.apply(writeMetrics)); + this.taskContext = taskContext; + this.sparkConf = sparkConf; + this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true); + open(); + } + + /** + * This convenience method should only be called in test code. + */ + @VisibleForTesting + public void write(Iterator> records) throws IOException { + write(JavaConversions.asScalaIterator(records)); + } + + @Override + public void write(scala.collection.Iterator> records) throws IOException { + boolean success = false; + try { + while (records.hasNext()) { + insertRecordIntoSorter(records.next()); + } + closeAndWriteOutput(); + success = true; + } finally { + if (!success) { + sorter.cleanupAfterError(); + } + } + } + + private void open() throws IOException { + assert (sorter == null); + sorter = new UnsafeShuffleExternalSorter( + memoryManager, + shuffleMemoryManager, + blockManager, + taskContext, + INITIAL_SORT_BUFFER_SIZE, + partitioner.numPartitions(), + sparkConf, + writeMetrics); + serBuffer = new MyByteArrayOutputStream(1024 * 1024); + serOutputStream = serializer.serializeStream(serBuffer); + } + + @VisibleForTesting + void closeAndWriteOutput() throws IOException { + serBuffer = null; + serOutputStream = null; + final SpillInfo[] spills = sorter.closeAndGetSpills(); + sorter = null; + final long[] partitionLengths; + try { + partitionLengths = mergeSpills(spills); + } finally { + for (SpillInfo spill : spills) { + if (spill.file.exists() && ! 
spill.file.delete()) { + logger.error("Error while deleting spill file {}", spill.file.getPath()); + } + } + } + shuffleBlockResolver.writeIndexFile(shuffleId, mapId, partitionLengths); + mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + } + + @VisibleForTesting + void insertRecordIntoSorter(Product2 record) throws IOException { + final K key = record._1(); + final int partitionId = partitioner.getPartition(key); + serBuffer.reset(); + serOutputStream.writeKey(key, OBJECT_CLASS_TAG); + serOutputStream.writeValue(record._2(), OBJECT_CLASS_TAG); + serOutputStream.flush(); + + final int serializedRecordSize = serBuffer.size(); + assert (serializedRecordSize > 0); + + sorter.insertRecord( + serBuffer.getBuf(), PlatformDependent.BYTE_ARRAY_OFFSET, serializedRecordSize, partitionId); + } + + @VisibleForTesting + void forceSorterToSpill() throws IOException { + assert (sorter != null); + sorter.spill(); + } + + /** + * Merge zero or more spill files together, choosing the fastest merging strategy based on the + * number of spills and the IO compression codec. + * + * @return the partition lengths in the merged file. + */ + private long[] mergeSpills(SpillInfo[] spills) throws IOException { + final File outputFile = shuffleBlockResolver.getDataFile(shuffleId, mapId); + final boolean compressionEnabled = sparkConf.getBoolean("spark.shuffle.compress", true); + final CompressionCodec compressionCodec = CompressionCodec$.MODULE$.createCodec(sparkConf); + final boolean fastMergeEnabled = + sparkConf.getBoolean("spark.shuffle.unsafe.fastMergeEnabled", true); + final boolean fastMergeIsSupported = + !compressionEnabled || compressionCodec instanceof LZFCompressionCodec; + try { + if (spills.length == 0) { + new FileOutputStream(outputFile).close(); // Create an empty file + return new long[partitioner.numPartitions()]; + } else if (spills.length == 1) { + // Here, we don't need to perform any metrics updates because the bytes written to this + // output file would have already been counted as shuffle bytes written. + Files.move(spills[0].file, outputFile); + return spills[0].partitionLengths; + } else { + final long[] partitionLengths; + // There are multiple spills to merge, so none of these spill files' lengths were counted + // towards our shuffle write count or shuffle write time. If we use the slow merge path, + // then the final output file's size won't necessarily be equal to the sum of the spill + // files' sizes. To guard against this case, we look at the output file's actual size when + // computing shuffle bytes written. + // + // We allow the individual merge methods to report their own IO times since different merge + // strategies use different IO techniques. We count IO during merge towards the shuffle + // shuffle write time, which appears to be consistent with the "not bypassing merge-sort" + // branch in ExternalSorter. + if (fastMergeEnabled && fastMergeIsSupported) { + // Compression is disabled or we are using an IO compression codec that supports + // decompression of concatenated compressed streams, so we can perform a fast spill merge + // that doesn't need to interpret the spilled bytes. 
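The eligibility check above reduces to a small decision: spill segments can be concatenated byte-for-byte only when shuffle compression is off or the codec's output stays decodable after concatenation (LZF in this patch), and transferTo is merely an optimization within that fast path. A sketch of the gating logic, with a hypothetical marker interface standing in for Spark's CompressionCodec hierarchy:

    /** Hypothetical marker for codecs whose compressed streams can be concatenated verbatim. */
    interface ConcatenatableCodec { }

    final class MergeStrategySketch {
        enum Strategy { TRANSFER_TO_FAST_MERGE, FILE_STREAM_FAST_MERGE, SLOW_DECOMPRESSING_MERGE }

        static Strategy choose(boolean compressionEnabled,
                               Object codec,
                               boolean fastMergeEnabled,
                               boolean transferToEnabled) {
            final boolean fastMergeSupported =
                !compressionEnabled || codec instanceof ConcatenatableCodec;
            if (fastMergeEnabled && fastMergeSupported) {
                // Spill bytes can be appended verbatim; transferTo additionally avoids copying
                // the data through user space.
                return transferToEnabled ? Strategy.TRANSFER_TO_FAST_MERGE
                                         : Strategy.FILE_STREAM_FAST_MERGE;
            }
            // Otherwise each partition must be decompressed and recompressed while merging.
            return Strategy.SLOW_DECOMPRESSING_MERGE;
        }
    }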
+ if (transferToEnabled) { + logger.debug("Using transferTo-based fast merge"); + partitionLengths = mergeSpillsWithTransferTo(spills, outputFile); + } else { + logger.debug("Using fileStream-based fast merge"); + partitionLengths = mergeSpillsWithFileStream(spills, outputFile, null); + } + } else { + logger.debug("Using slow merge"); + partitionLengths = mergeSpillsWithFileStream(spills, outputFile, compressionCodec); + } + // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has + // in-memory records, we write out the in-memory records to a file but do not count that + // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs + // to be counted as shuffle write, but this will lead to double-counting of the final + // SpillInfo's bytes. + writeMetrics.decShuffleBytesWritten(spills[spills.length - 1].file.length()); + writeMetrics.incShuffleBytesWritten(outputFile.length()); + return partitionLengths; + } + } catch (IOException e) { + if (outputFile.exists() && !outputFile.delete()) { + logger.error("Unable to delete output file {}", outputFile.getPath()); + } + throw e; + } + } + + /** + * Merges spill files using Java FileStreams. This code path is slower than the NIO-based merge, + * {@link UnsafeShuffleWriter#mergeSpillsWithTransferTo(SpillInfo[], File)}, so it's only used in + * cases where the IO compression codec does not support concatenation of compressed data, or in + * cases where users have explicitly disabled use of {@code transferTo} in order to work around + * kernel bugs. + * + * @param spills the spills to merge. + * @param outputFile the file to write the merged data to. + * @param compressionCodec the IO compression codec, or null if shuffle compression is disabled. + * @return the partition lengths in the merged file. 
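The dec/inc pair above is easiest to see with concrete numbers: the sorter's final flush was already counted as shuffle output when it was written, so the merge subtracts that file's length before adding the merged file's actual size, leaving earlier spills counted only as spilled bytes. A worked example under assumed file sizes (the 100 MB figures are made up for illustration):

    final class DoubleCountCorrection {
        public static void main(String[] args) {
            long shuffleBytesWritten = 0;

            // Assume two spill files of 100 MB each; the second held the sorter's final
            // in-memory records and was therefore already counted as shuffle output, not spill.
            long firstSpill = 100L << 20;   // counted as diskBytesSpilled when it was spilled
            long finalSpill = 100L << 20;   // counted as shuffle write when it was flushed
            shuffleBytesWritten += finalSpill;

            // The merge produces one 200 MB output file. Without the correction we would report
            // 100 MB (final flush) + 200 MB (merged output) = 300 MB of shuffle writes.
            long mergedOutput = firstSpill + finalSpill;
            shuffleBytesWritten -= finalSpill;   // decShuffleBytesWritten(last spill length)
            shuffleBytesWritten += mergedOutput; // incShuffleBytesWritten(outputFile.length())

            System.out.println("shuffle bytes written = " + (shuffleBytesWritten >> 20) + " MB");
        }
    }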
+ */ + private long[] mergeSpillsWithFileStream( + SpillInfo[] spills, + File outputFile, + @Nullable CompressionCodec compressionCodec) throws IOException { + assert (spills.length >= 2); + final int numPartitions = partitioner.numPartitions(); + final long[] partitionLengths = new long[numPartitions]; + final InputStream[] spillInputStreams = new FileInputStream[spills.length]; + OutputStream mergedFileOutputStream = null; + + boolean threwException = true; + try { + for (int i = 0; i < spills.length; i++) { + spillInputStreams[i] = new FileInputStream(spills[i].file); + } + for (int partition = 0; partition < numPartitions; partition++) { + final long initialFileLength = outputFile.length(); + mergedFileOutputStream = + new TimeTrackingOutputStream(writeMetrics, new FileOutputStream(outputFile, true)); + if (compressionCodec != null) { + mergedFileOutputStream = compressionCodec.compressedOutputStream(mergedFileOutputStream); + } + + for (int i = 0; i < spills.length; i++) { + final long partitionLengthInSpill = spills[i].partitionLengths[partition]; + if (partitionLengthInSpill > 0) { + InputStream partitionInputStream = + new LimitedInputStream(spillInputStreams[i], partitionLengthInSpill); + if (compressionCodec != null) { + partitionInputStream = compressionCodec.compressedInputStream(partitionInputStream); + } + ByteStreams.copy(partitionInputStream, mergedFileOutputStream); + } + } + mergedFileOutputStream.flush(); + mergedFileOutputStream.close(); + partitionLengths[partition] = (outputFile.length() - initialFileLength); + } + threwException = false; + } finally { + // To avoid masking exceptions that caused us to prematurely enter the finally block, only + // throw exceptions during cleanup if threwException == false. + for (InputStream stream : spillInputStreams) { + Closeables.close(stream, threwException); + } + Closeables.close(mergedFileOutputStream, threwException); + } + return partitionLengths; + } + + /** + * Merges spill files by using NIO's transferTo to concatenate spill partitions' bytes. + * This is only safe when the IO compression codec and serializer support concatenation of + * serialized streams. + * + * @return the partition lengths in the merged file. + */ + private long[] mergeSpillsWithTransferTo(SpillInfo[] spills, File outputFile) throws IOException { + assert (spills.length >= 2); + final int numPartitions = partitioner.numPartitions(); + final long[] partitionLengths = new long[numPartitions]; + final FileChannel[] spillInputChannels = new FileChannel[spills.length]; + final long[] spillInputChannelPositions = new long[spills.length]; + FileChannel mergedFileOutputChannel = null; + + boolean threwException = true; + try { + for (int i = 0; i < spills.length; i++) { + spillInputChannels[i] = new FileInputStream(spills[i].file).getChannel(); + } + // This file needs to opened in append mode in order to work around a Linux kernel bug that + // affects transferTo; see SPARK-3948 for more details. 
+ mergedFileOutputChannel = new FileOutputStream(outputFile, true).getChannel(); + + long bytesWrittenToMergedFile = 0; + for (int partition = 0; partition < numPartitions; partition++) { + for (int i = 0; i < spills.length; i++) { + final long partitionLengthInSpill = spills[i].partitionLengths[partition]; + long bytesToTransfer = partitionLengthInSpill; + final FileChannel spillInputChannel = spillInputChannels[i]; + final long writeStartTime = System.nanoTime(); + while (bytesToTransfer > 0) { + final long actualBytesTransferred = spillInputChannel.transferTo( + spillInputChannelPositions[i], + bytesToTransfer, + mergedFileOutputChannel); + spillInputChannelPositions[i] += actualBytesTransferred; + bytesToTransfer -= actualBytesTransferred; + } + writeMetrics.incShuffleWriteTime(System.nanoTime() - writeStartTime); + bytesWrittenToMergedFile += partitionLengthInSpill; + partitionLengths[partition] += partitionLengthInSpill; + } + } + // Check the position after transferTo loop to see if it is in the right position and raise an + // exception if it is incorrect. The position will not be increased to the expected length + // after calling transferTo in kernel version 2.6.32. This issue is described at + // https://bugs.openjdk.java.net/browse/JDK-7052359 and SPARK-3948. + if (mergedFileOutputChannel.position() != bytesWrittenToMergedFile) { + throw new IOException( + "Current position " + mergedFileOutputChannel.position() + " does not equal expected " + + "position " + bytesWrittenToMergedFile + " after transferTo. Please check your kernel" + + " version to see if it is 2.6.32, as there is a kernel bug which will lead to " + + "unexpected behavior when using transferTo. You can set spark.file.transferTo=false " + + "to disable this NIO feature." + ); + } + threwException = false; + } finally { + // To avoid masking exceptions that caused us to prematurely enter the finally block, only + // throw exceptions during cleanup if threwException == false. + for (int i = 0; i < spills.length; i++) { + assert(spillInputChannelPositions[i] == spills[i].file.length()); + Closeables.close(spillInputChannels[i], threwException); + } + Closeables.close(mergedFileOutputChannel, threwException); + } + return partitionLengths; + } + + @Override + public Option stop(boolean success) { + try { + if (stopping) { + return Option.apply(null); + } else { + stopping = true; + if (success) { + if (mapStatus == null) { + throw new IllegalStateException("Cannot call stop(true) without having called write()"); + } + return Option.apply(mapStatus); + } else { + // The map task failed, so delete our output data. + shuffleBlockResolver.removeDataByMap(shuffleId, mapId); + return Option.apply(null); + } + } + } finally { + if (sorter != null) { + // If sorter is non-null, then this implies that we called stop() in response to an error, + // so we need to clean up memory and spill files created by the sorter + sorter.cleanupAfterError(); + } + } + } +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java b/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java new file mode 100644 index 0000000000000..8c7dcf776fda8 --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/ApplicationStatus.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
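The inner while loop above exists because FileChannel.transferTo may move fewer bytes than requested, so the call is repeated until the partition's byte count has been fully transferred; the position check afterwards guards against the kernel bug noted in the comment. A stand-alone version of that loop (the no-progress guard is an addition in this sketch, and the real code also records write time around the calls):

    import java.io.IOException;
    import java.nio.channels.FileChannel;

    final class TransferToLoop {
        /**
         * Copies `length` bytes starting at `position` in `in` to `out`, looping because a
         * single transferTo call may transfer fewer bytes than requested.
         */
        static long copyRange(FileChannel in, long position, long length, FileChannel out)
                throws IOException {
            long transferred = 0;
            while (transferred < length) {
                long moved = in.transferTo(position + transferred, length - transferred, out);
                if (moved <= 0) {
                    throw new IOException(
                        "transferTo made no progress at offset " + (position + transferred));
                }
                transferred += moved;
            }
            return transferred;
        }
    }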
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +public enum ApplicationStatus { + COMPLETED, + RUNNING; + + public static ApplicationStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(ApplicationStatus.class, str); + } + +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java b/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java new file mode 100644 index 0000000000000..9dbb565aab707 --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/StageStatus.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +public enum StageStatus { + ACTIVE, + COMPLETE, + FAILED, + PENDING; + + public static StageStatus fromString(String str) { + return EnumUtil.parseIgnoreCase(StageStatus.class, str); + } +} diff --git a/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java new file mode 100644 index 0000000000000..f19ed01d5aebf --- /dev/null +++ b/core/src/main/java/org/apache/spark/status/api/v1/TaskSorting.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.status.api.v1; + +import org.apache.spark.util.EnumUtil; + +import java.util.HashSet; +import java.util.Set; + +public enum TaskSorting { + ID, + INCREASING_RUNTIME("runtime"), + DECREASING_RUNTIME("-runtime"); + + private final Set alternateNames; + private TaskSorting(String... names) { + alternateNames = new HashSet(); + for (String n: names) { + alternateNames.add(n); + } + } + + public static TaskSorting fromString(String str) { + String lower = str.toLowerCase(); + for (TaskSorting t: values()) { + if (t.alternateNames.contains(lower)) { + return t; + } + } + return EnumUtil.parseIgnoreCase(TaskSorting.class, str); + } + +} diff --git a/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java b/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java new file mode 100644 index 0000000000000..dc2aa30466cc6 --- /dev/null +++ b/core/src/main/java/org/apache/spark/storage/TimeTrackingOutputStream.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage; + +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.spark.annotation.Private; +import org.apache.spark.executor.ShuffleWriteMetrics; + +/** + * Intercepts write calls and tracks total time spent writing in order to update shuffle write + * metrics. Not thread safe. 
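TaskSorting above combines two lookups: a lowercase alternate-name table ("runtime", "-runtime") consulted first, then a case-insensitive match against the constant names via EnumUtil.parseIgnoreCase (which appears later in this patch). A compact stand-alone version of the same pattern, using a hypothetical SortOrder enum:

    import java.util.HashMap;
    import java.util.Locale;
    import java.util.Map;

    enum SortOrder {
        ID,
        INCREASING_RUNTIME("runtime"),
        DECREASING_RUNTIME("-runtime");

        private static final Map<String, SortOrder> ALTERNATES = new HashMap<String, SortOrder>();
        static {
            for (SortOrder order : values()) {
                for (String name : order.alternateNames) {
                    ALTERNATES.put(name, order);
                }
            }
        }

        private final String[] alternateNames;

        SortOrder(String... names) {
            this.alternateNames = names;
        }

        static SortOrder fromString(String str) {
            SortOrder byAlternate = ALTERNATES.get(str.toLowerCase(Locale.ROOT));
            if (byAlternate != null) {
                return byAlternate;
            }
            // Fall back to a case-insensitive match on the constant names themselves.
            for (SortOrder order : values()) {
                if (order.name().equalsIgnoreCase(str)) {
                    return order;
                }
            }
            throw new IllegalArgumentException("Unknown sort order: " + str);
        }
    }

For example, SortOrder.fromString("-runtime") and SortOrder.fromString("decreasing_runtime") both resolve to DECREASING_RUNTIME, mirroring how the REST API accepts either spelling.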
+ */ +@Private +public final class TimeTrackingOutputStream extends OutputStream { + + private final ShuffleWriteMetrics writeMetrics; + private final OutputStream outputStream; + + public TimeTrackingOutputStream(ShuffleWriteMetrics writeMetrics, OutputStream outputStream) { + this.writeMetrics = writeMetrics; + this.outputStream = outputStream; + } + + @Override + public void write(int b) throws IOException { + final long startTime = System.nanoTime(); + outputStream.write(b); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void write(byte[] b) throws IOException { + final long startTime = System.nanoTime(); + outputStream.write(b); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + final long startTime = System.nanoTime(); + outputStream.write(b, off, len); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void flush() throws IOException { + final long startTime = System.nanoTime(); + outputStream.flush(); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } + + @Override + public void close() throws IOException { + final long startTime = System.nanoTime(); + outputStream.close(); + writeMetrics.incShuffleWriteTime(System.nanoTime() - startTime); + } +} diff --git a/core/src/main/java/org/apache/spark/util/EnumUtil.java b/core/src/main/java/org/apache/spark/util/EnumUtil.java new file mode 100644 index 0000000000000..c40c7e727613c --- /dev/null +++ b/core/src/main/java/org/apache/spark/util/EnumUtil.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util; + +import com.google.common.base.Joiner; +import org.apache.spark.annotation.Private; + +@Private +public class EnumUtil { + public static > E parseIgnoreCase(Class clz, String str) { + E[] constants = clz.getEnumConstants(); + if (str == null) { + return null; + } + for (E e : constants) { + if (e.name().equalsIgnoreCase(str)) { + return e; + } + } + throw new IllegalArgumentException( + String.format("Illegal type='%s'. 
Supported type values: %s", + str, Joiner.on(", ").join(constants))); + } +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/bootstrap-tooltip.js b/core/src/main/resources/org/apache/spark/ui/static/bootstrap-tooltip.js index 2934181c1006a..acd6096e6743e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/bootstrap-tooltip.js +++ b/core/src/main/resources/org/apache/spark/ui/static/bootstrap-tooltip.js @@ -1,9 +1,9 @@ /* =========================================================== - * bootstrap-tooltip.js v2.2.2 - * http://twitter.github.com/bootstrap/javascript.html#tooltips + * bootstrap-tooltip.js v2.3.2 + * http://getbootstrap.com/2.3.2/javascript.html#tooltips * Inspired by the original jQuery.tipsy by Jason Frame * =========================================================== - * Copyright 2012 Twitter, Inc. + * Copyright 2013 Twitter, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,19 +38,27 @@ , init: function (type, element, options) { var eventIn , eventOut + , triggers + , trigger + , i this.type = type this.$element = $(element) this.options = this.getOptions(options) this.enabled = true - if (this.options.trigger == 'click') { - this.$element.on('click.' + this.type, this.options.selector, $.proxy(this.toggle, this)) - } else if (this.options.trigger != 'manual') { - eventIn = this.options.trigger == 'hover' ? 'mouseenter' : 'focus' - eventOut = this.options.trigger == 'hover' ? 'mouseleave' : 'blur' - this.$element.on(eventIn + '.' + this.type, this.options.selector, $.proxy(this.enter, this)) - this.$element.on(eventOut + '.' + this.type, this.options.selector, $.proxy(this.leave, this)) + triggers = this.options.trigger.split(' ') + + for (i = triggers.length; i--;) { + trigger = triggers[i] + if (trigger == 'click') { + this.$element.on('click.' + this.type, this.options.selector, $.proxy(this.toggle, this)) + } else if (trigger != 'manual') { + eventIn = trigger == 'hover' ? 'mouseenter' : 'focus' + eventOut = trigger == 'hover' ? 'mouseleave' : 'blur' + this.$element.on(eventIn + '.' + this.type, this.options.selector, $.proxy(this.enter, this)) + this.$element.on(eventOut + '.' + this.type, this.options.selector, $.proxy(this.leave, this)) + } } this.options.selector ? 
@@ -59,7 +67,7 @@ } , getOptions: function (options) { - options = $.extend({}, $.fn[this.type].defaults, options, this.$element.data()) + options = $.extend({}, $.fn[this.type].defaults, this.$element.data(), options) if (options.delay && typeof options.delay == 'number') { options.delay = { @@ -72,7 +80,15 @@ } , enter: function (e) { - var self = $(e.currentTarget)[this.type](this._options).data(this.type) + var defaults = $.fn[this.type].defaults + , options = {} + , self + + this._options && $.each(this._options, function (key, value) { + if (defaults[key] != value) options[key] = value + }, this) + + self = $(e.currentTarget)[this.type](options).data(this.type) if (!self.options.delay || !self.options.delay.show) return self.show() @@ -97,14 +113,16 @@ , show: function () { var $tip - , inside , pos , actualWidth , actualHeight , placement , tp + , e = $.Event('show') if (this.hasContent() && this.enabled) { + this.$element.trigger(e) + if (e.isDefaultPrevented()) return $tip = this.tip() this.setContent() @@ -116,19 +134,18 @@ this.options.placement.call(this, $tip[0], this.$element[0]) : this.options.placement - inside = /in/.test(placement) - $tip .detach() .css({ top: 0, left: 0, display: 'block' }) - .insertAfter(this.$element) - pos = this.getPosition(inside) + this.options.container ? $tip.appendTo(this.options.container) : $tip.insertAfter(this.$element) + + pos = this.getPosition() actualWidth = $tip[0].offsetWidth actualHeight = $tip[0].offsetHeight - switch (inside ? placement.split(' ')[1] : placement) { + switch (placement) { case 'bottom': tp = {top: pos.top + pos.height, left: pos.left + pos.width / 2 - actualWidth / 2} break @@ -143,11 +160,56 @@ break } - $tip - .offset(tp) - .addClass(placement) - .addClass('in') + this.applyPlacement(tp, placement) + this.$element.trigger('shown') + } + } + + , applyPlacement: function(offset, placement){ + var $tip = this.tip() + , width = $tip[0].offsetWidth + , height = $tip[0].offsetHeight + , actualWidth + , actualHeight + , delta + , replace + + $tip + .offset(offset) + .addClass(placement) + .addClass('in') + + actualWidth = $tip[0].offsetWidth + actualHeight = $tip[0].offsetHeight + + if (placement == 'top' && actualHeight != height) { + offset.top = offset.top + height - actualHeight + replace = true + } + + if (placement == 'bottom' || placement == 'top') { + delta = 0 + + if (offset.left < 0){ + delta = offset.left * -2 + offset.left = 0 + $tip.offset(offset) + actualWidth = $tip[0].offsetWidth + actualHeight = $tip[0].offsetHeight + } + + this.replaceArrow(delta - width + actualWidth, actualWidth, 'left') + } else { + this.replaceArrow(actualHeight - height, actualHeight, 'top') } + + if (replace) $tip.offset(offset) + } + + , replaceArrow: function(delta, dimension, position){ + this + .arrow() + .css(position, delta ? (50 * (1 - delta / dimension) + "%") : '') } , setContent: function () { @@ -161,6 +223,10 @@ , hide: function () { var that = this , $tip = this.tip() + , e = $.Event('hide') + + this.$element.trigger(e) + if (e.isDefaultPrevented()) return $tip.removeClass('in') @@ -179,6 +245,8 @@ removeWithAnimation() : $tip.detach() + this.$element.trigger('hidden') + return this } @@ -193,11 +261,12 @@ return this.getTitle() } - , getPosition: function (inside) { - return $.extend({}, (inside ? 
{top: 0, left: 0} : this.$element.offset()), { - width: this.$element[0].offsetWidth - , height: this.$element[0].offsetHeight - }) + , getPosition: function () { + var el = this.$element[0] + return $.extend({}, (typeof el.getBoundingClientRect == 'function') ? el.getBoundingClientRect() : { + width: el.offsetWidth + , height: el.offsetHeight + }, this.$element.offset()) } , getTitle: function () { @@ -215,6 +284,10 @@ return this.$tip = this.$tip || $(this.options.template) } + , arrow: function(){ + return this.$arrow = this.$arrow || this.tip().find(".tooltip-arrow") + } + , validate: function () { if (!this.$element[0].parentNode) { this.hide() @@ -236,8 +309,8 @@ } , toggle: function (e) { - var self = $(e.currentTarget)[this.type](this._options).data(this.type) - self[self.tip().hasClass('in') ? 'hide' : 'show']() + var self = e ? $(e.currentTarget)[this.type](this._options).data(this.type) : this + self.tip().hasClass('in') ? self.hide() : self.show() } , destroy: function () { @@ -269,10 +342,11 @@ , placement: 'top' , selector: false , template: '
' - , trigger: 'hover' + , trigger: 'hover focus' , title: '' , delay: 0 , html: false + , container: false } @@ -285,4 +359,3 @@ } }(window.jQuery); - diff --git a/core/src/main/resources/org/apache/spark/ui/static/d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/d3.min.js new file mode 100644 index 0000000000000..30cd292198b91 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/d3.min.js @@ -0,0 +1,5 @@ +/*v3.5.5*/!function(){function n(n){return n&&(n.ownerDocument||n.document||n).documentElement}function t(n){return n&&(n.ownerDocument&&n.ownerDocument.defaultView||n.document&&n||n.defaultView)}function e(n,t){return t>n?-1:n>t?1:n>=t?0:0/0}function r(n){return null===n?0/0:+n}function u(n){return!isNaN(n)}function i(n){return{left:function(t,e,r,u){for(arguments.length<3&&(r=0),arguments.length<4&&(u=t.length);u>r;){var i=r+u>>>1;n(t[i],e)<0?r=i+1:u=i}return r},right:function(t,e,r,u){for(arguments.length<3&&(r=0),arguments.length<4&&(u=t.length);u>r;){var i=r+u>>>1;n(t[i],e)>0?u=i:r=i+1}return r}}}function o(n){return n.length}function a(n){for(var t=1;n*t%1;)t*=10;return t}function c(n,t){for(var e in t)Object.defineProperty(n.prototype,e,{value:t[e],enumerable:!1})}function l(){this._=Object.create(null)}function s(n){return(n+="")===pa||n[0]===va?va+n:n}function f(n){return(n+="")[0]===va?n.slice(1):n}function h(n){return s(n)in this._}function g(n){return(n=s(n))in this._&&delete this._[n]}function p(){var n=[];for(var t in this._)n.push(f(t));return n}function v(){var n=0;for(var t in this._)++n;return n}function d(){for(var n in this._)return!1;return!0}function m(){this._=Object.create(null)}function y(n){return n}function M(n,t,e){return function(){var r=e.apply(t,arguments);return r===t?n:r}}function x(n,t){if(t in n)return t;t=t.charAt(0).toUpperCase()+t.slice(1);for(var e=0,r=da.length;r>e;++e){var u=da[e]+t;if(u in n)return u}}function b(){}function _(){}function w(n){function t(){for(var t,r=e,u=-1,i=r.length;++ue;e++)for(var u,i=n[e],o=0,a=i.length;a>o;o++)(u=i[o])&&t(u,o,e);return n}function Z(n){return ya(n,Sa),n}function V(n){var t,e;return function(r,u,i){var o,a=n[i].update,c=a.length;for(i!=e&&(e=i,t=0),u>=t&&(t=u+1);!(o=a[t])&&++t0&&(n=n.slice(0,a));var l=ka.get(n);return l&&(n=l,c=B),a?t?u:r:t?b:i}function $(n,t){return function(e){var r=ta.event;ta.event=e,t[0]=this.__data__;try{n.apply(this,t)}finally{ta.event=r}}}function B(n,t){var e=$(n,t);return function(n){var t=this,r=n.relatedTarget;r&&(r===t||8&r.compareDocumentPosition(t))||e.call(t,n)}}function W(e){var r=".dragsuppress-"+ ++Aa,u="click"+r,i=ta.select(t(e)).on("touchmove"+r,S).on("dragstart"+r,S).on("selectstart"+r,S);if(null==Ea&&(Ea="onselectstart"in e?!1:x(e.style,"userSelect")),Ea){var o=n(e).style,a=o[Ea];o[Ea]="none"}return function(n){if(i.on(r,null),Ea&&(o[Ea]=a),n){var t=function(){i.on(u,null)};i.on(u,function(){S(),t()},!0),setTimeout(t,0)}}}function J(n,e){e.changedTouches&&(e=e.changedTouches[0]);var r=n.ownerSVGElement||n;if(r.createSVGPoint){var u=r.createSVGPoint();if(0>Na){var i=t(n);if(i.scrollX||i.scrollY){r=ta.select("body").append("svg").style({position:"absolute",top:0,left:0,margin:0,padding:0,border:"none"},"important");var o=r[0][0].getScreenCTM();Na=!(o.f||o.e),r.remove()}}return Na?(u.x=e.pageX,u.y=e.pageY):(u.x=e.clientX,u.y=e.clientY),u=u.matrixTransform(n.getScreenCTM().inverse()),[u.x,u.y]}var a=n.getBoundingClientRect();return[e.clientX-a.left-n.clientLeft,e.clientY-a.top-n.clientTop]}function G(){return 
ta.event.changedTouches[0].identifier}function K(n){return n>0?1:0>n?-1:0}function Q(n,t,e){return(t[0]-n[0])*(e[1]-n[1])-(t[1]-n[1])*(e[0]-n[0])}function nt(n){return n>1?0:-1>n?qa:Math.acos(n)}function tt(n){return n>1?Ra:-1>n?-Ra:Math.asin(n)}function et(n){return((n=Math.exp(n))-1/n)/2}function rt(n){return((n=Math.exp(n))+1/n)/2}function ut(n){return((n=Math.exp(2*n))-1)/(n+1)}function it(n){return(n=Math.sin(n/2))*n}function ot(){}function at(n,t,e){return this instanceof at?(this.h=+n,this.s=+t,void(this.l=+e)):arguments.length<2?n instanceof at?new at(n.h,n.s,n.l):bt(""+n,_t,at):new at(n,t,e)}function ct(n,t,e){function r(n){return n>360?n-=360:0>n&&(n+=360),60>n?i+(o-i)*n/60:180>n?o:240>n?i+(o-i)*(240-n)/60:i}function u(n){return Math.round(255*r(n))}var i,o;return n=isNaN(n)?0:(n%=360)<0?n+360:n,t=isNaN(t)?0:0>t?0:t>1?1:t,e=0>e?0:e>1?1:e,o=.5>=e?e*(1+t):e+t-e*t,i=2*e-o,new mt(u(n+120),u(n),u(n-120))}function lt(n,t,e){return this instanceof lt?(this.h=+n,this.c=+t,void(this.l=+e)):arguments.length<2?n instanceof lt?new lt(n.h,n.c,n.l):n instanceof ft?gt(n.l,n.a,n.b):gt((n=wt((n=ta.rgb(n)).r,n.g,n.b)).l,n.a,n.b):new lt(n,t,e)}function st(n,t,e){return isNaN(n)&&(n=0),isNaN(t)&&(t=0),new ft(e,Math.cos(n*=Da)*t,Math.sin(n)*t)}function ft(n,t,e){return this instanceof ft?(this.l=+n,this.a=+t,void(this.b=+e)):arguments.length<2?n instanceof ft?new ft(n.l,n.a,n.b):n instanceof lt?st(n.h,n.c,n.l):wt((n=mt(n)).r,n.g,n.b):new ft(n,t,e)}function ht(n,t,e){var r=(n+16)/116,u=r+t/500,i=r-e/200;return u=pt(u)*Xa,r=pt(r)*$a,i=pt(i)*Ba,new mt(dt(3.2404542*u-1.5371385*r-.4985314*i),dt(-.969266*u+1.8760108*r+.041556*i),dt(.0556434*u-.2040259*r+1.0572252*i))}function gt(n,t,e){return n>0?new lt(Math.atan2(e,t)*Pa,Math.sqrt(t*t+e*e),n):new lt(0/0,0/0,n)}function pt(n){return n>.206893034?n*n*n:(n-4/29)/7.787037}function vt(n){return n>.008856?Math.pow(n,1/3):7.787037*n+4/29}function dt(n){return Math.round(255*(.00304>=n?12.92*n:1.055*Math.pow(n,1/2.4)-.055))}function mt(n,t,e){return this instanceof mt?(this.r=~~n,this.g=~~t,void(this.b=~~e)):arguments.length<2?n instanceof mt?new mt(n.r,n.g,n.b):bt(""+n,mt,ct):new mt(n,t,e)}function yt(n){return new mt(n>>16,n>>8&255,255&n)}function Mt(n){return yt(n)+""}function xt(n){return 16>n?"0"+Math.max(0,n).toString(16):Math.min(255,n).toString(16)}function bt(n,t,e){var r,u,i,o=0,a=0,c=0;if(r=/([a-z]+)\((.*)\)/i.exec(n))switch(u=r[2].split(","),r[1]){case"hsl":return e(parseFloat(u[0]),parseFloat(u[1])/100,parseFloat(u[2])/100);case"rgb":return t(kt(u[0]),kt(u[1]),kt(u[2]))}return(i=Ga.get(n.toLowerCase()))?t(i.r,i.g,i.b):(null==n||"#"!==n.charAt(0)||isNaN(i=parseInt(n.slice(1),16))||(4===n.length?(o=(3840&i)>>4,o=o>>4|o,a=240&i,a=a>>4|a,c=15&i,c=c<<4|c):7===n.length&&(o=(16711680&i)>>16,a=(65280&i)>>8,c=255&i)),t(o,a,c))}function _t(n,t,e){var r,u,i=Math.min(n/=255,t/=255,e/=255),o=Math.max(n,t,e),a=o-i,c=(o+i)/2;return a?(u=.5>c?a/(o+i):a/(2-o-i),r=n==o?(t-e)/a+(e>t?6:0):t==o?(e-n)/a+2:(n-t)/a+4,r*=60):(r=0/0,u=c>0&&1>c?0:r),new at(r,u,c)}function wt(n,t,e){n=St(n),t=St(t),e=St(e);var r=vt((.4124564*n+.3575761*t+.1804375*e)/Xa),u=vt((.2126729*n+.7151522*t+.072175*e)/$a),i=vt((.0193339*n+.119192*t+.9503041*e)/Ba);return ft(116*u-16,500*(r-u),200*(u-i))}function St(n){return(n/=255)<=.04045?n/12.92:Math.pow((n+.055)/1.055,2.4)}function kt(n){var t=parseFloat(n);return"%"===n.charAt(n.length-1)?Math.round(2.55*t):t}function Et(n){return"function"==typeof n?n:function(){return n}}function At(n){return function(t,e,r){return 
2===arguments.length&&"function"==typeof e&&(r=e,e=null),Nt(t,e,n,r)}}function Nt(n,t,e,r){function u(){var n,t=c.status;if(!t&&zt(c)||t>=200&&300>t||304===t){try{n=e.call(i,c)}catch(r){return void o.error.call(i,r)}o.load.call(i,n)}else o.error.call(i,c)}var i={},o=ta.dispatch("beforesend","progress","load","error"),a={},c=new XMLHttpRequest,l=null;return!this.XDomainRequest||"withCredentials"in c||!/^(http(s)?:)?\/\//.test(n)||(c=new XDomainRequest),"onload"in c?c.onload=c.onerror=u:c.onreadystatechange=function(){c.readyState>3&&u()},c.onprogress=function(n){var t=ta.event;ta.event=n;try{o.progress.call(i,c)}finally{ta.event=t}},i.header=function(n,t){return n=(n+"").toLowerCase(),arguments.length<2?a[n]:(null==t?delete a[n]:a[n]=t+"",i)},i.mimeType=function(n){return arguments.length?(t=null==n?null:n+"",i):t},i.responseType=function(n){return arguments.length?(l=n,i):l},i.response=function(n){return e=n,i},["get","post"].forEach(function(n){i[n]=function(){return i.send.apply(i,[n].concat(ra(arguments)))}}),i.send=function(e,r,u){if(2===arguments.length&&"function"==typeof r&&(u=r,r=null),c.open(e,n,!0),null==t||"accept"in a||(a.accept=t+",*/*"),c.setRequestHeader)for(var s in a)c.setRequestHeader(s,a[s]);return null!=t&&c.overrideMimeType&&c.overrideMimeType(t),null!=l&&(c.responseType=l),null!=u&&i.on("error",u).on("load",function(n){u(null,n)}),o.beforesend.call(i,c),c.send(null==r?null:r),i},i.abort=function(){return c.abort(),i},ta.rebind(i,o,"on"),null==r?i:i.get(Ct(r))}function Ct(n){return 1===n.length?function(t,e){n(null==t?e:null)}:n}function zt(n){var t=n.responseType;return t&&"text"!==t?n.response:n.responseText}function qt(){var n=Lt(),t=Tt()-n;t>24?(isFinite(t)&&(clearTimeout(tc),tc=setTimeout(qt,t)),nc=0):(nc=1,rc(qt))}function Lt(){var n=Date.now();for(ec=Ka;ec;)n>=ec.t&&(ec.f=ec.c(n-ec.t)),ec=ec.n;return n}function Tt(){for(var n,t=Ka,e=1/0;t;)t.f?t=n?n.n=t.n:Ka=t.n:(t.t8?function(n){return n/e}:function(n){return n*e},symbol:n}}function Pt(n){var t=n.decimal,e=n.thousands,r=n.grouping,u=n.currency,i=r&&e?function(n,t){for(var u=n.length,i=[],o=0,a=r[0],c=0;u>0&&a>0&&(c+a+1>t&&(a=Math.max(1,t-c)),i.push(n.substring(u-=a,u+a)),!((c+=a+1)>t));)a=r[o=(o+1)%r.length];return i.reverse().join(e)}:y;return function(n){var e=ic.exec(n),r=e[1]||" ",o=e[2]||">",a=e[3]||"-",c=e[4]||"",l=e[5],s=+e[6],f=e[7],h=e[8],g=e[9],p=1,v="",d="",m=!1,y=!0;switch(h&&(h=+h.substring(1)),(l||"0"===r&&"="===o)&&(l=r="0",o="="),g){case"n":f=!0,g="g";break;case"%":p=100,d="%",g="f";break;case"p":p=100,d="%",g="r";break;case"b":case"o":case"x":case"X":"#"===c&&(v="0"+g.toLowerCase());case"c":y=!1;case"d":m=!0,h=0;break;case"s":p=-1,g="r"}"$"===c&&(v=u[0],d=u[1]),"r"!=g||h||(g="g"),null!=h&&("g"==g?h=Math.max(1,Math.min(21,h)):("e"==g||"f"==g)&&(h=Math.max(0,Math.min(20,h)))),g=oc.get(g)||Ut;var M=l&&f;return function(n){var e=d;if(m&&n%1)return"";var u=0>n||0===n&&0>1/n?(n=-n,"-"):"-"===a?"":a;if(0>p){var c=ta.formatPrefix(n,h);n=c.scale(n),e=c.symbol+d}else n*=p;n=g(n,h);var x,b,_=n.lastIndexOf(".");if(0>_){var w=y?n.lastIndexOf("e"):-1;0>w?(x=n,b=""):(x=n.substring(0,w),b=n.substring(w))}else x=n.substring(0,_),b=t+n.substring(_+1);!l&&f&&(x=i(x,1/0));var S=v.length+x.length+b.length+(M?0:u.length),k=s>S?new Array(S=s-S+1).join(r):"";return M&&(x=i(k+x,k.length?s-b.length:1/0)),u+=v,n=x+b,("<"===o?u+n+k:">"===o?k+u+n:"^"===o?k.substring(0,S>>=1)+u+n+k.substring(S):u+(M?n:k+n))+e}}}function Ut(n){return n+""}function jt(){this._=new 
Date(arguments.length>1?Date.UTC.apply(this,arguments):arguments[0])}function Ft(n,t,e){function r(t){var e=n(t),r=i(e,1);return r-t>t-e?e:r}function u(e){return t(e=n(new cc(e-1)),1),e}function i(n,e){return t(n=new cc(+n),e),n}function o(n,r,i){var o=u(n),a=[];if(i>1)for(;r>o;)e(o)%i||a.push(new Date(+o)),t(o,1);else for(;r>o;)a.push(new Date(+o)),t(o,1);return a}function a(n,t,e){try{cc=jt;var r=new jt;return r._=n,o(r,t,e)}finally{cc=Date}}n.floor=n,n.round=r,n.ceil=u,n.offset=i,n.range=o;var c=n.utc=Ht(n);return c.floor=c,c.round=Ht(r),c.ceil=Ht(u),c.offset=Ht(i),c.range=a,n}function Ht(n){return function(t,e){try{cc=jt;var r=new jt;return r._=t,n(r,e)._}finally{cc=Date}}}function Ot(n){function t(n){function t(t){for(var e,u,i,o=[],a=-1,c=0;++aa;){if(r>=l)return-1;if(u=t.charCodeAt(a++),37===u){if(o=t.charAt(a++),i=C[o in sc?t.charAt(a++):o],!i||(r=i(n,e,r))<0)return-1}else if(u!=e.charCodeAt(r++))return-1}return r}function r(n,t,e){_.lastIndex=0;var r=_.exec(t.slice(e));return r?(n.w=w.get(r[0].toLowerCase()),e+r[0].length):-1}function u(n,t,e){x.lastIndex=0;var r=x.exec(t.slice(e));return r?(n.w=b.get(r[0].toLowerCase()),e+r[0].length):-1}function i(n,t,e){E.lastIndex=0;var r=E.exec(t.slice(e));return r?(n.m=A.get(r[0].toLowerCase()),e+r[0].length):-1}function o(n,t,e){S.lastIndex=0;var r=S.exec(t.slice(e));return r?(n.m=k.get(r[0].toLowerCase()),e+r[0].length):-1}function a(n,t,r){return e(n,N.c.toString(),t,r)}function c(n,t,r){return e(n,N.x.toString(),t,r)}function l(n,t,r){return e(n,N.X.toString(),t,r)}function s(n,t,e){var r=M.get(t.slice(e,e+=2).toLowerCase());return null==r?-1:(n.p=r,e)}var f=n.dateTime,h=n.date,g=n.time,p=n.periods,v=n.days,d=n.shortDays,m=n.months,y=n.shortMonths;t.utc=function(n){function e(n){try{cc=jt;var t=new cc;return t._=n,r(t)}finally{cc=Date}}var r=t(n);return e.parse=function(n){try{cc=jt;var t=r.parse(n);return t&&t._}finally{cc=Date}},e.toString=r.toString,e},t.multi=t.utc.multi=ae;var M=ta.map(),x=Yt(v),b=Zt(v),_=Yt(d),w=Zt(d),S=Yt(m),k=Zt(m),E=Yt(y),A=Zt(y);p.forEach(function(n,t){M.set(n.toLowerCase(),t)});var N={a:function(n){return d[n.getDay()]},A:function(n){return v[n.getDay()]},b:function(n){return y[n.getMonth()]},B:function(n){return m[n.getMonth()]},c:t(f),d:function(n,t){return It(n.getDate(),t,2)},e:function(n,t){return It(n.getDate(),t,2)},H:function(n,t){return It(n.getHours(),t,2)},I:function(n,t){return It(n.getHours()%12||12,t,2)},j:function(n,t){return It(1+ac.dayOfYear(n),t,3)},L:function(n,t){return It(n.getMilliseconds(),t,3)},m:function(n,t){return It(n.getMonth()+1,t,2)},M:function(n,t){return It(n.getMinutes(),t,2)},p:function(n){return p[+(n.getHours()>=12)]},S:function(n,t){return It(n.getSeconds(),t,2)},U:function(n,t){return It(ac.sundayOfYear(n),t,2)},w:function(n){return n.getDay()},W:function(n,t){return It(ac.mondayOfYear(n),t,2)},x:t(h),X:t(g),y:function(n,t){return It(n.getFullYear()%100,t,2)},Y:function(n,t){return It(n.getFullYear()%1e4,t,4)},Z:ie,"%":function(){return"%"}},C={a:r,A:u,b:i,B:o,c:a,d:Qt,e:Qt,H:te,I:te,j:ne,L:ue,m:Kt,M:ee,p:s,S:re,U:Xt,w:Vt,W:$t,x:c,X:l,y:Wt,Y:Bt,Z:Jt,"%":oe};return t}function It(n,t,e){var r=0>n?"-":"",u=(r?-n:n)+"",i=u.length;return r+(e>i?new Array(e-i+1).join(t)+u:u)}function Yt(n){return new RegExp("^(?:"+n.map(ta.requote).join("|")+")","i")}function Zt(n){for(var t=new l,e=-1,r=n.length;++e68?1900:2e3)}function Kt(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.m=r[0]-1,e+r[0].length):-1}function Qt(n,t,e){fc.lastIndex=0;var 
r=fc.exec(t.slice(e,e+2));return r?(n.d=+r[0],e+r[0].length):-1}function ne(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+3));return r?(n.j=+r[0],e+r[0].length):-1}function te(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.H=+r[0],e+r[0].length):-1}function ee(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.M=+r[0],e+r[0].length):-1}function re(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+2));return r?(n.S=+r[0],e+r[0].length):-1}function ue(n,t,e){fc.lastIndex=0;var r=fc.exec(t.slice(e,e+3));return r?(n.L=+r[0],e+r[0].length):-1}function ie(n){var t=n.getTimezoneOffset(),e=t>0?"-":"+",r=ga(t)/60|0,u=ga(t)%60;return e+It(r,"0",2)+It(u,"0",2)}function oe(n,t,e){hc.lastIndex=0;var r=hc.exec(t.slice(e,e+1));return r?e+r[0].length:-1}function ae(n){for(var t=n.length,e=-1;++e=0?1:-1,a=o*e,c=Math.cos(t),l=Math.sin(t),s=i*l,f=u*c+s*Math.cos(a),h=s*o*Math.sin(a);yc.add(Math.atan2(h,f)),r=n,u=c,i=l}var t,e,r,u,i;Mc.point=function(o,a){Mc.point=n,r=(t=o)*Da,u=Math.cos(a=(e=a)*Da/2+qa/4),i=Math.sin(a)},Mc.lineEnd=function(){n(t,e)}}function pe(n){var t=n[0],e=n[1],r=Math.cos(e);return[r*Math.cos(t),r*Math.sin(t),Math.sin(e)]}function ve(n,t){return n[0]*t[0]+n[1]*t[1]+n[2]*t[2]}function de(n,t){return[n[1]*t[2]-n[2]*t[1],n[2]*t[0]-n[0]*t[2],n[0]*t[1]-n[1]*t[0]]}function me(n,t){n[0]+=t[0],n[1]+=t[1],n[2]+=t[2]}function ye(n,t){return[n[0]*t,n[1]*t,n[2]*t]}function Me(n){var t=Math.sqrt(n[0]*n[0]+n[1]*n[1]+n[2]*n[2]);n[0]/=t,n[1]/=t,n[2]/=t}function xe(n){return[Math.atan2(n[1],n[0]),tt(n[2])]}function be(n,t){return ga(n[0]-t[0])a;++a)u.point((e=n[a])[0],e[1]);return void u.lineEnd()}var c=new qe(e,n,null,!0),l=new qe(e,null,c,!1);c.o=l,i.push(c),o.push(l),c=new qe(r,n,null,!1),l=new qe(r,null,c,!0),c.o=l,i.push(c),o.push(l)}}),o.sort(t),ze(i),ze(o),i.length){for(var a=0,c=e,l=o.length;l>a;++a)o[a].e=c=!c;for(var s,f,h=i[0];;){for(var g=h,p=!0;g.v;)if((g=g.n)===h)return;s=g.z,u.lineStart();do{if(g.v=g.o.v=!0,g.e){if(p)for(var a=0,l=s.length;l>a;++a)u.point((f=s[a])[0],f[1]);else r(g.x,g.n.x,1,u);g=g.n}else{if(p){s=g.p.z;for(var a=s.length-1;a>=0;--a)u.point((f=s[a])[0],f[1])}else r(g.x,g.p.x,-1,u);g=g.p}g=g.o,s=g.z,p=!p}while(!g.v);u.lineEnd()}}}function ze(n){if(t=n.length){for(var t,e,r=0,u=n[0];++r0){for(b||(i.polygonStart(),b=!0),i.lineStart();++o1&&2&t&&e.push(e.pop().concat(e.shift())),g.push(e.filter(Te))}var g,p,v,d=t(i),m=u.invert(r[0],r[1]),y={point:o,lineStart:c,lineEnd:l,polygonStart:function(){y.point=s,y.lineStart=f,y.lineEnd=h,g=[],p=[]},polygonEnd:function(){y.point=o,y.lineStart=c,y.lineEnd=l,g=ta.merge(g);var n=Fe(m,p);g.length?(b||(i.polygonStart(),b=!0),Ce(g,De,n,e,i)):n&&(b||(i.polygonStart(),b=!0),i.lineStart(),e(null,null,1,i),i.lineEnd()),b&&(i.polygonEnd(),b=!1),g=p=null},sphere:function(){i.polygonStart(),i.lineStart(),e(null,null,1,i),i.lineEnd(),i.polygonEnd()}},M=Re(),x=t(M),b=!1;return y}}function Te(n){return n.length>1}function Re(){var n,t=[];return{lineStart:function(){t.push(n=[])},point:function(t,e){n.push([t,e])},lineEnd:b,buffer:function(){var e=t;return t=[],n=null,e},rejoin:function(){t.length>1&&t.push(t.pop().concat(t.shift()))}}}function De(n,t){return((n=n.x)[0]<0?n[1]-Ra-Ca:Ra-n[1])-((t=t.x)[0]<0?t[1]-Ra-Ca:Ra-t[1])}function Pe(n){var t,e=0/0,r=0/0,u=0/0;return{lineStart:function(){n.lineStart(),t=1},point:function(i,o){var 
a=i>0?qa:-qa,c=ga(i-e);ga(c-qa)0?Ra:-Ra),n.point(u,r),n.lineEnd(),n.lineStart(),n.point(a,r),n.point(i,r),t=0):u!==a&&c>=qa&&(ga(e-u)Ca?Math.atan((Math.sin(t)*(i=Math.cos(r))*Math.sin(e)-Math.sin(r)*(u=Math.cos(t))*Math.sin(n))/(u*i*o)):(t+r)/2}function je(n,t,e,r){var u;if(null==n)u=e*Ra,r.point(-qa,u),r.point(0,u),r.point(qa,u),r.point(qa,0),r.point(qa,-u),r.point(0,-u),r.point(-qa,-u),r.point(-qa,0),r.point(-qa,u);else if(ga(n[0]-t[0])>Ca){var i=n[0]a;++a){var l=t[a],s=l.length;if(s)for(var f=l[0],h=f[0],g=f[1]/2+qa/4,p=Math.sin(g),v=Math.cos(g),d=1;;){d===s&&(d=0),n=l[d];var m=n[0],y=n[1]/2+qa/4,M=Math.sin(y),x=Math.cos(y),b=m-h,_=b>=0?1:-1,w=_*b,S=w>qa,k=p*M;if(yc.add(Math.atan2(k*_*Math.sin(w),v*x+k*Math.cos(w))),i+=S?b+_*La:b,S^h>=e^m>=e){var E=de(pe(f),pe(n));Me(E);var A=de(u,E);Me(A);var N=(S^b>=0?-1:1)*tt(A[2]);(r>N||r===N&&(E[0]||E[1]))&&(o+=S^b>=0?1:-1)}if(!d++)break;h=m,p=M,v=x,f=n}}return(-Ca>i||Ca>i&&0>yc)^1&o}function He(n){function t(n,t){return Math.cos(n)*Math.cos(t)>i}function e(n){var e,i,c,l,s;return{lineStart:function(){l=c=!1,s=1},point:function(f,h){var g,p=[f,h],v=t(f,h),d=o?v?0:u(f,h):v?u(f+(0>f?qa:-qa),h):0;if(!e&&(l=c=v)&&n.lineStart(),v!==c&&(g=r(e,p),(be(e,g)||be(p,g))&&(p[0]+=Ca,p[1]+=Ca,v=t(p[0],p[1]))),v!==c)s=0,v?(n.lineStart(),g=r(p,e),n.point(g[0],g[1])):(g=r(e,p),n.point(g[0],g[1]),n.lineEnd()),e=g;else if(a&&e&&o^v){var m;d&i||!(m=r(p,e,!0))||(s=0,o?(n.lineStart(),n.point(m[0][0],m[0][1]),n.point(m[1][0],m[1][1]),n.lineEnd()):(n.point(m[1][0],m[1][1]),n.lineEnd(),n.lineStart(),n.point(m[0][0],m[0][1])))}!v||e&&be(e,p)||n.point(p[0],p[1]),e=p,c=v,i=d},lineEnd:function(){c&&n.lineEnd(),e=null},clean:function(){return s|(l&&c)<<1}}}function r(n,t,e){var r=pe(n),u=pe(t),o=[1,0,0],a=de(r,u),c=ve(a,a),l=a[0],s=c-l*l;if(!s)return!e&&n;var f=i*c/s,h=-i*l/s,g=de(o,a),p=ye(o,f),v=ye(a,h);me(p,v);var d=g,m=ve(p,d),y=ve(d,d),M=m*m-y*(ve(p,p)-1);if(!(0>M)){var x=Math.sqrt(M),b=ye(d,(-m-x)/y);if(me(b,p),b=xe(b),!e)return b;var _,w=n[0],S=t[0],k=n[1],E=t[1];w>S&&(_=w,w=S,S=_);var A=S-w,N=ga(A-qa)A;if(!N&&k>E&&(_=k,k=E,E=_),C?N?k+E>0^b[1]<(ga(b[0]-w)qa^(w<=b[0]&&b[0]<=S)){var z=ye(d,(-m+x)/y);return me(z,p),[b,xe(z)]}}}function u(t,e){var r=o?n:qa-n,u=0;return-r>t?u|=1:t>r&&(u|=2),-r>e?u|=4:e>r&&(u|=8),u}var i=Math.cos(n),o=i>0,a=ga(i)>Ca,c=gr(n,6*Da);return Le(t,e,c,o?[0,-n]:[-qa,n-qa])}function Oe(n,t,e,r){return function(u){var i,o=u.a,a=u.b,c=o.x,l=o.y,s=a.x,f=a.y,h=0,g=1,p=s-c,v=f-l;if(i=n-c,p||!(i>0)){if(i/=p,0>p){if(h>i)return;g>i&&(g=i)}else if(p>0){if(i>g)return;i>h&&(h=i)}if(i=e-c,p||!(0>i)){if(i/=p,0>p){if(i>g)return;i>h&&(h=i)}else if(p>0){if(h>i)return;g>i&&(g=i)}if(i=t-l,v||!(i>0)){if(i/=v,0>v){if(h>i)return;g>i&&(g=i)}else if(v>0){if(i>g)return;i>h&&(h=i)}if(i=r-l,v||!(0>i)){if(i/=v,0>v){if(i>g)return;i>h&&(h=i)}else if(v>0){if(h>i)return;g>i&&(g=i)}return h>0&&(u.a={x:c+h*p,y:l+h*v}),1>g&&(u.b={x:c+g*p,y:l+g*v}),u}}}}}}function Ie(n,t,e,r){function u(r,u){return ga(r[0]-n)0?0:3:ga(r[0]-e)0?2:1:ga(r[1]-t)0?1:0:u>0?3:2}function i(n,t){return o(n.x,t.x)}function o(n,t){var e=u(n,1),r=u(t,1);return e!==r?e-r:0===e?t[1]-n[1]:1===e?n[0]-t[0]:2===e?n[1]-t[1]:t[0]-n[0]}return function(a){function c(n){for(var t=0,e=d.length,r=n[1],u=0;e>u;++u)for(var i,o=1,a=d[u],c=a.length,l=a[0];c>o;++o)i=a[o],l[1]<=r?i[1]>r&&Q(l,i,n)>0&&++t:i[1]<=r&&Q(l,i,n)<0&&--t,l=i;return 0!==t}function l(i,a,c,l){var s=0,f=0;if(null==i||(s=u(i,c))!==(f=u(a,c))||o(i,a)<0^c>0){do l.point(0===s||3===s?n:e,s>1?r:t);while((s=(s+c+4)%4)!==f)}else l.point(a[0],a[1])}function s(u,i){return 
u>=n&&e>=u&&i>=t&&r>=i}function f(n,t){s(n,t)&&a.point(n,t)}function h(){C.point=p,d&&d.push(m=[]),S=!0,w=!1,b=_=0/0}function g(){v&&(p(y,M),x&&w&&A.rejoin(),v.push(A.buffer())),C.point=f,w&&a.lineEnd()}function p(n,t){n=Math.max(-Tc,Math.min(Tc,n)),t=Math.max(-Tc,Math.min(Tc,t));var e=s(n,t);if(d&&m.push([n,t]),S)y=n,M=t,x=e,S=!1,e&&(a.lineStart(),a.point(n,t));else if(e&&w)a.point(n,t);else{var r={a:{x:b,y:_},b:{x:n,y:t}};N(r)?(w||(a.lineStart(),a.point(r.a.x,r.a.y)),a.point(r.b.x,r.b.y),e||a.lineEnd(),k=!1):e&&(a.lineStart(),a.point(n,t),k=!1)}b=n,_=t,w=e}var v,d,m,y,M,x,b,_,w,S,k,E=a,A=Re(),N=Oe(n,t,e,r),C={point:f,lineStart:h,lineEnd:g,polygonStart:function(){a=A,v=[],d=[],k=!0},polygonEnd:function(){a=E,v=ta.merge(v);var t=c([n,r]),e=k&&t,u=v.length;(e||u)&&(a.polygonStart(),e&&(a.lineStart(),l(null,null,1,a),a.lineEnd()),u&&Ce(v,i,t,l,a),a.polygonEnd()),v=d=m=null}};return C}}function Ye(n){var t=0,e=qa/3,r=ir(n),u=r(t,e);return u.parallels=function(n){return arguments.length?r(t=n[0]*qa/180,e=n[1]*qa/180):[t/qa*180,e/qa*180]},u}function Ze(n,t){function e(n,t){var e=Math.sqrt(i-2*u*Math.sin(t))/u;return[e*Math.sin(n*=u),o-e*Math.cos(n)]}var r=Math.sin(n),u=(r+Math.sin(t))/2,i=1+r*(2*u-r),o=Math.sqrt(i)/u;return e.invert=function(n,t){var e=o-t;return[Math.atan2(n,e)/u,tt((i-(n*n+e*e)*u*u)/(2*u))]},e}function Ve(){function n(n,t){Dc+=u*n-r*t,r=n,u=t}var t,e,r,u;Hc.point=function(i,o){Hc.point=n,t=r=i,e=u=o},Hc.lineEnd=function(){n(t,e)}}function Xe(n,t){Pc>n&&(Pc=n),n>jc&&(jc=n),Uc>t&&(Uc=t),t>Fc&&(Fc=t)}function $e(){function n(n,t){o.push("M",n,",",t,i)}function t(n,t){o.push("M",n,",",t),a.point=e}function e(n,t){o.push("L",n,",",t)}function r(){a.point=n}function u(){o.push("Z")}var i=Be(4.5),o=[],a={point:n,lineStart:function(){a.point=t},lineEnd:r,polygonStart:function(){a.lineEnd=u},polygonEnd:function(){a.lineEnd=r,a.point=n},pointRadius:function(n){return i=Be(n),a},result:function(){if(o.length){var n=o.join("");return o=[],n}}};return a}function Be(n){return"m0,"+n+"a"+n+","+n+" 0 1,1 0,"+-2*n+"a"+n+","+n+" 0 1,1 0,"+2*n+"z"}function We(n,t){_c+=n,wc+=t,++Sc}function Je(){function n(n,r){var u=n-t,i=r-e,o=Math.sqrt(u*u+i*i);kc+=o*(t+n)/2,Ec+=o*(e+r)/2,Ac+=o,We(t=n,e=r)}var t,e;Ic.point=function(r,u){Ic.point=n,We(t=r,e=u)}}function Ge(){Ic.point=We}function Ke(){function n(n,t){var e=n-r,i=t-u,o=Math.sqrt(e*e+i*i);kc+=o*(r+n)/2,Ec+=o*(u+t)/2,Ac+=o,o=u*n-r*t,Nc+=o*(r+n),Cc+=o*(u+t),zc+=3*o,We(r=n,u=t)}var t,e,r,u;Ic.point=function(i,o){Ic.point=n,We(t=r=i,e=u=o)},Ic.lineEnd=function(){n(t,e)}}function Qe(n){function t(t,e){n.moveTo(t+o,e),n.arc(t,e,o,0,La)}function e(t,e){n.moveTo(t,e),a.point=r}function r(t,e){n.lineTo(t,e)}function u(){a.point=t}function i(){n.closePath()}var o=4.5,a={point:t,lineStart:function(){a.point=e},lineEnd:u,polygonStart:function(){a.lineEnd=i},polygonEnd:function(){a.lineEnd=u,a.point=t},pointRadius:function(n){return o=n,a},result:b};return a}function nr(n){function t(n){return(a?r:e)(n)}function e(t){return rr(t,function(e,r){e=n(e,r),t.point(e[0],e[1])})}function r(t){function e(e,r){e=n(e,r),t.point(e[0],e[1])}function r(){M=0/0,S.point=i,t.lineStart()}function i(e,r){var i=pe([e,r]),o=n(e,r);u(M,x,y,b,_,w,M=o[0],x=o[1],y=e,b=i[0],_=i[1],w=i[2],a,t),t.point(M,x)}function o(){S.point=e,t.lineEnd()}function c(){r(),S.point=l,S.lineEnd=s}function l(n,t){i(f=n,h=t),g=M,p=x,v=b,d=_,m=w,S.point=i}function s(){u(M,x,y,b,_,w,g,p,f,v,d,m,a,t),S.lineEnd=o,o()}var 
f,h,g,p,v,d,m,y,M,x,b,_,w,S={point:e,lineStart:r,lineEnd:o,polygonStart:function(){t.polygonStart(),S.lineStart=c +},polygonEnd:function(){t.polygonEnd(),S.lineStart=r}};return S}function u(t,e,r,a,c,l,s,f,h,g,p,v,d,m){var y=s-t,M=f-e,x=y*y+M*M;if(x>4*i&&d--){var b=a+g,_=c+p,w=l+v,S=Math.sqrt(b*b+_*_+w*w),k=Math.asin(w/=S),E=ga(ga(w)-1)i||ga((y*z+M*q)/x-.5)>.3||o>a*g+c*p+l*v)&&(u(t,e,r,a,c,l,N,C,E,b/=S,_/=S,w,d,m),m.point(N,C),u(N,C,E,b,_,w,s,f,h,g,p,v,d,m))}}var i=.5,o=Math.cos(30*Da),a=16;return t.precision=function(n){return arguments.length?(a=(i=n*n)>0&&16,t):Math.sqrt(i)},t}function tr(n){var t=nr(function(t,e){return n([t*Pa,e*Pa])});return function(n){return or(t(n))}}function er(n){this.stream=n}function rr(n,t){return{point:t,sphere:function(){n.sphere()},lineStart:function(){n.lineStart()},lineEnd:function(){n.lineEnd()},polygonStart:function(){n.polygonStart()},polygonEnd:function(){n.polygonEnd()}}}function ur(n){return ir(function(){return n})()}function ir(n){function t(n){return n=a(n[0]*Da,n[1]*Da),[n[0]*h+c,l-n[1]*h]}function e(n){return n=a.invert((n[0]-c)/h,(l-n[1])/h),n&&[n[0]*Pa,n[1]*Pa]}function r(){a=Ae(o=lr(m,M,x),i);var n=i(v,d);return c=g-n[0]*h,l=p+n[1]*h,u()}function u(){return s&&(s.valid=!1,s=null),t}var i,o,a,c,l,s,f=nr(function(n,t){return n=i(n,t),[n[0]*h+c,l-n[1]*h]}),h=150,g=480,p=250,v=0,d=0,m=0,M=0,x=0,b=Lc,_=y,w=null,S=null;return t.stream=function(n){return s&&(s.valid=!1),s=or(b(o,f(_(n)))),s.valid=!0,s},t.clipAngle=function(n){return arguments.length?(b=null==n?(w=n,Lc):He((w=+n)*Da),u()):w},t.clipExtent=function(n){return arguments.length?(S=n,_=n?Ie(n[0][0],n[0][1],n[1][0],n[1][1]):y,u()):S},t.scale=function(n){return arguments.length?(h=+n,r()):h},t.translate=function(n){return arguments.length?(g=+n[0],p=+n[1],r()):[g,p]},t.center=function(n){return arguments.length?(v=n[0]%360*Da,d=n[1]%360*Da,r()):[v*Pa,d*Pa]},t.rotate=function(n){return arguments.length?(m=n[0]%360*Da,M=n[1]%360*Da,x=n.length>2?n[2]%360*Da:0,r()):[m*Pa,M*Pa,x*Pa]},ta.rebind(t,f,"precision"),function(){return i=n.apply(this,arguments),t.invert=i.invert&&e,r()}}function or(n){return rr(n,function(t,e){n.point(t*Da,e*Da)})}function ar(n,t){return[n,t]}function cr(n,t){return[n>qa?n-La:-qa>n?n+La:n,t]}function lr(n,t,e){return n?t||e?Ae(fr(n),hr(t,e)):fr(n):t||e?hr(t,e):cr}function sr(n){return function(t,e){return t+=n,[t>qa?t-La:-qa>t?t+La:t,e]}}function fr(n){var t=sr(n);return t.invert=sr(-n),t}function hr(n,t){function e(n,t){var e=Math.cos(t),a=Math.cos(n)*e,c=Math.sin(n)*e,l=Math.sin(t),s=l*r+a*u;return[Math.atan2(c*i-s*o,a*r-l*u),tt(s*i+c*o)]}var r=Math.cos(n),u=Math.sin(n),i=Math.cos(t),o=Math.sin(t);return e.invert=function(n,t){var e=Math.cos(t),a=Math.cos(n)*e,c=Math.sin(n)*e,l=Math.sin(t),s=l*i-c*o;return[Math.atan2(c*i+l*o,a*r+s*u),tt(s*r-a*u)]},e}function gr(n,t){var e=Math.cos(n),r=Math.sin(n);return function(u,i,o,a){var c=o*t;null!=u?(u=pr(e,u),i=pr(e,i),(o>0?i>u:u>i)&&(u+=o*La)):(u=n+o*La,i=n-.5*c);for(var l,s=u;o>0?s>i:i>s;s-=c)a.point((l=xe([e,-r*Math.cos(s),-r*Math.sin(s)]))[0],l[1])}}function pr(n,t){var e=pe(t);e[0]-=n,Me(e);var r=nt(-e[1]);return((-e[2]<0?-r:r)+2*Math.PI-Ca)%(2*Math.PI)}function vr(n,t,e){var r=ta.range(n,t-Ca,e).concat(t);return function(n){return r.map(function(t){return[n,t]})}}function dr(n,t,e){var r=ta.range(n,t-Ca,e).concat(t);return function(n){return r.map(function(t){return[t,n]})}}function mr(n){return n.source}function yr(n){return n.target}function Mr(n,t,e,r){var 
u=Math.cos(t),i=Math.sin(t),o=Math.cos(r),a=Math.sin(r),c=u*Math.cos(n),l=u*Math.sin(n),s=o*Math.cos(e),f=o*Math.sin(e),h=2*Math.asin(Math.sqrt(it(r-t)+u*o*it(e-n))),g=1/Math.sin(h),p=h?function(n){var t=Math.sin(n*=h)*g,e=Math.sin(h-n)*g,r=e*c+t*s,u=e*l+t*f,o=e*i+t*a;return[Math.atan2(u,r)*Pa,Math.atan2(o,Math.sqrt(r*r+u*u))*Pa]}:function(){return[n*Pa,t*Pa]};return p.distance=h,p}function xr(){function n(n,u){var i=Math.sin(u*=Da),o=Math.cos(u),a=ga((n*=Da)-t),c=Math.cos(a);Yc+=Math.atan2(Math.sqrt((a=o*Math.sin(a))*a+(a=r*i-e*o*c)*a),e*i+r*o*c),t=n,e=i,r=o}var t,e,r;Zc.point=function(u,i){t=u*Da,e=Math.sin(i*=Da),r=Math.cos(i),Zc.point=n},Zc.lineEnd=function(){Zc.point=Zc.lineEnd=b}}function br(n,t){function e(t,e){var r=Math.cos(t),u=Math.cos(e),i=n(r*u);return[i*u*Math.sin(t),i*Math.sin(e)]}return e.invert=function(n,e){var r=Math.sqrt(n*n+e*e),u=t(r),i=Math.sin(u),o=Math.cos(u);return[Math.atan2(n*i,r*o),Math.asin(r&&e*i/r)]},e}function _r(n,t){function e(n,t){o>0?-Ra+Ca>t&&(t=-Ra+Ca):t>Ra-Ca&&(t=Ra-Ca);var e=o/Math.pow(u(t),i);return[e*Math.sin(i*n),o-e*Math.cos(i*n)]}var r=Math.cos(n),u=function(n){return Math.tan(qa/4+n/2)},i=n===t?Math.sin(n):Math.log(r/Math.cos(t))/Math.log(u(t)/u(n)),o=r*Math.pow(u(n),i)/i;return i?(e.invert=function(n,t){var e=o-t,r=K(i)*Math.sqrt(n*n+e*e);return[Math.atan2(n,e)/i,2*Math.atan(Math.pow(o/r,1/i))-Ra]},e):Sr}function wr(n,t){function e(n,t){var e=i-t;return[e*Math.sin(u*n),i-e*Math.cos(u*n)]}var r=Math.cos(n),u=n===t?Math.sin(n):(r-Math.cos(t))/(t-n),i=r/u+n;return ga(u)u;u++){for(;r>1&&Q(n[e[r-2]],n[e[r-1]],n[u])<=0;)--r;e[r++]=u}return e.slice(0,r)}function zr(n,t){return n[0]-t[0]||n[1]-t[1]}function qr(n,t,e){return(e[0]-t[0])*(n[1]-t[1])<(e[1]-t[1])*(n[0]-t[0])}function Lr(n,t,e,r){var u=n[0],i=e[0],o=t[0]-u,a=r[0]-i,c=n[1],l=e[1],s=t[1]-c,f=r[1]-l,h=(a*(c-l)-f*(u-i))/(f*o-a*s);return[u+h*o,c+h*s]}function Tr(n){var t=n[0],e=n[n.length-1];return!(t[0]-e[0]||t[1]-e[1])}function Rr(){tu(this),this.edge=this.site=this.circle=null}function Dr(n){var t=el.pop()||new Rr;return t.site=n,t}function Pr(n){Xr(n),Qc.remove(n),el.push(n),tu(n)}function Ur(n){var t=n.circle,e=t.x,r=t.cy,u={x:e,y:r},i=n.P,o=n.N,a=[n];Pr(n);for(var c=i;c.circle&&ga(e-c.circle.x)s;++s)l=a[s],c=a[s-1],Kr(l.edge,c.site,l.site,u);c=a[0],l=a[f-1],l.edge=Jr(c.site,l.site,null,u),Vr(c),Vr(l)}function jr(n){for(var t,e,r,u,i=n.x,o=n.y,a=Qc._;a;)if(r=Fr(a,o)-i,r>Ca)a=a.L;else{if(u=i-Hr(a,o),!(u>Ca)){r>-Ca?(t=a.P,e=a):u>-Ca?(t=a,e=a.N):t=e=a;break}if(!a.R){t=a;break}a=a.R}var c=Dr(n);if(Qc.insert(t,c),t||e){if(t===e)return Xr(t),e=Dr(t.site),Qc.insert(c,e),c.edge=e.edge=Jr(t.site,c.site),Vr(t),void Vr(e);if(!e)return void(c.edge=Jr(t.site,c.site));Xr(t),Xr(e);var l=t.site,s=l.x,f=l.y,h=n.x-s,g=n.y-f,p=e.site,v=p.x-s,d=p.y-f,m=2*(h*d-g*v),y=h*h+g*g,M=v*v+d*d,x={x:(d*y-g*M)/m+s,y:(h*M-v*y)/m+f};Kr(e.edge,l,p,x),c.edge=Jr(l,n,null,x),e.edge=Jr(n,p,null,x),Vr(t),Vr(e)}}function Fr(n,t){var e=n.site,r=e.x,u=e.y,i=u-t;if(!i)return r;var o=n.P;if(!o)return-1/0;e=o.site;var a=e.x,c=e.y,l=c-t;if(!l)return a;var s=a-r,f=1/i-1/l,h=s/l;return f?(-h+Math.sqrt(h*h-2*f*(s*s/(-2*l)-c+l/2+u-i/2)))/f+r:(r+a)/2}function Hr(n,t){var e=n.N;if(e)return Fr(e,t);var r=n.site;return r.y===t?r.x:1/0}function Or(n){this.site=n,this.edges=[]}function Ir(n){for(var t,e,r,u,i,o,a,c,l,s,f=n[0][0],h=n[1][0],g=n[0][1],p=n[1][1],v=Kc,d=v.length;d--;)if(i=v[d],i&&i.prepare())for(a=i.edges,c=a.length,o=0;c>o;)s=a[o].end(),r=s.x,u=s.y,l=a[++o%c].start(),t=l.x,e=l.y,(ga(r-t)>Ca||ga(u-e)>Ca)&&(a.splice(o,0,new 
Qr(Gr(i.site,s,ga(r-f)Ca?{x:f,y:ga(t-f)Ca?{x:ga(e-p)Ca?{x:h,y:ga(t-h)Ca?{x:ga(e-g)=-za)){var g=c*c+l*l,p=s*s+f*f,v=(f*g-l*p)/h,d=(c*p-s*g)/h,f=d+a,m=rl.pop()||new Zr;m.arc=n,m.site=u,m.x=v+o,m.y=f+Math.sqrt(v*v+d*d),m.cy=f,n.circle=m;for(var y=null,M=tl._;M;)if(m.yd||d>=a)return;if(h>p){if(i){if(i.y>=l)return}else i={x:d,y:c};e={x:d,y:l}}else{if(i){if(i.yr||r>1)if(h>p){if(i){if(i.y>=l)return}else i={x:(c-u)/r,y:c};e={x:(l-u)/r,y:l}}else{if(i){if(i.yg){if(i){if(i.x>=a)return}else i={x:o,y:r*o+u};e={x:a,y:r*a+u}}else{if(i){if(i.xi||f>o||r>h||u>g)){if(p=n.point){var p,v=t-n.x,d=e-n.y,m=v*v+d*d;if(c>m){var y=Math.sqrt(c=m);r=t-y,u=e-y,i=t+y,o=e+y,a=p}}for(var M=n.nodes,x=.5*(s+h),b=.5*(f+g),_=t>=x,w=e>=b,S=w<<1|_,k=S+4;k>S;++S)if(n=M[3&S])switch(3&S){case 0:l(n,s,f,x,b);break;case 1:l(n,x,f,h,b);break;case 2:l(n,s,b,x,g);break;case 3:l(n,x,b,h,g)}}}(n,r,u,i,o),a}function gu(n,t){n=ta.rgb(n),t=ta.rgb(t);var e=n.r,r=n.g,u=n.b,i=t.r-e,o=t.g-r,a=t.b-u;return function(n){return"#"+xt(Math.round(e+i*n))+xt(Math.round(r+o*n))+xt(Math.round(u+a*n))}}function pu(n,t){var e,r={},u={};for(e in n)e in t?r[e]=mu(n[e],t[e]):u[e]=n[e];for(e in t)e in n||(u[e]=t[e]);return function(n){for(e in r)u[e]=r[e](n);return u}}function vu(n,t){return n=+n,t=+t,function(e){return n*(1-e)+t*e}}function du(n,t){var e,r,u,i=il.lastIndex=ol.lastIndex=0,o=-1,a=[],c=[];for(n+="",t+="";(e=il.exec(n))&&(r=ol.exec(t));)(u=r.index)>i&&(u=t.slice(i,u),a[o]?a[o]+=u:a[++o]=u),(e=e[0])===(r=r[0])?a[o]?a[o]+=r:a[++o]=r:(a[++o]=null,c.push({i:o,x:vu(e,r)})),i=ol.lastIndex;return ir;++r)a[(e=c[r]).i]=e.x(n);return a.join("")})}function mu(n,t){for(var e,r=ta.interpolators.length;--r>=0&&!(e=ta.interpolators[r](n,t)););return e}function yu(n,t){var e,r=[],u=[],i=n.length,o=t.length,a=Math.min(n.length,t.length);for(e=0;a>e;++e)r.push(mu(n[e],t[e]));for(;i>e;++e)u[e]=n[e];for(;o>e;++e)u[e]=t[e];return function(n){for(e=0;a>e;++e)u[e]=r[e](n);return u}}function Mu(n){return function(t){return 0>=t?0:t>=1?1:n(t)}}function xu(n){return function(t){return 1-n(1-t)}}function bu(n){return function(t){return.5*(.5>t?n(2*t):2-n(2-2*t))}}function _u(n){return n*n}function wu(n){return n*n*n}function Su(n){if(0>=n)return 0;if(n>=1)return 1;var t=n*n,e=t*n;return 4*(.5>n?e:3*(n-t)+e-.75)}function ku(n){return function(t){return Math.pow(t,n)}}function Eu(n){return 1-Math.cos(n*Ra)}function Au(n){return Math.pow(2,10*(n-1))}function Nu(n){return 1-Math.sqrt(1-n*n)}function Cu(n,t){var e;return arguments.length<2&&(t=.45),arguments.length?e=t/La*Math.asin(1/n):(n=1,e=t/4),function(r){return 1+n*Math.pow(2,-10*r)*Math.sin((r-e)*La/t)}}function zu(n){return n||(n=1.70158),function(t){return t*t*((n+1)*t-n)}}function qu(n){return 1/2.75>n?7.5625*n*n:2/2.75>n?7.5625*(n-=1.5/2.75)*n+.75:2.5/2.75>n?7.5625*(n-=2.25/2.75)*n+.9375:7.5625*(n-=2.625/2.75)*n+.984375}function Lu(n,t){n=ta.hcl(n),t=ta.hcl(t);var e=n.h,r=n.c,u=n.l,i=t.h-e,o=t.c-r,a=t.l-u;return isNaN(o)&&(o=0,r=isNaN(r)?t.c:r),isNaN(i)?(i=0,e=isNaN(e)?t.h:e):i>180?i-=360:-180>i&&(i+=360),function(n){return st(e+i*n,r+o*n,u+a*n)+""}}function Tu(n,t){n=ta.hsl(n),t=ta.hsl(t);var e=n.h,r=n.s,u=n.l,i=t.h-e,o=t.s-r,a=t.l-u;return isNaN(o)&&(o=0,r=isNaN(r)?t.s:r),isNaN(i)?(i=0,e=isNaN(e)?t.h:e):i>180?i-=360:-180>i&&(i+=360),function(n){return ct(e+i*n,r+o*n,u+a*n)+""}}function Ru(n,t){n=ta.lab(n),t=ta.lab(t);var e=n.l,r=n.a,u=n.b,i=t.l-e,o=t.a-r,a=t.b-u;return function(n){return ht(e+i*n,r+o*n,u+a*n)+""}}function Du(n,t){return t-=n,function(e){return Math.round(n+t*e)}}function Pu(n){var 
t=[n.a,n.b],e=[n.c,n.d],r=ju(t),u=Uu(t,e),i=ju(Fu(e,t,-u))||0;t[0]*e[1]180?s+=360:s-l>180&&(l+=360),u.push({i:r.push(r.pop()+"rotate(",null,")")-2,x:vu(l,s)})):s&&r.push(r.pop()+"rotate("+s+")"),f!=h?u.push({i:r.push(r.pop()+"skewX(",null,")")-2,x:vu(f,h)}):h&&r.push(r.pop()+"skewX("+h+")"),g[0]!=p[0]||g[1]!=p[1]?(e=r.push(r.pop()+"scale(",null,",",null,")"),u.push({i:e-4,x:vu(g[0],p[0])},{i:e-2,x:vu(g[1],p[1])})):(1!=p[0]||1!=p[1])&&r.push(r.pop()+"scale("+p+")"),e=u.length,function(n){for(var t,i=-1;++i=0;)e.push(u[r])}function Qu(n,t){for(var e=[n],r=[];null!=(n=e.pop());)if(r.push(n),(i=n.children)&&(u=i.length))for(var u,i,o=-1;++oe;++e)(t=n[e][1])>u&&(r=e,u=t);return r}function si(n){return n.reduce(fi,0)}function fi(n,t){return n+t[1]}function hi(n,t){return gi(n,Math.ceil(Math.log(t.length)/Math.LN2+1))}function gi(n,t){for(var e=-1,r=+n[0],u=(n[1]-r)/t,i=[];++e<=t;)i[e]=u*e+r;return i}function pi(n){return[ta.min(n),ta.max(n)]}function vi(n,t){return n.value-t.value}function di(n,t){var e=n._pack_next;n._pack_next=t,t._pack_prev=n,t._pack_next=e,e._pack_prev=t}function mi(n,t){n._pack_next=t,t._pack_prev=n}function yi(n,t){var e=t.x-n.x,r=t.y-n.y,u=n.r+t.r;return.999*u*u>e*e+r*r}function Mi(n){function t(n){s=Math.min(n.x-n.r,s),f=Math.max(n.x+n.r,f),h=Math.min(n.y-n.r,h),g=Math.max(n.y+n.r,g)}if((e=n.children)&&(l=e.length)){var e,r,u,i,o,a,c,l,s=1/0,f=-1/0,h=1/0,g=-1/0;if(e.forEach(xi),r=e[0],r.x=-r.r,r.y=0,t(r),l>1&&(u=e[1],u.x=u.r,u.y=0,t(u),l>2))for(i=e[2],wi(r,u,i),t(i),di(r,i),r._pack_prev=i,di(i,u),u=r._pack_next,o=3;l>o;o++){wi(r,u,i=e[o]);var p=0,v=1,d=1;for(a=u._pack_next;a!==u;a=a._pack_next,v++)if(yi(a,i)){p=1;break}if(1==p)for(c=r._pack_prev;c!==a._pack_prev&&!yi(c,i);c=c._pack_prev,d++);p?(d>v||v==d&&u.ro;o++)i=e[o],i.x-=m,i.y-=y,M=Math.max(M,i.r+Math.sqrt(i.x*i.x+i.y*i.y));n.r=M,e.forEach(bi)}}function xi(n){n._pack_next=n._pack_prev=n}function bi(n){delete n._pack_next,delete n._pack_prev}function _i(n,t,e,r){var u=n.children;if(n.x=t+=r*n.x,n.y=e+=r*n.y,n.r*=r,u)for(var i=-1,o=u.length;++i=0;)t=u[i],t.z+=e,t.m+=e,e+=t.s+(r+=t.c)}function Ci(n,t,e){return n.a.parent===t.parent?n.a:e}function zi(n){return 1+ta.max(n,function(n){return n.y})}function qi(n){return n.reduce(function(n,t){return n+t.x},0)/n.length}function Li(n){var t=n.children;return t&&t.length?Li(t[0]):n}function Ti(n){var t,e=n.children;return e&&(t=e.length)?Ti(e[t-1]):n}function Ri(n){return{x:n.x,y:n.y,dx:n.dx,dy:n.dy}}function Di(n,t){var e=n.x+t[3],r=n.y+t[0],u=n.dx-t[1]-t[3],i=n.dy-t[0]-t[2];return 0>u&&(e+=u/2,u=0),0>i&&(r+=i/2,i=0),{x:e,y:r,dx:u,dy:i}}function Pi(n){var t=n[0],e=n[n.length-1];return e>t?[t,e]:[e,t]}function Ui(n){return n.rangeExtent?n.rangeExtent():Pi(n.range())}function ji(n,t,e,r){var u=e(n[0],n[1]),i=r(t[0],t[1]);return function(n){return i(u(n))}}function Fi(n,t){var e,r=0,u=n.length-1,i=n[r],o=n[u];return i>o&&(e=r,r=u,u=e,e=i,i=o,o=e),n[r]=t.floor(i),n[u]=t.ceil(o),n}function Hi(n){return n?{floor:function(t){return Math.floor(t/n)*n},ceil:function(t){return Math.ceil(t/n)*n}}:ml}function Oi(n,t,e,r){var u=[],i=[],o=0,a=Math.min(n.length,t.length)-1;for(n[a]2?Oi:ji,c=r?Iu:Ou;return o=u(n,t,c,e),a=u(t,n,c,mu),i}function i(n){return o(n)}var o,a;return i.invert=function(n){return a(n)},i.domain=function(t){return arguments.length?(n=t.map(Number),u()):n},i.range=function(n){return arguments.length?(t=n,u()):t},i.rangeRound=function(n){return i.range(n).interpolate(Du)},i.clamp=function(n){return arguments.length?(r=n,u()):r},i.interpolate=function(n){return 
arguments.length?(e=n,u()):e},i.ticks=function(t){return Xi(n,t)},i.tickFormat=function(t,e){return $i(n,t,e)},i.nice=function(t){return Zi(n,t),u()},i.copy=function(){return Ii(n,t,e,r)},u()}function Yi(n,t){return ta.rebind(n,t,"range","rangeRound","interpolate","clamp")}function Zi(n,t){return Fi(n,Hi(Vi(n,t)[2]))}function Vi(n,t){null==t&&(t=10);var e=Pi(n),r=e[1]-e[0],u=Math.pow(10,Math.floor(Math.log(r/t)/Math.LN10)),i=t/r*u;return.15>=i?u*=10:.35>=i?u*=5:.75>=i&&(u*=2),e[0]=Math.ceil(e[0]/u)*u,e[1]=Math.floor(e[1]/u)*u+.5*u,e[2]=u,e}function Xi(n,t){return ta.range.apply(ta,Vi(n,t))}function $i(n,t,e){var r=Vi(n,t);if(e){var u=ic.exec(e);if(u.shift(),"s"===u[8]){var i=ta.formatPrefix(Math.max(ga(r[0]),ga(r[1])));return u[7]||(u[7]="."+Bi(i.scale(r[2]))),u[8]="f",e=ta.format(u.join("")),function(n){return e(i.scale(n))+i.symbol}}u[7]||(u[7]="."+Wi(u[8],r)),e=u.join("")}else e=",."+Bi(r[2])+"f";return ta.format(e)}function Bi(n){return-Math.floor(Math.log(n)/Math.LN10+.01)}function Wi(n,t){var e=Bi(t[2]);return n in yl?Math.abs(e-Bi(Math.max(ga(t[0]),ga(t[1]))))+ +("e"!==n):e-2*("%"===n)}function Ji(n,t,e,r){function u(n){return(e?Math.log(0>n?0:n):-Math.log(n>0?0:-n))/Math.log(t)}function i(n){return e?Math.pow(t,n):-Math.pow(t,-n)}function o(t){return n(u(t))}return o.invert=function(t){return i(n.invert(t))},o.domain=function(t){return arguments.length?(e=t[0]>=0,n.domain((r=t.map(Number)).map(u)),o):r},o.base=function(e){return arguments.length?(t=+e,n.domain(r.map(u)),o):t},o.nice=function(){var t=Fi(r.map(u),e?Math:xl);return n.domain(t),r=t.map(i),o},o.ticks=function(){var n=Pi(r),o=[],a=n[0],c=n[1],l=Math.floor(u(a)),s=Math.ceil(u(c)),f=t%1?2:t;if(isFinite(s-l)){if(e){for(;s>l;l++)for(var h=1;f>h;h++)o.push(i(l)*h);o.push(i(l))}else for(o.push(i(l));l++0;h--)o.push(i(l)*h);for(l=0;o[l]c;s--);o=o.slice(l,s)}return o},o.tickFormat=function(n,t){if(!arguments.length)return Ml;arguments.length<2?t=Ml:"function"!=typeof t&&(t=ta.format(t));var r,a=Math.max(.1,n/o.ticks().length),c=e?(r=1e-12,Math.ceil):(r=-1e-12,Math.floor);return function(n){return n/i(c(u(n)+r))<=a?t(n):""}},o.copy=function(){return Ji(n.copy(),t,e,r)},Yi(o,n)}function Gi(n,t,e){function r(t){return n(u(t))}var u=Ki(t),i=Ki(1/t);return r.invert=function(t){return i(n.invert(t))},r.domain=function(t){return arguments.length?(n.domain((e=t.map(Number)).map(u)),r):e},r.ticks=function(n){return Xi(e,n)},r.tickFormat=function(n,t){return $i(e,n,t)},r.nice=function(n){return r.domain(Zi(e,n))},r.exponent=function(o){return arguments.length?(u=Ki(t=o),i=Ki(1/t),n.domain(e.map(u)),r):t},r.copy=function(){return Gi(n.copy(),t,e)},Yi(r,n)}function Ki(n){return function(t){return 0>t?-Math.pow(-t,n):Math.pow(t,n)}}function Qi(n,t){function e(e){return i[((u.get(e)||("range"===t.t?u.set(e,n.push(e)):0/0))-1)%i.length]}function r(t,e){return ta.range(n.length).map(function(n){return t+e*n})}var u,i,o;return e.domain=function(r){if(!arguments.length)return n;n=[],u=new l;for(var i,o=-1,a=r.length;++oe?[0/0,0/0]:[e>0?a[e-1]:n[0],et?0/0:t/i+n,[t,t+1/i]},r.copy=function(){return to(n,t,e)},u()}function eo(n,t){function e(e){return e>=e?t[ta.bisect(n,e)]:void 0}return e.domain=function(t){return arguments.length?(n=t,e):n},e.range=function(n){return arguments.length?(t=n,e):t},e.invertExtent=function(e){return e=t.indexOf(e),[n[e-1],n[e]]},e.copy=function(){return eo(n,t)},e}function ro(n){function t(n){return+n}return t.invert=t,t.domain=t.range=function(e){return arguments.length?(n=e.map(t),t):n},t.ticks=function(t){return 
Xi(n,t)},t.tickFormat=function(t,e){return $i(n,t,e)},t.copy=function(){return ro(n)},t}function uo(){return 0}function io(n){return n.innerRadius}function oo(n){return n.outerRadius}function ao(n){return n.startAngle}function co(n){return n.endAngle}function lo(n){return n&&n.padAngle}function so(n,t,e,r){return(n-e)*t-(t-r)*n>0?0:1}function fo(n,t,e,r,u){var i=n[0]-t[0],o=n[1]-t[1],a=(u?r:-r)/Math.sqrt(i*i+o*o),c=a*o,l=-a*i,s=n[0]+c,f=n[1]+l,h=t[0]+c,g=t[1]+l,p=(s+h)/2,v=(f+g)/2,d=h-s,m=g-f,y=d*d+m*m,M=e-r,x=s*g-h*f,b=(0>m?-1:1)*Math.sqrt(M*M*y-x*x),_=(x*m-d*b)/y,w=(-x*d-m*b)/y,S=(x*m+d*b)/y,k=(-x*d+m*b)/y,E=_-p,A=w-v,N=S-p,C=k-v;return E*E+A*A>N*N+C*C&&(_=S,w=k),[[_-c,w-l],[_*e/M,w*e/M]]}function ho(n){function t(t){function o(){l.push("M",i(n(s),a))}for(var c,l=[],s=[],f=-1,h=t.length,g=Et(e),p=Et(r);++f1&&u.push("H",r[0]),u.join("")}function mo(n){for(var t=0,e=n.length,r=n[0],u=[r[0],",",r[1]];++t1){a=t[1],i=n[c],c++,r+="C"+(u[0]+o[0])+","+(u[1]+o[1])+","+(i[0]-a[0])+","+(i[1]-a[1])+","+i[0]+","+i[1];for(var l=2;l9&&(u=3*t/Math.sqrt(u),o[a]=u*e,o[a+1]=u*r));for(a=-1;++a<=c;)u=(n[Math.min(c,a+1)][0]-n[Math.max(0,a-1)][0])/(6*(1+o[a]*o[a])),i.push([u||0,o[a]*u||0]);return i}function To(n){return n.length<3?go(n):n[0]+_o(n,Lo(n))}function Ro(n){for(var t,e,r,u=-1,i=n.length;++ur)return s();var u=i[i.active];u&&(--i.count,delete i[i.active],u.event&&u.event.interrupt.call(n,n.__data__,u.index)),i.active=r,o.event&&o.event.start.call(n,n.__data__,t),o.tween.forEach(function(e,r){(r=r.call(n,n.__data__,t))&&v.push(r)}),h=o.ease,f=o.duration,ta.timer(function(){return p.c=l(e||1)?Ne:l,1},0,a)}function l(e){if(i.active!==r)return 1;for(var u=e/f,a=h(u),c=v.length;c>0;)v[--c].call(n,a);return u>=1?(o.event&&o.event.end.call(n,n.__data__,t),s()):void 0}function s(){return--i.count?delete i[r]:delete n[e],1}var f,h,g=o.delay,p=ec,v=[];return p.t=g+a,u>=g?c(u-g):void(p.c=c)},0,a)}}function Bo(n,t,e){n.attr("transform",function(n){var r=t(n);return"translate("+(isFinite(r)?r:e(n))+",0)"})}function Wo(n,t,e){n.attr("transform",function(n){var r=t(n);return"translate(0,"+(isFinite(r)?r:e(n))+")"})}function Jo(n){return n.toISOString()}function Go(n,t,e){function r(t){return n(t)}function u(n,e){var r=n[1]-n[0],u=r/e,i=ta.bisect(Vl,u);return i==Vl.length?[t.year,Vi(n.map(function(n){return n/31536e6}),e)[2]]:i?t[u/Vl[i-1]1?{floor:function(t){for(;e(t=n.floor(t));)t=Ko(t-1);return t},ceil:function(t){for(;e(t=n.ceil(t));)t=Ko(+t+1);return t}}:n))},r.ticks=function(n,t){var e=Pi(r.domain()),i=null==n?u(e,10):"number"==typeof n?u(e,n):!n.range&&[{range:n},t];return i&&(n=i[0],t=i[1]),n.range(e[0],Ko(+e[1]+1),1>t?1:t)},r.tickFormat=function(){return e},r.copy=function(){return Go(n.copy(),t,e)},Yi(r,n)}function Ko(n){return new Date(n)}function Qo(n){return JSON.parse(n.responseText)}function na(n){var t=ua.createRange();return t.selectNode(ua.body),t.createContextualFragment(n.responseText)}var ta={version:"3.5.5"},ea=[].slice,ra=function(n){return ea.call(n)},ua=this.document;if(ua)try{ra(ua.documentElement.childNodes)[0].nodeType}catch(ia){ra=function(n){for(var t=n.length,e=new Array(t);t--;)e[t]=n[t];return e}}if(Date.now||(Date.now=function(){return+new Date}),ua)try{ua.createElement("DIV").style.setProperty("opacity",0,"")}catch(oa){var 
aa=this.Element.prototype,ca=aa.setAttribute,la=aa.setAttributeNS,sa=this.CSSStyleDeclaration.prototype,fa=sa.setProperty;aa.setAttribute=function(n,t){ca.call(this,n,t+"")},aa.setAttributeNS=function(n,t,e){la.call(this,n,t,e+"")},sa.setProperty=function(n,t,e){fa.call(this,n,t+"",e)}}ta.ascending=e,ta.descending=function(n,t){return n>t?-1:t>n?1:t>=n?0:0/0},ta.min=function(n,t){var e,r,u=-1,i=n.length;if(1===arguments.length){for(;++u=r){e=r;break}for(;++ur&&(e=r)}else{for(;++u=r){e=r;break}for(;++ur&&(e=r)}return e},ta.max=function(n,t){var e,r,u=-1,i=n.length;if(1===arguments.length){for(;++u=r){e=r;break}for(;++ue&&(e=r)}else{for(;++u=r){e=r;break}for(;++ue&&(e=r)}return e},ta.extent=function(n,t){var e,r,u,i=-1,o=n.length;if(1===arguments.length){for(;++i=r){e=u=r;break}for(;++ir&&(e=r),r>u&&(u=r))}else{for(;++i=r){e=u=r;break}for(;++ir&&(e=r),r>u&&(u=r))}return[e,u]},ta.sum=function(n,t){var e,r=0,i=n.length,o=-1;if(1===arguments.length)for(;++o1?c/(s-1):void 0},ta.deviation=function(){var n=ta.variance.apply(this,arguments);return n?Math.sqrt(n):n};var ha=i(e);ta.bisectLeft=ha.left,ta.bisect=ta.bisectRight=ha.right,ta.bisector=function(n){return i(1===n.length?function(t,r){return e(n(t),r)}:n)},ta.shuffle=function(n,t,e){(i=arguments.length)<3&&(e=n.length,2>i&&(t=0));for(var r,u,i=e-t;i;)u=Math.random()*i--|0,r=n[i+t],n[i+t]=n[u+t],n[u+t]=r;return n},ta.permute=function(n,t){for(var e=t.length,r=new Array(e);e--;)r[e]=n[t[e]];return r},ta.pairs=function(n){for(var t,e=0,r=n.length-1,u=n[0],i=new Array(0>r?0:r);r>e;)i[e]=[t=u,u=n[++e]];return i},ta.zip=function(){if(!(r=arguments.length))return[];for(var n=-1,t=ta.min(arguments,o),e=new Array(t);++n=0;)for(r=n[u],t=r.length;--t>=0;)e[--o]=r[t];return e};var ga=Math.abs;ta.range=function(n,t,e){if(arguments.length<3&&(e=1,arguments.length<2&&(t=n,n=0)),(t-n)/e===1/0)throw new Error("infinite range");var r,u=[],i=a(ga(e)),o=-1;if(n*=i,t*=i,e*=i,0>e)for(;(r=n+e*++o)>t;)u.push(r/i);else for(;(r=n+e*++o)=i.length)return r?r.call(u,o):e?o.sort(e):o;for(var c,s,f,h,g=-1,p=o.length,v=i[a++],d=new l;++g=i.length)return n;var r=[],u=o[e++];return n.forEach(function(n,u){r.push({key:n,values:t(u,e)})}),u?r.sort(function(n,t){return u(n.key,t.key)}):r}var e,r,u={},i=[],o=[];return u.map=function(t,e){return n(e,t,0)},u.entries=function(e){return t(n(ta.map,e,0),0)},u.key=function(n){return i.push(n),u},u.sortKeys=function(n){return o[i.length-1]=n,u},u.sortValues=function(n){return e=n,u},u.rollup=function(n){return r=n,u},u},ta.set=function(n){var t=new m;if(n)for(var e=0,r=n.length;r>e;++e)t.add(n[e]);return t},c(m,{has:h,add:function(n){return this._[s(n+="")]=!0,n},remove:g,values:p,size:v,empty:d,forEach:function(n){for(var t in this._)n.call(this,f(t))}}),ta.behavior={},ta.rebind=function(n,t){for(var e,r=1,u=arguments.length;++r=0&&(r=n.slice(e+1),n=n.slice(0,e)),n)return arguments.length<2?this[n].on(r):this[n].on(r,t);if(2===arguments.length){if(null==t)for(n in this)this.hasOwnProperty(n)&&this[n].on(r,null);return this}},ta.event=null,ta.requote=function(n){return n.replace(ma,"\\$&")};var ma=/[\\\^\$\*\+\?\|\[\]\(\)\.\{\}]/g,ya={}.__proto__?function(n,t){n.__proto__=t}:function(n,t){for(var e in t)n[e]=t[e]},Ma=function(n,t){return t.querySelector(n)},xa=function(n,t){return t.querySelectorAll(n)},ba=function(n,t){var e=n.matches||n[x(n,"matchesSelector")];return(ba=function(n,t){return e.call(n,t)})(n,t)};"function"==typeof Sizzle&&(Ma=function(n,t){return 
Sizzle(n,t)[0]||null},xa=Sizzle,ba=Sizzle.matchesSelector),ta.selection=function(){return ta.select(ua.documentElement)};var _a=ta.selection.prototype=[];_a.select=function(n){var t,e,r,u,i=[];n=N(n);for(var o=-1,a=this.length;++o=0&&(e=n.slice(0,t),n=n.slice(t+1)),wa.hasOwnProperty(e)?{space:wa[e],local:n}:n}},_a.attr=function(n,t){if(arguments.length<2){if("string"==typeof n){var e=this.node();return n=ta.ns.qualify(n),n.local?e.getAttributeNS(n.space,n.local):e.getAttribute(n)}for(t in n)this.each(z(t,n[t]));return this}return this.each(z(n,t))},_a.classed=function(n,t){if(arguments.length<2){if("string"==typeof n){var e=this.node(),r=(n=T(n)).length,u=-1;if(t=e.classList){for(;++uu){if("string"!=typeof n){2>u&&(e="");for(r in n)this.each(P(r,n[r],e));return this}if(2>u){var i=this.node();return t(i).getComputedStyle(i,null).getPropertyValue(n)}r=""}return this.each(P(n,e,r))},_a.property=function(n,t){if(arguments.length<2){if("string"==typeof n)return this.node()[n];for(t in n)this.each(U(t,n[t]));return this}return this.each(U(n,t))},_a.text=function(n){return arguments.length?this.each("function"==typeof n?function(){var t=n.apply(this,arguments);this.textContent=null==t?"":t}:null==n?function(){this.textContent=""}:function(){this.textContent=n}):this.node().textContent},_a.html=function(n){return arguments.length?this.each("function"==typeof n?function(){var t=n.apply(this,arguments);this.innerHTML=null==t?"":t}:null==n?function(){this.innerHTML=""}:function(){this.innerHTML=n}):this.node().innerHTML},_a.append=function(n){return n=j(n),this.select(function(){return this.appendChild(n.apply(this,arguments))})},_a.insert=function(n,t){return n=j(n),t=N(t),this.select(function(){return this.insertBefore(n.apply(this,arguments),t.apply(this,arguments)||null)})},_a.remove=function(){return this.each(F)},_a.data=function(n,t){function e(n,e){var r,u,i,o=n.length,f=e.length,h=Math.min(o,f),g=new Array(f),p=new Array(f),v=new Array(o);if(t){var d,m=new l,y=new Array(o);for(r=-1;++rr;++r)p[r]=H(e[r]);for(;o>r;++r)v[r]=n[r]}p.update=g,p.parentNode=g.parentNode=v.parentNode=n.parentNode,a.push(p),c.push(g),s.push(v)}var r,u,i=-1,o=this.length;if(!arguments.length){for(n=new Array(o=(r=this[0]).length);++ii;i++){u.push(t=[]),t.parentNode=(e=this[i]).parentNode;for(var a=0,c=e.length;c>a;a++)(r=e[a])&&n.call(r,r.__data__,a,i)&&t.push(r)}return A(u)},_a.order=function(){for(var n=-1,t=this.length;++n=0;)(e=r[u])&&(i&&i!==e.nextSibling&&i.parentNode.insertBefore(e,i),i=e);return this},_a.sort=function(n){n=I.apply(this,arguments);for(var t=-1,e=this.length;++tn;n++)for(var e=this[n],r=0,u=e.length;u>r;r++){var i=e[r];if(i)return i}return null},_a.size=function(){var n=0;return Y(this,function(){++n}),n};var Sa=[];ta.selection.enter=Z,ta.selection.enter.prototype=Sa,Sa.append=_a.append,Sa.empty=_a.empty,Sa.node=_a.node,Sa.call=_a.call,Sa.size=_a.size,Sa.select=function(n){for(var t,e,r,u,i,o=[],a=-1,c=this.length;++ar){if("string"!=typeof n){2>r&&(t=!1);for(e in n)this.each(X(e,n[e],t));return this}if(2>r)return(r=this.node()["__on"+n])&&r._;e=!1}return this.each(X(n,t,e))};var ka=ta.map({mouseenter:"mouseover",mouseleave:"mouseout"});ua&&ka.forEach(function(n){"on"+n in ua&&ka.remove(n)});var Ea,Aa=0;ta.mouse=function(n){return J(n,k())};var Na=this.navigator&&/WebKit/.test(this.navigator.userAgent)?-1:0;ta.touch=function(n,t,e){if(arguments.length<3&&(e=t,t=k().changedTouches),t)for(var r,u=0,i=t.length;i>u;++u)if((r=t[u]).identifier===e)return J(n,r)},ta.behavior.drag=function(){function 
n(){this.on("mousedown.drag",i).on("touchstart.drag",o)}function e(n,t,e,i,o){return function(){function a(){var n,e,r=t(h,v);r&&(n=r[0]-M[0],e=r[1]-M[1],p|=n|e,M=r,g({type:"drag",x:r[0]+l[0],y:r[1]+l[1],dx:n,dy:e}))}function c(){t(h,v)&&(m.on(i+d,null).on(o+d,null),y(p&&ta.event.target===f),g({type:"dragend"}))}var l,s=this,f=ta.event.target,h=s.parentNode,g=r.of(s,arguments),p=0,v=n(),d=".drag"+(null==v?"":"-"+v),m=ta.select(e(f)).on(i+d,a).on(o+d,c),y=W(f),M=t(h,v);u?(l=u.apply(s,arguments),l=[l.x-M[0],l.y-M[1]]):l=[0,0],g({type:"dragstart"})}}var r=E(n,"drag","dragstart","dragend"),u=null,i=e(b,ta.mouse,t,"mousemove","mouseup"),o=e(G,ta.touch,y,"touchmove","touchend");return n.origin=function(t){return arguments.length?(u=t,n):u},ta.rebind(n,r,"on")},ta.touches=function(n,t){return arguments.length<2&&(t=k().touches),t?ra(t).map(function(t){var e=J(n,t);return e.identifier=t.identifier,e}):[]};var Ca=1e-6,za=Ca*Ca,qa=Math.PI,La=2*qa,Ta=La-Ca,Ra=qa/2,Da=qa/180,Pa=180/qa,Ua=Math.SQRT2,ja=2,Fa=4;ta.interpolateZoom=function(n,t){function e(n){var t=n*y;if(m){var e=rt(v),o=i/(ja*h)*(e*ut(Ua*t+v)-et(v));return[r+o*l,u+o*s,i*e/rt(Ua*t+v)]}return[r+n*l,u+n*s,i*Math.exp(Ua*t)]}var r=n[0],u=n[1],i=n[2],o=t[0],a=t[1],c=t[2],l=o-r,s=a-u,f=l*l+s*s,h=Math.sqrt(f),g=(c*c-i*i+Fa*f)/(2*i*ja*h),p=(c*c-i*i-Fa*f)/(2*c*ja*h),v=Math.log(Math.sqrt(g*g+1)-g),d=Math.log(Math.sqrt(p*p+1)-p),m=d-v,y=(m||Math.log(c/i))/Ua;return e.duration=1e3*y,e},ta.behavior.zoom=function(){function n(n){n.on(q,f).on(Oa+".zoom",g).on("dblclick.zoom",p).on(R,h)}function e(n){return[(n[0]-k.x)/k.k,(n[1]-k.y)/k.k]}function r(n){return[n[0]*k.k+k.x,n[1]*k.k+k.y]}function u(n){k.k=Math.max(N[0],Math.min(N[1],n))}function i(n,t){t=r(t),k.x+=n[0]-t[0],k.y+=n[1]-t[1]}function o(t,e,r,o){t.__chart__={x:k.x,y:k.y,k:k.k},u(Math.pow(2,o)),i(d=e,r),t=ta.select(t),C>0&&(t=t.transition().duration(C)),t.call(n.event)}function a(){b&&b.domain(x.range().map(function(n){return(n-k.x)/k.k}).map(x.invert)),w&&w.domain(_.range().map(function(n){return(n-k.y)/k.k}).map(_.invert))}function c(n){z++||n({type:"zoomstart"})}function l(n){a(),n({type:"zoom",scale:k.k,translate:[k.x,k.y]})}function s(n){--z||n({type:"zoomend"}),d=null}function f(){function n(){f=1,i(ta.mouse(u),g),l(a)}function r(){h.on(L,null).on(T,null),p(f&&ta.event.target===o),s(a)}var u=this,o=ta.event.target,a=D.of(u,arguments),f=0,h=ta.select(t(u)).on(L,n).on(T,r),g=e(ta.mouse(u)),p=W(u);Dl.call(u),c(a)}function h(){function n(){var n=ta.touches(p);return g=k.k,n.forEach(function(n){n.identifier in d&&(d[n.identifier]=e(n))}),n}function t(){var t=ta.event.target;ta.select(t).on(x,r).on(b,a),_.push(t);for(var e=ta.event.changedTouches,u=0,i=e.length;i>u;++u)d[e[u].identifier]=null;var c=n(),l=Date.now();if(1===c.length){if(500>l-M){var s=c[0];o(p,s,d[s.identifier],Math.floor(Math.log(k.k)/Math.LN2)+1),S()}M=l}else if(c.length>1){var s=c[0],f=c[1],h=s[0]-f[0],g=s[1]-f[1];m=h*h+g*g}}function r(){var n,t,e,r,o=ta.touches(p);Dl.call(p);for(var a=0,c=o.length;c>a;++a,r=null)if(e=o[a],r=d[e.identifier]){if(t)break;n=e,t=r}if(r){var s=(s=e[0]-n[0])*s+(s=e[1]-n[1])*s,f=m&&Math.sqrt(s/m);n=[(n[0]+e[0])/2,(n[1]+e[1])/2],t=[(t[0]+r[0])/2,(t[1]+r[1])/2],u(f*g)}M=null,i(n,t),l(v)}function a(){if(ta.event.touches.length){for(var t=ta.event.changedTouches,e=0,r=t.length;r>e;++e)delete d[t[e].identifier];for(var u in d)return void n()}ta.selectAll(_).on(y,null),w.on(q,f).on(R,h),E(),s(v)}var 
g,p=this,v=D.of(p,arguments),d={},m=0,y=".zoom-"+ta.event.changedTouches[0].identifier,x="touchmove"+y,b="touchend"+y,_=[],w=ta.select(p),E=W(p);t(),c(v),w.on(q,null).on(R,t)}function g(){var n=D.of(this,arguments);y?clearTimeout(y):(v=e(d=m||ta.mouse(this)),Dl.call(this),c(n)),y=setTimeout(function(){y=null,s(n)},50),S(),u(Math.pow(2,.002*Ha())*k.k),i(d,v),l(n)}function p(){var n=ta.mouse(this),t=Math.log(k.k)/Math.LN2;o(this,n,e(n),ta.event.shiftKey?Math.ceil(t)-1:Math.floor(t)+1)}var v,d,m,y,M,x,b,_,w,k={x:0,y:0,k:1},A=[960,500],N=Ia,C=250,z=0,q="mousedown.zoom",L="mousemove.zoom",T="mouseup.zoom",R="touchstart.zoom",D=E(n,"zoomstart","zoom","zoomend");return Oa||(Oa="onwheel"in ua?(Ha=function(){return-ta.event.deltaY*(ta.event.deltaMode?120:1)},"wheel"):"onmousewheel"in ua?(Ha=function(){return ta.event.wheelDelta},"mousewheel"):(Ha=function(){return-ta.event.detail},"MozMousePixelScroll")),n.event=function(n){n.each(function(){var n=D.of(this,arguments),t=k;Tl?ta.select(this).transition().each("start.zoom",function(){k=this.__chart__||{x:0,y:0,k:1},c(n)}).tween("zoom:zoom",function(){var e=A[0],r=A[1],u=d?d[0]:e/2,i=d?d[1]:r/2,o=ta.interpolateZoom([(u-k.x)/k.k,(i-k.y)/k.k,e/k.k],[(u-t.x)/t.k,(i-t.y)/t.k,e/t.k]);return function(t){var r=o(t),a=e/r[2];this.__chart__=k={x:u-r[0]*a,y:i-r[1]*a,k:a},l(n)}}).each("interrupt.zoom",function(){s(n)}).each("end.zoom",function(){s(n)}):(this.__chart__=k,c(n),l(n),s(n))})},n.translate=function(t){return arguments.length?(k={x:+t[0],y:+t[1],k:k.k},a(),n):[k.x,k.y]},n.scale=function(t){return arguments.length?(k={x:k.x,y:k.y,k:+t},a(),n):k.k},n.scaleExtent=function(t){return arguments.length?(N=null==t?Ia:[+t[0],+t[1]],n):N},n.center=function(t){return arguments.length?(m=t&&[+t[0],+t[1]],n):m},n.size=function(t){return arguments.length?(A=t&&[+t[0],+t[1]],n):A},n.duration=function(t){return arguments.length?(C=+t,n):C},n.x=function(t){return arguments.length?(b=t,x=t.copy(),k={x:0,y:0,k:1},n):b},n.y=function(t){return arguments.length?(w=t,_=t.copy(),k={x:0,y:0,k:1},n):w},ta.rebind(n,D,"on")};var Ha,Oa,Ia=[0,1/0];ta.color=ot,ot.prototype.toString=function(){return this.rgb()+""},ta.hsl=at;var Ya=at.prototype=new ot;Ya.brighter=function(n){return n=Math.pow(.7,arguments.length?n:1),new at(this.h,this.s,this.l/n)},Ya.darker=function(n){return n=Math.pow(.7,arguments.length?n:1),new at(this.h,this.s,n*this.l)},Ya.rgb=function(){return ct(this.h,this.s,this.l)},ta.hcl=lt;var Za=lt.prototype=new ot;Za.brighter=function(n){return new lt(this.h,this.c,Math.min(100,this.l+Va*(arguments.length?n:1)))},Za.darker=function(n){return new lt(this.h,this.c,Math.max(0,this.l-Va*(arguments.length?n:1)))},Za.rgb=function(){return st(this.h,this.c,this.l).rgb()},ta.lab=ft;var Va=18,Xa=.95047,$a=1,Ba=1.08883,Wa=ft.prototype=new ot;Wa.brighter=function(n){return new ft(Math.min(100,this.l+Va*(arguments.length?n:1)),this.a,this.b)},Wa.darker=function(n){return new ft(Math.max(0,this.l-Va*(arguments.length?n:1)),this.a,this.b)},Wa.rgb=function(){return ht(this.l,this.a,this.b)},ta.rgb=mt;var Ja=mt.prototype=new ot;Ja.brighter=function(n){n=Math.pow(.7,arguments.length?n:1);var t=this.r,e=this.g,r=this.b,u=30;return t||e||r?(t&&u>t&&(t=u),e&&u>e&&(e=u),r&&u>r&&(r=u),new mt(Math.min(255,t/n),Math.min(255,e/n),Math.min(255,r/n))):new mt(u,u,u)},Ja.darker=function(n){return n=Math.pow(.7,arguments.length?n:1),new mt(n*this.r,n*this.g,n*this.b)},Ja.hsl=function(){return _t(this.r,this.g,this.b)},Ja.toString=function(){return"#"+xt(this.r)+xt(this.g)+xt(this.b)};var 
Ga=ta.map({aliceblue:15792383,antiquewhite:16444375,aqua:65535,aquamarine:8388564,azure:15794175,beige:16119260,bisque:16770244,black:0,blanchedalmond:16772045,blue:255,blueviolet:9055202,brown:10824234,burlywood:14596231,cadetblue:6266528,chartreuse:8388352,chocolate:13789470,coral:16744272,cornflowerblue:6591981,cornsilk:16775388,crimson:14423100,cyan:65535,darkblue:139,darkcyan:35723,darkgoldenrod:12092939,darkgray:11119017,darkgreen:25600,darkgrey:11119017,darkkhaki:12433259,darkmagenta:9109643,darkolivegreen:5597999,darkorange:16747520,darkorchid:10040012,darkred:9109504,darksalmon:15308410,darkseagreen:9419919,darkslateblue:4734347,darkslategray:3100495,darkslategrey:3100495,darkturquoise:52945,darkviolet:9699539,deeppink:16716947,deepskyblue:49151,dimgray:6908265,dimgrey:6908265,dodgerblue:2003199,firebrick:11674146,floralwhite:16775920,forestgreen:2263842,fuchsia:16711935,gainsboro:14474460,ghostwhite:16316671,gold:16766720,goldenrod:14329120,gray:8421504,green:32768,greenyellow:11403055,grey:8421504,honeydew:15794160,hotpink:16738740,indianred:13458524,indigo:4915330,ivory:16777200,khaki:15787660,lavender:15132410,lavenderblush:16773365,lawngreen:8190976,lemonchiffon:16775885,lightblue:11393254,lightcoral:15761536,lightcyan:14745599,lightgoldenrodyellow:16448210,lightgray:13882323,lightgreen:9498256,lightgrey:13882323,lightpink:16758465,lightsalmon:16752762,lightseagreen:2142890,lightskyblue:8900346,lightslategray:7833753,lightslategrey:7833753,lightsteelblue:11584734,lightyellow:16777184,lime:65280,limegreen:3329330,linen:16445670,magenta:16711935,maroon:8388608,mediumaquamarine:6737322,mediumblue:205,mediumorchid:12211667,mediumpurple:9662683,mediumseagreen:3978097,mediumslateblue:8087790,mediumspringgreen:64154,mediumturquoise:4772300,mediumvioletred:13047173,midnightblue:1644912,mintcream:16121850,mistyrose:16770273,moccasin:16770229,navajowhite:16768685,navy:128,oldlace:16643558,olive:8421376,olivedrab:7048739,orange:16753920,orangered:16729344,orchid:14315734,palegoldenrod:15657130,palegreen:10025880,paleturquoise:11529966,palevioletred:14381203,papayawhip:16773077,peachpuff:16767673,peru:13468991,pink:16761035,plum:14524637,powderblue:11591910,purple:8388736,rebeccapurple:6697881,red:16711680,rosybrown:12357519,royalblue:4286945,saddlebrown:9127187,salmon:16416882,sandybrown:16032864,seagreen:3050327,seashell:16774638,sienna:10506797,silver:12632256,skyblue:8900331,slateblue:6970061,slategray:7372944,slategrey:7372944,snow:16775930,springgreen:65407,steelblue:4620980,tan:13808780,teal:32896,thistle:14204888,tomato:16737095,turquoise:4251856,violet:15631086,wheat:16113331,white:16777215,whitesmoke:16119285,yellow:16776960,yellowgreen:10145074});Ga.forEach(function(n,t){Ga.set(n,yt(t))}),ta.functor=Et,ta.xhr=At(y),ta.dsv=function(n,t){function e(n,e,i){arguments.length<3&&(i=e,e=null);var o=Nt(n,t,null==e?r:u(e),i);return o.row=function(n){return arguments.length?o.response(null==(e=n)?r:u(n)):e},o}function r(n){return e.parse(n.responseText)}function u(n){return function(t){return e.parse(t.responseText,n)}}function i(t){return t.map(o).join(n)}function o(n){return a.test(n)?'"'+n.replace(/\"/g,'""')+'"':n}var a=new RegExp('["'+n+"\n]"),c=n.charCodeAt(0);return e.parse=function(n,t){var r;return e.parseRows(n,function(n,e){if(r)return r(n,e-1);var u=new Function("d","return {"+n.map(function(n,t){return JSON.stringify(n)+": d["+t+"]"}).join(",")+"}");r=t?function(n,e){return t(u(n),e)}:u})},e.parseRows=function(n,t){function e(){if(s>=l)return o;if(u)return u=!1,i;var 
t=s;if(34===n.charCodeAt(t)){for(var e=t;e++s;){var r=n.charCodeAt(s++),a=1;if(10===r)u=!0;else if(13===r)u=!0,10===n.charCodeAt(s)&&(++s,++a);else if(r!==c)continue;return n.slice(t,s-a)}return n.slice(t)}for(var r,u,i={},o={},a=[],l=n.length,s=0,f=0;(r=e())!==o;){for(var h=[];r!==i&&r!==o;)h.push(r),r=e();t&&null==(h=t(h,f++))||a.push(h)}return a},e.format=function(t){if(Array.isArray(t[0]))return e.formatRows(t);var r=new m,u=[];return t.forEach(function(n){for(var t in n)r.has(t)||u.push(r.add(t))}),[u.map(o).join(n)].concat(t.map(function(t){return u.map(function(n){return o(t[n])}).join(n)})).join("\n")},e.formatRows=function(n){return n.map(i).join("\n")},e},ta.csv=ta.dsv(",","text/csv"),ta.tsv=ta.dsv(" ","text/tab-separated-values");var Ka,Qa,nc,tc,ec,rc=this[x(this,"requestAnimationFrame")]||function(n){setTimeout(n,17)};ta.timer=function(n,t,e){var r=arguments.length;2>r&&(t=0),3>r&&(e=Date.now());var u=e+t,i={c:n,t:u,f:!1,n:null};Qa?Qa.n=i:Ka=i,Qa=i,nc||(tc=clearTimeout(tc),nc=1,rc(qt))},ta.timer.flush=function(){Lt(),Tt()},ta.round=function(n,t){return t?Math.round(n*(t=Math.pow(10,t)))/t:Math.round(n)};var uc=["y","z","a","f","p","n","\xb5","m","","k","M","G","T","P","E","Z","Y"].map(Dt);ta.formatPrefix=function(n,t){var e=0;return n&&(0>n&&(n*=-1),t&&(n=ta.round(n,Rt(n,t))),e=1+Math.floor(1e-12+Math.log(n)/Math.LN10),e=Math.max(-24,Math.min(24,3*Math.floor((e-1)/3)))),uc[8+e/3]};var ic=/(?:([^{])?([<>=^]))?([+\- ])?([$#])?(0)?(\d+)?(,)?(\.-?\d+)?([a-z%])?/i,oc=ta.map({b:function(n){return n.toString(2)},c:function(n){return String.fromCharCode(n)},o:function(n){return n.toString(8)},x:function(n){return n.toString(16)},X:function(n){return n.toString(16).toUpperCase()},g:function(n,t){return n.toPrecision(t)},e:function(n,t){return n.toExponential(t)},f:function(n,t){return n.toFixed(t)},r:function(n,t){return(n=ta.round(n,Rt(n,t))).toFixed(Math.max(0,Math.min(20,Rt(n*(1+1e-15),t))))}}),ac=ta.time={},cc=Date;jt.prototype={getDate:function(){return this._.getUTCDate()},getDay:function(){return this._.getUTCDay()},getFullYear:function(){return this._.getUTCFullYear()},getHours:function(){return this._.getUTCHours()},getMilliseconds:function(){return this._.getUTCMilliseconds()},getMinutes:function(){return this._.getUTCMinutes()},getMonth:function(){return this._.getUTCMonth()},getSeconds:function(){return this._.getUTCSeconds()},getTime:function(){return this._.getTime()},getTimezoneOffset:function(){return 0},valueOf:function(){return this._.valueOf()},setDate:function(){lc.setUTCDate.apply(this._,arguments)},setDay:function(){lc.setUTCDay.apply(this._,arguments)},setFullYear:function(){lc.setUTCFullYear.apply(this._,arguments)},setHours:function(){lc.setUTCHours.apply(this._,arguments)},setMilliseconds:function(){lc.setUTCMilliseconds.apply(this._,arguments)},setMinutes:function(){lc.setUTCMinutes.apply(this._,arguments)},setMonth:function(){lc.setUTCMonth.apply(this._,arguments)},setSeconds:function(){lc.setUTCSeconds.apply(this._,arguments)},setTime:function(){lc.setTime.apply(this._,arguments)}};var lc=Date.prototype;ac.year=Ft(function(n){return n=ac.day(n),n.setMonth(0,1),n},function(n,t){n.setFullYear(n.getFullYear()+t)},function(n){return n.getFullYear()}),ac.years=ac.year.range,ac.years.utc=ac.year.utc.range,ac.day=Ft(function(n){var t=new cc(2e3,0);return t.setFullYear(n.getFullYear(),n.getMonth(),n.getDate()),t},function(n,t){n.setDate(n.getDate()+t)},function(n){return n.getDate()-1}),ac.days=ac.day.range,ac.days.utc=ac.day.utc.range,ac.dayOfYear=function(n){var 
t=ac.year(n);return Math.floor((n-t-6e4*(n.getTimezoneOffset()-t.getTimezoneOffset()))/864e5)},["sunday","monday","tuesday","wednesday","thursday","friday","saturday"].forEach(function(n,t){t=7-t;var e=ac[n]=Ft(function(n){return(n=ac.day(n)).setDate(n.getDate()-(n.getDay()+t)%7),n},function(n,t){n.setDate(n.getDate()+7*Math.floor(t))},function(n){var e=ac.year(n).getDay();return Math.floor((ac.dayOfYear(n)+(e+t)%7)/7)-(e!==t)});ac[n+"s"]=e.range,ac[n+"s"].utc=e.utc.range,ac[n+"OfYear"]=function(n){var e=ac.year(n).getDay();return Math.floor((ac.dayOfYear(n)+(e+t)%7)/7)}}),ac.week=ac.sunday,ac.weeks=ac.sunday.range,ac.weeks.utc=ac.sunday.utc.range,ac.weekOfYear=ac.sundayOfYear;var sc={"-":"",_:" ",0:"0"},fc=/^\s*\d+/,hc=/^%/;ta.locale=function(n){return{numberFormat:Pt(n),timeFormat:Ot(n)}};var gc=ta.locale({decimal:".",thousands:",",grouping:[3],currency:["$",""],dateTime:"%a %b %e %X %Y",date:"%m/%d/%Y",time:"%H:%M:%S",periods:["AM","PM"],days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]});ta.format=gc.numberFormat,ta.geo={},ce.prototype={s:0,t:0,add:function(n){le(n,this.t,pc),le(pc.s,this.s,this),this.s?this.t+=pc.t:this.s=pc.t +},reset:function(){this.s=this.t=0},valueOf:function(){return this.s}};var pc=new ce;ta.geo.stream=function(n,t){n&&vc.hasOwnProperty(n.type)?vc[n.type](n,t):se(n,t)};var vc={Feature:function(n,t){se(n.geometry,t)},FeatureCollection:function(n,t){for(var e=n.features,r=-1,u=e.length;++rn?4*qa+n:n,Mc.lineStart=Mc.lineEnd=Mc.point=b}};ta.geo.bounds=function(){function n(n,t){M.push(x=[s=n,h=n]),f>t&&(f=t),t>g&&(g=t)}function t(t,e){var r=pe([t*Da,e*Da]);if(m){var u=de(m,r),i=[u[1],-u[0],0],o=de(i,u);Me(o),o=xe(o);var c=t-p,l=c>0?1:-1,v=o[0]*Pa*l,d=ga(c)>180;if(d^(v>l*p&&l*t>v)){var y=o[1]*Pa;y>g&&(g=y)}else if(v=(v+360)%360-180,d^(v>l*p&&l*t>v)){var y=-o[1]*Pa;f>y&&(f=y)}else f>e&&(f=e),e>g&&(g=e);d?p>t?a(s,t)>a(s,h)&&(h=t):a(t,h)>a(s,h)&&(s=t):h>=s?(s>t&&(s=t),t>h&&(h=t)):t>p?a(s,t)>a(s,h)&&(h=t):a(t,h)>a(s,h)&&(s=t)}else n(t,e);m=r,p=t}function e(){b.point=t}function r(){x[0]=s,x[1]=h,b.point=n,m=null}function u(n,e){if(m){var r=n-p;y+=ga(r)>180?r+(r>0?360:-360):r}else v=n,d=e;Mc.point(n,e),t(n,e)}function i(){Mc.lineStart()}function o(){u(v,d),Mc.lineEnd(),ga(y)>Ca&&(s=-(h=180)),x[0]=s,x[1]=h,m=null}function a(n,t){return(t-=n)<0?t+360:t}function c(n,t){return n[0]-t[0]}function l(n,t){return t[0]<=t[1]?t[0]<=n&&n<=t[1]:nyc?(s=-(h=180),f=-(g=90)):y>Ca?g=90:-Ca>y&&(f=-90),x[0]=s,x[1]=h}};return function(n){g=h=-(s=f=1/0),M=[],ta.geo.stream(n,b);var t=M.length;if(t){M.sort(c);for(var e,r=1,u=M[0],i=[u];t>r;++r)e=M[r],l(e[0],u)||l(e[1],u)?(a(u[0],e[1])>a(u[0],u[1])&&(u[1]=e[1]),a(e[0],u[1])>a(u[0],u[1])&&(u[0]=e[0])):i.push(u=e);for(var o,e,p=-1/0,t=i.length-1,r=0,u=i[t];t>=r;u=e,++r)e=i[r],(o=a(u[1],e[0]))>p&&(p=o,s=e[0],h=u[1])}return M=x=null,1/0===s||1/0===f?[[0/0,0/0],[0/0,0/0]]:[[s,f],[h,g]]}}(),ta.geo.centroid=function(n){xc=bc=_c=wc=Sc=kc=Ec=Ac=Nc=Cc=zc=0,ta.geo.stream(n,qc);var t=Nc,e=Cc,r=zc,u=t*t+e*e+r*r;return za>u&&(t=kc,e=Ec,r=Ac,Ca>bc&&(t=_c,e=wc,r=Sc),u=t*t+e*e+r*r,za>u)?[0/0,0/0]:[Math.atan2(e,t)*Pa,tt(r/Math.sqrt(u))*Pa]};var 
xc,bc,_c,wc,Sc,kc,Ec,Ac,Nc,Cc,zc,qc={sphere:b,point:_e,lineStart:Se,lineEnd:ke,polygonStart:function(){qc.lineStart=Ee},polygonEnd:function(){qc.lineStart=Se}},Lc=Le(Ne,Pe,je,[-qa,-qa/2]),Tc=1e9;ta.geo.clipExtent=function(){var n,t,e,r,u,i,o={stream:function(n){return u&&(u.valid=!1),u=i(n),u.valid=!0,u},extent:function(a){return arguments.length?(i=Ie(n=+a[0][0],t=+a[0][1],e=+a[1][0],r=+a[1][1]),u&&(u.valid=!1,u=null),o):[[n,t],[e,r]]}};return o.extent([[0,0],[960,500]])},(ta.geo.conicEqualArea=function(){return Ye(Ze)}).raw=Ze,ta.geo.albers=function(){return ta.geo.conicEqualArea().rotate([96,0]).center([-.6,38.7]).parallels([29.5,45.5]).scale(1070)},ta.geo.albersUsa=function(){function n(n){var i=n[0],o=n[1];return t=null,e(i,o),t||(r(i,o),t)||u(i,o),t}var t,e,r,u,i=ta.geo.albers(),o=ta.geo.conicEqualArea().rotate([154,0]).center([-2,58.5]).parallels([55,65]),a=ta.geo.conicEqualArea().rotate([157,0]).center([-3,19.9]).parallels([8,18]),c={point:function(n,e){t=[n,e]}};return n.invert=function(n){var t=i.scale(),e=i.translate(),r=(n[0]-e[0])/t,u=(n[1]-e[1])/t;return(u>=.12&&.234>u&&r>=-.425&&-.214>r?o:u>=.166&&.234>u&&r>=-.214&&-.115>r?a:i).invert(n)},n.stream=function(n){var t=i.stream(n),e=o.stream(n),r=a.stream(n);return{point:function(n,u){t.point(n,u),e.point(n,u),r.point(n,u)},sphere:function(){t.sphere(),e.sphere(),r.sphere()},lineStart:function(){t.lineStart(),e.lineStart(),r.lineStart()},lineEnd:function(){t.lineEnd(),e.lineEnd(),r.lineEnd()},polygonStart:function(){t.polygonStart(),e.polygonStart(),r.polygonStart()},polygonEnd:function(){t.polygonEnd(),e.polygonEnd(),r.polygonEnd()}}},n.precision=function(t){return arguments.length?(i.precision(t),o.precision(t),a.precision(t),n):i.precision()},n.scale=function(t){return arguments.length?(i.scale(t),o.scale(.35*t),a.scale(t),n.translate(i.translate())):i.scale()},n.translate=function(t){if(!arguments.length)return i.translate();var l=i.scale(),s=+t[0],f=+t[1];return e=i.translate(t).clipExtent([[s-.455*l,f-.238*l],[s+.455*l,f+.238*l]]).stream(c).point,r=o.translate([s-.307*l,f+.201*l]).clipExtent([[s-.425*l+Ca,f+.12*l+Ca],[s-.214*l-Ca,f+.234*l-Ca]]).stream(c).point,u=a.translate([s-.205*l,f+.212*l]).clipExtent([[s-.214*l+Ca,f+.166*l+Ca],[s-.115*l-Ca,f+.234*l-Ca]]).stream(c).point,n},n.scale(1070)};var Rc,Dc,Pc,Uc,jc,Fc,Hc={point:b,lineStart:b,lineEnd:b,polygonStart:function(){Dc=0,Hc.lineStart=Ve},polygonEnd:function(){Hc.lineStart=Hc.lineEnd=Hc.point=b,Rc+=ga(Dc/2)}},Oc={point:Xe,lineStart:b,lineEnd:b,polygonStart:b,polygonEnd:b},Ic={point:We,lineStart:Je,lineEnd:Ge,polygonStart:function(){Ic.lineStart=Ke},polygonEnd:function(){Ic.point=We,Ic.lineStart=Je,Ic.lineEnd=Ge}};ta.geo.path=function(){function n(n){return n&&("function"==typeof a&&i.pointRadius(+a.apply(this,arguments)),o&&o.valid||(o=u(i)),ta.geo.stream(n,o)),i.result()}function t(){return o=null,n}var e,r,u,i,o,a=4.5;return n.area=function(n){return Rc=0,ta.geo.stream(n,u(Hc)),Rc},n.centroid=function(n){return _c=wc=Sc=kc=Ec=Ac=Nc=Cc=zc=0,ta.geo.stream(n,u(Ic)),zc?[Nc/zc,Cc/zc]:Ac?[kc/Ac,Ec/Ac]:Sc?[_c/Sc,wc/Sc]:[0/0,0/0]},n.bounds=function(n){return jc=Fc=-(Pc=Uc=1/0),ta.geo.stream(n,u(Oc)),[[Pc,Uc],[jc,Fc]]},n.projection=function(n){return arguments.length?(u=(e=n)?n.stream||tr(n):y,t()):e},n.context=function(n){return arguments.length?(i=null==(r=n)?new $e:new Qe(n),"function"!=typeof a&&i.pointRadius(a),t()):r},n.pointRadius=function(t){return arguments.length?(a="function"==typeof 
t?t:(i.pointRadius(+t),+t),n):a},n.projection(ta.geo.albersUsa()).context(null)},ta.geo.transform=function(n){return{stream:function(t){var e=new er(t);for(var r in n)e[r]=n[r];return e}}},er.prototype={point:function(n,t){this.stream.point(n,t)},sphere:function(){this.stream.sphere()},lineStart:function(){this.stream.lineStart()},lineEnd:function(){this.stream.lineEnd()},polygonStart:function(){this.stream.polygonStart()},polygonEnd:function(){this.stream.polygonEnd()}},ta.geo.projection=ur,ta.geo.projectionMutator=ir,(ta.geo.equirectangular=function(){return ur(ar)}).raw=ar.invert=ar,ta.geo.rotation=function(n){function t(t){return t=n(t[0]*Da,t[1]*Da),t[0]*=Pa,t[1]*=Pa,t}return n=lr(n[0]%360*Da,n[1]*Da,n.length>2?n[2]*Da:0),t.invert=function(t){return t=n.invert(t[0]*Da,t[1]*Da),t[0]*=Pa,t[1]*=Pa,t},t},cr.invert=ar,ta.geo.circle=function(){function n(){var n="function"==typeof r?r.apply(this,arguments):r,t=lr(-n[0]*Da,-n[1]*Da,0).invert,u=[];return e(null,null,1,{point:function(n,e){u.push(n=t(n,e)),n[0]*=Pa,n[1]*=Pa}}),{type:"Polygon",coordinates:[u]}}var t,e,r=[0,0],u=6;return n.origin=function(t){return arguments.length?(r=t,n):r},n.angle=function(r){return arguments.length?(e=gr((t=+r)*Da,u*Da),n):t},n.precision=function(r){return arguments.length?(e=gr(t*Da,(u=+r)*Da),n):u},n.angle(90)},ta.geo.distance=function(n,t){var e,r=(t[0]-n[0])*Da,u=n[1]*Da,i=t[1]*Da,o=Math.sin(r),a=Math.cos(r),c=Math.sin(u),l=Math.cos(u),s=Math.sin(i),f=Math.cos(i);return Math.atan2(Math.sqrt((e=f*o)*e+(e=l*s-c*f*a)*e),c*s+l*f*a)},ta.geo.graticule=function(){function n(){return{type:"MultiLineString",coordinates:t()}}function t(){return ta.range(Math.ceil(i/d)*d,u,d).map(h).concat(ta.range(Math.ceil(l/m)*m,c,m).map(g)).concat(ta.range(Math.ceil(r/p)*p,e,p).filter(function(n){return ga(n%d)>Ca}).map(s)).concat(ta.range(Math.ceil(a/v)*v,o,v).filter(function(n){return ga(n%m)>Ca}).map(f))}var e,r,u,i,o,a,c,l,s,f,h,g,p=10,v=p,d=90,m=360,y=2.5;return n.lines=function(){return t().map(function(n){return{type:"LineString",coordinates:n}})},n.outline=function(){return{type:"Polygon",coordinates:[h(i).concat(g(c).slice(1),h(u).reverse().slice(1),g(l).reverse().slice(1))]}},n.extent=function(t){return arguments.length?n.majorExtent(t).minorExtent(t):n.minorExtent()},n.majorExtent=function(t){return arguments.length?(i=+t[0][0],u=+t[1][0],l=+t[0][1],c=+t[1][1],i>u&&(t=i,i=u,u=t),l>c&&(t=l,l=c,c=t),n.precision(y)):[[i,l],[u,c]]},n.minorExtent=function(t){return arguments.length?(r=+t[0][0],e=+t[1][0],a=+t[0][1],o=+t[1][1],r>e&&(t=r,r=e,e=t),a>o&&(t=a,a=o,o=t),n.precision(y)):[[r,a],[e,o]]},n.step=function(t){return arguments.length?n.majorStep(t).minorStep(t):n.minorStep()},n.majorStep=function(t){return arguments.length?(d=+t[0],m=+t[1],n):[d,m]},n.minorStep=function(t){return arguments.length?(p=+t[0],v=+t[1],n):[p,v]},n.precision=function(t){return arguments.length?(y=+t,s=vr(a,o,90),f=dr(r,e,y),h=vr(l,c,90),g=dr(i,u,y),n):y},n.majorExtent([[-180,-90+Ca],[180,90-Ca]]).minorExtent([[-180,-80-Ca],[180,80+Ca]])},ta.geo.greatArc=function(){function n(){return{type:"LineString",coordinates:[t||r.apply(this,arguments),e||u.apply(this,arguments)]}}var t,e,r=mr,u=yr;return n.distance=function(){return ta.geo.distance(t||r.apply(this,arguments),e||u.apply(this,arguments))},n.source=function(e){return arguments.length?(r=e,t="function"==typeof e?null:e,n):r},n.target=function(t){return arguments.length?(u=t,e="function"==typeof t?null:t,n):u},n.precision=function(){return 
arguments.length?n:0},n},ta.geo.interpolate=function(n,t){return Mr(n[0]*Da,n[1]*Da,t[0]*Da,t[1]*Da)},ta.geo.length=function(n){return Yc=0,ta.geo.stream(n,Zc),Yc};var Yc,Zc={sphere:b,point:b,lineStart:xr,lineEnd:b,polygonStart:b,polygonEnd:b},Vc=br(function(n){return Math.sqrt(2/(1+n))},function(n){return 2*Math.asin(n/2)});(ta.geo.azimuthalEqualArea=function(){return ur(Vc)}).raw=Vc;var Xc=br(function(n){var t=Math.acos(n);return t&&t/Math.sin(t)},y);(ta.geo.azimuthalEquidistant=function(){return ur(Xc)}).raw=Xc,(ta.geo.conicConformal=function(){return Ye(_r)}).raw=_r,(ta.geo.conicEquidistant=function(){return Ye(wr)}).raw=wr;var $c=br(function(n){return 1/n},Math.atan);(ta.geo.gnomonic=function(){return ur($c)}).raw=$c,Sr.invert=function(n,t){return[n,2*Math.atan(Math.exp(t))-Ra]},(ta.geo.mercator=function(){return kr(Sr)}).raw=Sr;var Bc=br(function(){return 1},Math.asin);(ta.geo.orthographic=function(){return ur(Bc)}).raw=Bc;var Wc=br(function(n){return 1/(1+n)},function(n){return 2*Math.atan(n)});(ta.geo.stereographic=function(){return ur(Wc)}).raw=Wc,Er.invert=function(n,t){return[-t,2*Math.atan(Math.exp(n))-Ra]},(ta.geo.transverseMercator=function(){var n=kr(Er),t=n.center,e=n.rotate;return n.center=function(n){return n?t([-n[1],n[0]]):(n=t(),[n[1],-n[0]])},n.rotate=function(n){return n?e([n[0],n[1],n.length>2?n[2]+90:90]):(n=e(),[n[0],n[1],n[2]-90])},e([0,0,90])}).raw=Er,ta.geom={},ta.geom.hull=function(n){function t(n){if(n.length<3)return[];var t,u=Et(e),i=Et(r),o=n.length,a=[],c=[];for(t=0;o>t;t++)a.push([+u.call(this,n[t],t),+i.call(this,n[t],t),t]);for(a.sort(zr),t=0;o>t;t++)c.push([a[t][0],-a[t][1]]);var l=Cr(a),s=Cr(c),f=s[0]===l[0],h=s[s.length-1]===l[l.length-1],g=[];for(t=l.length-1;t>=0;--t)g.push(n[a[l[t]][2]]);for(t=+f;t=r&&l.x<=i&&l.y>=u&&l.y<=o?[[r,o],[i,o],[i,u],[r,u]]:[];s.point=n[a]}),t}function e(n){return n.map(function(n,t){return{x:Math.round(i(n,t)/Ca)*Ca,y:Math.round(o(n,t)/Ca)*Ca,i:t}})}var r=Ar,u=Nr,i=r,o=u,a=ul;return n?t(n):(t.links=function(n){return iu(e(n)).edges.filter(function(n){return n.l&&n.r}).map(function(t){return{source:n[t.l.i],target:n[t.r.i]}})},t.triangles=function(n){var t=[];return iu(e(n)).cells.forEach(function(e,r){for(var u,i,o=e.site,a=e.edges.sort(Yr),c=-1,l=a.length,s=a[l-1].edge,f=s.l===o?s.r:s.l;++c=l,h=r>=s,g=h<<1|f;n.leaf=!1,n=n.nodes[g]||(n.nodes[g]=su()),f?u=l:a=l,h?o=s:c=s,i(n,t,e,r,u,o,a,c)}var s,f,h,g,p,v,d,m,y,M=Et(a),x=Et(c);if(null!=t)v=t,d=e,m=r,y=u;else if(m=y=-(v=d=1/0),f=[],h=[],p=n.length,o)for(g=0;p>g;++g)s=n[g],s.xm&&(m=s.x),s.y>y&&(y=s.y),f.push(s.x),h.push(s.y);else for(g=0;p>g;++g){var b=+M(s=n[g],g),_=+x(s,g);v>b&&(v=b),d>_&&(d=_),b>m&&(m=b),_>y&&(y=_),f.push(b),h.push(_)}var w=m-v,S=y-d;w>S?y=d+w:m=v+S;var k=su();if(k.add=function(n){i(k,n,+M(n,++g),+x(n,g),v,d,m,y)},k.visit=function(n){fu(n,k,v,d,m,y)},k.find=function(n){return hu(k,n[0],n[1],v,d,m,y)},g=-1,null==t){for(;++g=0?n.slice(0,t):n,r=t>=0?n.slice(t+1):"in";return e=cl.get(e)||al,r=ll.get(r)||y,Mu(r(e.apply(null,ea.call(arguments,1))))},ta.interpolateHcl=Lu,ta.interpolateHsl=Tu,ta.interpolateLab=Ru,ta.interpolateRound=Du,ta.transform=function(n){var t=ua.createElementNS(ta.ns.prefix.svg,"g");return(ta.transform=function(n){if(null!=n){t.setAttribute("transform",n);var e=t.transform.baseVal.consolidate()}return new Pu(e?e.matrix:sl)})(n)},Pu.prototype.toString=function(){return"translate("+this.translate+")rotate("+this.rotate+")skewX("+this.skew+")scale("+this.scale+")"};var 
sl={a:1,b:0,c:0,d:1,e:0,f:0};ta.interpolateTransform=Hu,ta.layout={},ta.layout.bundle=function(){return function(n){for(var t=[],e=-1,r=n.length;++ea*a/d){if(p>c){var l=t.charge/c;n.px-=i*l,n.py-=o*l}return!0}if(t.point&&c&&p>c){var l=t.pointCharge/c;n.px-=i*l,n.py-=o*l}}return!t.charge}}function t(n){n.px=ta.event.x,n.py=ta.event.y,a.resume()}var e,r,u,i,o,a={},c=ta.dispatch("start","tick","end"),l=[1,1],s=.9,f=fl,h=hl,g=-30,p=gl,v=.1,d=.64,m=[],M=[];return a.tick=function(){if((r*=.99)<.005)return c.end({type:"end",alpha:r=0}),!0;var t,e,a,f,h,p,d,y,x,b=m.length,_=M.length;for(e=0;_>e;++e)a=M[e],f=a.source,h=a.target,y=h.x-f.x,x=h.y-f.y,(p=y*y+x*x)&&(p=r*i[e]*((p=Math.sqrt(p))-u[e])/p,y*=p,x*=p,h.x-=y*(d=f.weight/(h.weight+f.weight)),h.y-=x*d,f.x+=y*(d=1-d),f.y+=x*d);if((d=r*v)&&(y=l[0]/2,x=l[1]/2,e=-1,d))for(;++e0?n:0:n>0&&(c.start({type:"start",alpha:r=n}),ta.timer(a.tick)),a):r},a.start=function(){function n(n,r){if(!e){for(e=new Array(c),a=0;c>a;++a)e[a]=[];for(a=0;s>a;++a){var u=M[a];e[u.source.index].push(u.target),e[u.target.index].push(u.source)}}for(var i,o=e[t],a=-1,l=o.length;++at;++t)(r=m[t]).index=t,r.weight=0;for(t=0;s>t;++t)r=M[t],"number"==typeof r.source&&(r.source=m[r.source]),"number"==typeof r.target&&(r.target=m[r.target]),++r.source.weight,++r.target.weight;for(t=0;c>t;++t)r=m[t],isNaN(r.x)&&(r.x=n("x",p)),isNaN(r.y)&&(r.y=n("y",v)),isNaN(r.px)&&(r.px=r.x),isNaN(r.py)&&(r.py=r.y);if(u=[],"function"==typeof f)for(t=0;s>t;++t)u[t]=+f.call(this,M[t],t);else for(t=0;s>t;++t)u[t]=f;if(i=[],"function"==typeof h)for(t=0;s>t;++t)i[t]=+h.call(this,M[t],t);else for(t=0;s>t;++t)i[t]=h;if(o=[],"function"==typeof g)for(t=0;c>t;++t)o[t]=+g.call(this,m[t],t);else for(t=0;c>t;++t)o[t]=g;return a.resume()},a.resume=function(){return a.alpha(.1)},a.stop=function(){return a.alpha(0)},a.drag=function(){return e||(e=ta.behavior.drag().origin(y).on("dragstart.force",Xu).on("drag.force",t).on("dragend.force",$u)),arguments.length?void this.on("mouseover.force",Bu).on("mouseout.force",Wu).call(e):e},ta.rebind(a,c,"on")};var fl=20,hl=1,gl=1/0;ta.layout.hierarchy=function(){function n(u){var i,o=[u],a=[];for(u.depth=0;null!=(i=o.pop());)if(a.push(i),(l=e.call(n,i,i.depth))&&(c=l.length)){for(var c,l,s;--c>=0;)o.push(s=l[c]),s.parent=i,s.depth=i.depth+1;r&&(i.value=0),i.children=l}else r&&(i.value=+r.call(n,i,i.depth)||0),delete i.children;return Qu(u,function(n){var e,u;t&&(e=n.children)&&e.sort(t),r&&(u=n.parent)&&(u.value+=n.value)}),a}var t=ei,e=ni,r=ti;return n.sort=function(e){return arguments.length?(t=e,n):t},n.children=function(t){return arguments.length?(e=t,n):e},n.value=function(t){return arguments.length?(r=t,n):r},n.revalue=function(t){return r&&(Ku(t,function(n){n.children&&(n.value=0)}),Qu(t,function(t){var e;t.children||(t.value=+r.call(n,t,t.depth)||0),(e=t.parent)&&(e.value+=t.value)})),t},n},ta.layout.partition=function(){function n(t,e,r,u){var i=t.children;if(t.x=e,t.y=t.depth*u,t.dx=r,t.dy=u,i&&(o=i.length)){var o,a,c,l=-1;for(r=t.value?r/t.value:0;++lf?-1:1),p=(f-c*g)/ta.sum(l),v=ta.range(c),d=[];return null!=e&&v.sort(e===pl?function(n,t){return l[t]-l[n]}:function(n,t){return e(o[n],o[t])}),v.forEach(function(n){d[n]={data:o[n],value:a=l[n],startAngle:s,endAngle:s+=a*p+g,padAngle:h}}),d}var t=Number,e=pl,r=0,u=La,i=0;return n.value=function(e){return arguments.length?(t=e,n):t},n.sort=function(t){return arguments.length?(e=t,n):e},n.startAngle=function(t){return arguments.length?(r=t,n):r},n.endAngle=function(t){return 
arguments.length?(u=t,n):u},n.padAngle=function(t){return arguments.length?(i=t,n):i},n};var pl={};ta.layout.stack=function(){function n(a,c){if(!(h=a.length))return a;var l=a.map(function(e,r){return t.call(n,e,r)}),s=l.map(function(t){return t.map(function(t,e){return[i.call(n,t,e),o.call(n,t,e)]})}),f=e.call(n,s,c);l=ta.permute(l,f),s=ta.permute(s,f);var h,g,p,v,d=r.call(n,s,c),m=l[0].length;for(p=0;m>p;++p)for(u.call(n,l[0][p],v=d[p],s[0][p][1]),g=1;h>g;++g)u.call(n,l[g][p],v+=s[g-1][p][1],s[g][p][1]);return a}var t=y,e=ai,r=ci,u=oi,i=ui,o=ii;return n.values=function(e){return arguments.length?(t=e,n):t},n.order=function(t){return arguments.length?(e="function"==typeof t?t:vl.get(t)||ai,n):e},n.offset=function(t){return arguments.length?(r="function"==typeof t?t:dl.get(t)||ci,n):r},n.x=function(t){return arguments.length?(i=t,n):i},n.y=function(t){return arguments.length?(o=t,n):o},n.out=function(t){return arguments.length?(u=t,n):u},n};var vl=ta.map({"inside-out":function(n){var t,e,r=n.length,u=n.map(li),i=n.map(si),o=ta.range(r).sort(function(n,t){return u[n]-u[t]}),a=0,c=0,l=[],s=[];for(t=0;r>t;++t)e=o[t],c>a?(a+=i[e],l.push(e)):(c+=i[e],s.push(e));return s.reverse().concat(l)},reverse:function(n){return ta.range(n.length).reverse()},"default":ai}),dl=ta.map({silhouette:function(n){var t,e,r,u=n.length,i=n[0].length,o=[],a=0,c=[];for(e=0;i>e;++e){for(t=0,r=0;u>t;t++)r+=n[t][e][1];r>a&&(a=r),o.push(r)}for(e=0;i>e;++e)c[e]=(a-o[e])/2;return c},wiggle:function(n){var t,e,r,u,i,o,a,c,l,s=n.length,f=n[0],h=f.length,g=[];for(g[0]=c=l=0,e=1;h>e;++e){for(t=0,u=0;s>t;++t)u+=n[t][e][1];for(t=0,i=0,a=f[e][0]-f[e-1][0];s>t;++t){for(r=0,o=(n[t][e][1]-n[t][e-1][1])/(2*a);t>r;++r)o+=(n[r][e][1]-n[r][e-1][1])/a;i+=o*n[t][e][1]}g[e]=c-=u?i/u*a:0,l>c&&(l=c)}for(e=0;h>e;++e)g[e]-=l;return g},expand:function(n){var t,e,r,u=n.length,i=n[0].length,o=1/u,a=[];for(e=0;i>e;++e){for(t=0,r=0;u>t;t++)r+=n[t][e][1];if(r)for(t=0;u>t;t++)n[t][e][1]/=r;else for(t=0;u>t;t++)n[t][e][1]=o}for(e=0;i>e;++e)a[e]=0;return a},zero:ci});ta.layout.histogram=function(){function n(n,i){for(var o,a,c=[],l=n.map(e,this),s=r.call(this,l,i),f=u.call(this,s,l,i),i=-1,h=l.length,g=f.length-1,p=t?1:1/h;++i0)for(i=-1;++i=s[0]&&a<=s[1]&&(o=c[ta.bisect(f,a,1,g)-1],o.y+=p,o.push(n[i]));return c}var t=!0,e=Number,r=pi,u=hi;return n.value=function(t){return arguments.length?(e=t,n):e},n.range=function(t){return arguments.length?(r=Et(t),n):r},n.bins=function(t){return arguments.length?(u="number"==typeof t?function(n){return gi(n,t)}:Et(t),n):u},n.frequency=function(e){return arguments.length?(t=!!e,n):t},n},ta.layout.pack=function(){function n(n,i){var o=e.call(this,n,i),a=o[0],c=u[0],l=u[1],s=null==t?Math.sqrt:"function"==typeof t?t:function(){return t};if(a.x=a.y=0,Qu(a,function(n){n.r=+s(n.value)}),Qu(a,Mi),r){var f=r*(t?1:Math.max(2*a.r/c,2*a.r/l))/2;Qu(a,function(n){n.r+=f}),Qu(a,Mi),Qu(a,function(n){n.r-=f})}return _i(a,c/2,l/2,t?1:1/Math.max(2*a.r/c,2*a.r/l)),o}var t,e=ta.layout.hierarchy().sort(vi),r=0,u=[1,1];return n.size=function(t){return arguments.length?(u=t,n):u},n.radius=function(e){return arguments.length?(t=null==e||"function"==typeof e?e:+e,n):t},n.padding=function(t){return arguments.length?(r=+t,n):r},Gu(n,e)},ta.layout.tree=function(){function n(n,u){var s=o.call(this,n,u),f=s[0],h=t(f);if(Qu(h,e),h.parent.m=-h.z,Ku(h,r),l)Ku(f,i);else{var g=f,p=f,v=f;Ku(f,function(n){n.xp.x&&(p=n),n.depth>v.depth&&(v=n)});var 
d=a(g,p)/2-g.x,m=c[0]/(p.x+a(p,g)/2+d),y=c[1]/(v.depth||1);Ku(f,function(n){n.x=(n.x+d)*m,n.y=n.depth*y})}return s}function t(n){for(var t,e={A:null,children:[n]},r=[e];null!=(t=r.pop());)for(var u,i=t.children,o=0,a=i.length;a>o;++o)r.push((i[o]=u={_:i[o],parent:t,children:(u=i[o].children)&&u.slice()||[],A:null,a:null,z:0,m:0,c:0,s:0,t:null,i:o}).a=u);return e.children[0]}function e(n){var t=n.children,e=n.parent.children,r=n.i?e[n.i-1]:null;if(t.length){Ni(n);var i=(t[0].z+t[t.length-1].z)/2;r?(n.z=r.z+a(n._,r._),n.m=n.z-i):n.z=i}else r&&(n.z=r.z+a(n._,r._));n.parent.A=u(n,r,n.parent.A||e[0])}function r(n){n._.x=n.z+n.parent.m,n.m+=n.parent.m}function u(n,t,e){if(t){for(var r,u=n,i=n,o=t,c=u.parent.children[0],l=u.m,s=i.m,f=o.m,h=c.m;o=Ei(o),u=ki(u),o&&u;)c=ki(c),i=Ei(i),i.a=n,r=o.z+f-u.z-l+a(o._,u._),r>0&&(Ai(Ci(o,n,e),n,r),l+=r,s+=r),f+=o.m,l+=u.m,h+=c.m,s+=i.m;o&&!Ei(i)&&(i.t=o,i.m+=f-s),u&&!ki(c)&&(c.t=u,c.m+=l-h,e=n)}return e}function i(n){n.x*=c[0],n.y=n.depth*c[1]}var o=ta.layout.hierarchy().sort(null).value(null),a=Si,c=[1,1],l=null;return n.separation=function(t){return arguments.length?(a=t,n):a},n.size=function(t){return arguments.length?(l=null==(c=t)?i:null,n):l?null:c},n.nodeSize=function(t){return arguments.length?(l=null==(c=t)?null:i,n):l?c:null},Gu(n,o)},ta.layout.cluster=function(){function n(n,i){var o,a=t.call(this,n,i),c=a[0],l=0;Qu(c,function(n){var t=n.children;t&&t.length?(n.x=qi(t),n.y=zi(t)):(n.x=o?l+=e(n,o):0,n.y=0,o=n)});var s=Li(c),f=Ti(c),h=s.x-e(s,f)/2,g=f.x+e(f,s)/2;return Qu(c,u?function(n){n.x=(n.x-c.x)*r[0],n.y=(c.y-n.y)*r[1]}:function(n){n.x=(n.x-h)/(g-h)*r[0],n.y=(1-(c.y?n.y/c.y:1))*r[1]}),a}var t=ta.layout.hierarchy().sort(null).value(null),e=Si,r=[1,1],u=!1;return n.separation=function(t){return arguments.length?(e=t,n):e},n.size=function(t){return arguments.length?(u=null==(r=t),n):u?null:r},n.nodeSize=function(t){return arguments.length?(u=null!=(r=t),n):u?r:null},Gu(n,t)},ta.layout.treemap=function(){function n(n,t){for(var e,r,u=-1,i=n.length;++ut?0:t),e.area=isNaN(r)||0>=r?0:r}function t(e){var i=e.children;if(i&&i.length){var o,a,c,l=f(e),s=[],h=i.slice(),p=1/0,v="slice"===g?l.dx:"dice"===g?l.dy:"slice-dice"===g?1&e.depth?l.dy:l.dx:Math.min(l.dx,l.dy);for(n(h,l.dx*l.dy/e.value),s.area=0;(c=h.length)>0;)s.push(o=h[c-1]),s.area+=o.area,"squarify"!==g||(a=r(s,v))<=p?(h.pop(),p=a):(s.area-=s.pop().area,u(s,v,l,!1),v=Math.min(l.dx,l.dy),s.length=s.area=0,p=1/0);s.length&&(u(s,v,l,!0),s.length=s.area=0),i.forEach(t)}}function e(t){var r=t.children;if(r&&r.length){var i,o=f(t),a=r.slice(),c=[];for(n(a,o.dx*o.dy/t.value),c.area=0;i=a.pop();)c.push(i),c.area+=i.area,null!=i.z&&(u(c,i.z?o.dx:o.dy,o,!a.length),c.length=c.area=0);r.forEach(e)}}function r(n,t){for(var e,r=n.area,u=0,i=1/0,o=-1,a=n.length;++oe&&(i=e),e>u&&(u=e));return r*=r,t*=t,r?Math.max(t*u*p/r,r/(t*i*p)):1/0}function u(n,t,e,r){var u,i=-1,o=n.length,a=e.x,l=e.y,s=t?c(n.area/t):0;if(t==e.dx){for((r||s>e.dy)&&(s=e.dy);++ie.dx)&&(s=e.dx);++ie&&(t=1),1>e&&(n=0),function(){var e,r,u;do e=2*Math.random()-1,r=2*Math.random()-1,u=e*e+r*r;while(!u||u>1);return n+t*e*Math.sqrt(-2*Math.log(u)/u)}},logNormal:function(){var n=ta.random.normal.apply(ta,arguments);return function(){return Math.exp(n())}},bates:function(n){var t=ta.random.irwinHall(n);return function(){return t()/n}},irwinHall:function(n){return function(){for(var t=0,e=0;n>e;e++)t+=Math.random();return t}}},ta.scale={};var ml={floor:y,ceil:y};ta.scale.linear=function(){return Ii([0,1],[0,1],mu,!1)};var 
yl={s:1,g:1,p:1,r:1,e:1};ta.scale.log=function(){return Ji(ta.scale.linear().domain([0,1]),10,!0,[1,10])};var Ml=ta.format(".0e"),xl={floor:function(n){return-Math.ceil(-n)},ceil:function(n){return-Math.floor(-n)}};ta.scale.pow=function(){return Gi(ta.scale.linear(),1,[0,1])},ta.scale.sqrt=function(){return ta.scale.pow().exponent(.5)},ta.scale.ordinal=function(){return Qi([],{t:"range",a:[[]]})},ta.scale.category10=function(){return ta.scale.ordinal().range(bl)},ta.scale.category20=function(){return ta.scale.ordinal().range(_l)},ta.scale.category20b=function(){return ta.scale.ordinal().range(wl)},ta.scale.category20c=function(){return ta.scale.ordinal().range(Sl)};var bl=[2062260,16744206,2924588,14034728,9725885,9197131,14907330,8355711,12369186,1556175].map(Mt),_l=[2062260,11454440,16744206,16759672,2924588,10018698,14034728,16750742,9725885,12955861,9197131,12885140,14907330,16234194,8355711,13092807,12369186,14408589,1556175,10410725].map(Mt),wl=[3750777,5395619,7040719,10264286,6519097,9216594,11915115,13556636,9202993,12426809,15186514,15190932,8666169,11356490,14049643,15177372,8077683,10834324,13528509,14589654].map(Mt),Sl=[3244733,7057110,10406625,13032431,15095053,16616764,16625259,16634018,3253076,7652470,10607003,13101504,7695281,10394312,12369372,14342891,6513507,9868950,12434877,14277081].map(Mt);ta.scale.quantile=function(){return no([],[])},ta.scale.quantize=function(){return to(0,1,[0,1])},ta.scale.threshold=function(){return eo([.5],[0,1])},ta.scale.identity=function(){return ro([0,1])},ta.svg={},ta.svg.arc=function(){function n(){var n=Math.max(0,+e.apply(this,arguments)),l=Math.max(0,+r.apply(this,arguments)),s=o.apply(this,arguments)-Ra,f=a.apply(this,arguments)-Ra,h=Math.abs(f-s),g=s>f?0:1;if(n>l&&(p=l,l=n,n=p),h>=Ta)return t(l,g)+(n?t(n,1-g):"")+"Z";var p,v,d,m,y,M,x,b,_,w,S,k,E=0,A=0,N=[];if((m=(+c.apply(this,arguments)||0)/2)&&(d=i===kl?Math.sqrt(n*n+l*l):+i.apply(this,arguments),g||(A*=-1),l&&(A=tt(d/l*Math.sin(m))),n&&(E=tt(d/n*Math.sin(m)))),l){y=l*Math.cos(s+A),M=l*Math.sin(s+A),x=l*Math.cos(f-A),b=l*Math.sin(f-A);var C=Math.abs(f-s-2*A)<=qa?0:1;if(A&&so(y,M,x,b)===g^C){var z=(s+f)/2;y=l*Math.cos(z),M=l*Math.sin(z),x=b=null}}else y=M=0;if(n){_=n*Math.cos(f-E),w=n*Math.sin(f-E),S=n*Math.cos(s+E),k=n*Math.sin(s+E);var q=Math.abs(s-f+2*E)<=qa?0:1;if(E&&so(_,w,S,k)===1-g^q){var L=(s+f)/2;_=n*Math.cos(L),w=n*Math.sin(L),S=k=null}}else _=w=0;if((p=Math.min(Math.abs(l-n)/2,+u.apply(this,arguments)))>.001){v=l>n^g?0:1;var T=null==S?[_,w]:null==x?[y,M]:Lr([y,M],[S,k],[x,b],[_,w]),R=y-T[0],D=M-T[1],P=x-T[0],U=b-T[1],j=1/Math.sin(Math.acos((R*P+D*U)/(Math.sqrt(R*R+D*D)*Math.sqrt(P*P+U*U)))/2),F=Math.sqrt(T[0]*T[0]+T[1]*T[1]);if(null!=x){var H=Math.min(p,(l-F)/(j+1)),O=fo(null==S?[_,w]:[S,k],[y,M],l,H,g),I=fo([x,b],[_,w],l,H,g);p===H?N.push("M",O[0],"A",H,",",H," 0 0,",v," ",O[1],"A",l,",",l," 0 ",1-g^so(O[1][0],O[1][1],I[1][0],I[1][1]),",",g," ",I[1],"A",H,",",H," 0 0,",v," ",I[0]):N.push("M",O[0],"A",H,",",H," 0 1,",v," ",I[0])}else N.push("M",y,",",M);if(null!=S){var Y=Math.min(p,(n-F)/(j-1)),Z=fo([y,M],[S,k],n,-Y,g),V=fo([_,w],null==x?[y,M]:[x,b],n,-Y,g);p===Y?N.push("L",V[0],"A",Y,",",Y," 0 0,",v," ",V[1],"A",n,",",n," 0 ",g^so(V[1][0],V[1][1],Z[1][0],Z[1][1]),",",1-g," ",Z[1],"A",Y,",",Y," 0 0,",v," ",Z[0]):N.push("L",V[0],"A",Y,",",Y," 0 0,",v," ",Z[0])}else N.push("L",_,",",w)}else N.push("M",y,",",M),null!=x&&N.push("A",l,",",l," 0 ",C,",",g," ",x,",",b),N.push("L",_,",",w),null!=S&&N.push("A",n,",",n," 0 ",q,",",1-g," ",S,",",k);return 
N.push("Z"),N.join("")}function t(n,t){return"M0,"+n+"A"+n+","+n+" 0 1,"+t+" 0,"+-n+"A"+n+","+n+" 0 1,"+t+" 0,"+n}var e=io,r=oo,u=uo,i=kl,o=ao,a=co,c=lo;return n.innerRadius=function(t){return arguments.length?(e=Et(t),n):e},n.outerRadius=function(t){return arguments.length?(r=Et(t),n):r},n.cornerRadius=function(t){return arguments.length?(u=Et(t),n):u},n.padRadius=function(t){return arguments.length?(i=t==kl?kl:Et(t),n):i},n.startAngle=function(t){return arguments.length?(o=Et(t),n):o},n.endAngle=function(t){return arguments.length?(a=Et(t),n):a},n.padAngle=function(t){return arguments.length?(c=Et(t),n):c},n.centroid=function(){var n=(+e.apply(this,arguments)+ +r.apply(this,arguments))/2,t=(+o.apply(this,arguments)+ +a.apply(this,arguments))/2-Ra;return[Math.cos(t)*n,Math.sin(t)*n]},n};var kl="auto";ta.svg.line=function(){return ho(y)};var El=ta.map({linear:go,"linear-closed":po,step:vo,"step-before":mo,"step-after":yo,basis:So,"basis-open":ko,"basis-closed":Eo,bundle:Ao,cardinal:bo,"cardinal-open":Mo,"cardinal-closed":xo,monotone:To});El.forEach(function(n,t){t.key=n,t.closed=/-closed$/.test(n)});var Al=[0,2/3,1/3,0],Nl=[0,1/3,2/3,0],Cl=[0,1/6,2/3,1/6];ta.svg.line.radial=function(){var n=ho(Ro);return n.radius=n.x,delete n.x,n.angle=n.y,delete n.y,n},mo.reverse=yo,yo.reverse=mo,ta.svg.area=function(){return Do(y)},ta.svg.area.radial=function(){var n=Do(Ro);return n.radius=n.x,delete n.x,n.innerRadius=n.x0,delete n.x0,n.outerRadius=n.x1,delete n.x1,n.angle=n.y,delete n.y,n.startAngle=n.y0,delete n.y0,n.endAngle=n.y1,delete n.y1,n},ta.svg.chord=function(){function n(n,a){var c=t(this,i,n,a),l=t(this,o,n,a);return"M"+c.p0+r(c.r,c.p1,c.a1-c.a0)+(e(c,l)?u(c.r,c.p1,c.r,c.p0):u(c.r,c.p1,l.r,l.p0)+r(l.r,l.p1,l.a1-l.a0)+u(l.r,l.p1,c.r,c.p0))+"Z"}function t(n,t,e,r){var u=t.call(n,e,r),i=a.call(n,u,r),o=c.call(n,u,r)-Ra,s=l.call(n,u,r)-Ra;return{r:i,a0:o,a1:s,p0:[i*Math.cos(o),i*Math.sin(o)],p1:[i*Math.cos(s),i*Math.sin(s)]}}function e(n,t){return n.a0==t.a0&&n.a1==t.a1}function r(n,t,e){return"A"+n+","+n+" 0 "+ +(e>qa)+",1 "+t}function u(n,t,e,r){return"Q 0,0 "+r}var i=mr,o=yr,a=Po,c=ao,l=co;return n.radius=function(t){return arguments.length?(a=Et(t),n):a},n.source=function(t){return arguments.length?(i=Et(t),n):i},n.target=function(t){return arguments.length?(o=Et(t),n):o},n.startAngle=function(t){return arguments.length?(c=Et(t),n):c},n.endAngle=function(t){return arguments.length?(l=Et(t),n):l},n},ta.svg.diagonal=function(){function n(n,u){var i=t.call(this,n,u),o=e.call(this,n,u),a=(i.y+o.y)/2,c=[i,{x:i.x,y:a},{x:o.x,y:a},o];return c=c.map(r),"M"+c[0]+"C"+c[1]+" "+c[2]+" "+c[3]}var t=mr,e=yr,r=Uo;return n.source=function(e){return arguments.length?(t=Et(e),n):t},n.target=function(t){return arguments.length?(e=Et(t),n):e},n.projection=function(t){return arguments.length?(r=t,n):r},n},ta.svg.diagonal.radial=function(){var n=ta.svg.diagonal(),t=Uo,e=n.projection;return n.projection=function(n){return arguments.length?e(jo(t=n)):t},n},ta.svg.symbol=function(){function n(n,r){return(zl.get(t.call(this,n,r))||Oo)(e.call(this,n,r))}var t=Ho,e=Fo;return n.type=function(e){return arguments.length?(t=Et(e),n):t},n.size=function(t){return arguments.length?(e=Et(t),n):e},n};var zl=ta.map({circle:Oo,cross:function(n){var t=Math.sqrt(n/5)/2;return"M"+-3*t+","+-t+"H"+-t+"V"+-3*t+"H"+t+"V"+-t+"H"+3*t+"V"+t+"H"+t+"V"+3*t+"H"+-t+"V"+t+"H"+-3*t+"Z"},diamond:function(n){var t=Math.sqrt(n/(2*Ll)),e=t*Ll;return"M0,"+-t+"L"+e+",0 0,"+t+" "+-e+",0Z"},square:function(n){var 
t=Math.sqrt(n)/2;return"M"+-t+","+-t+"L"+t+","+-t+" "+t+","+t+" "+-t+","+t+"Z"},"triangle-down":function(n){var t=Math.sqrt(n/ql),e=t*ql/2;return"M0,"+e+"L"+t+","+-e+" "+-t+","+-e+"Z"},"triangle-up":function(n){var t=Math.sqrt(n/ql),e=t*ql/2;return"M0,"+-e+"L"+t+","+e+" "+-t+","+e+"Z"}});ta.svg.symbolTypes=zl.keys();var ql=Math.sqrt(3),Ll=Math.tan(30*Da);_a.transition=function(n){for(var t,e,r=Tl||++Ul,u=Xo(n),i=[],o=Rl||{time:Date.now(),ease:Su,delay:0,duration:250},a=-1,c=this.length;++ai;i++){u.push(t=[]);for(var e=this[i],a=0,c=e.length;c>a;a++)(r=e[a])&&n.call(r,r.__data__,a,i)&&t.push(r)}return Yo(u,this.namespace,this.id)},Pl.tween=function(n,t){var e=this.id,r=this.namespace;return arguments.length<2?this.node()[r][e].tween.get(n):Y(this,null==t?function(t){t[r][e].tween.remove(n)}:function(u){u[r][e].tween.set(n,t)})},Pl.attr=function(n,t){function e(){this.removeAttribute(a)}function r(){this.removeAttributeNS(a.space,a.local)}function u(n){return null==n?e:(n+="",function(){var t,e=this.getAttribute(a);return e!==n&&(t=o(e,n),function(n){this.setAttribute(a,t(n))})})}function i(n){return null==n?r:(n+="",function(){var t,e=this.getAttributeNS(a.space,a.local);return e!==n&&(t=o(e,n),function(n){this.setAttributeNS(a.space,a.local,t(n))})})}if(arguments.length<2){for(t in n)this.attr(t,n[t]);return this}var o="transform"==n?Hu:mu,a=ta.ns.qualify(n);return Zo(this,"attr."+n,t,a.local?i:u)},Pl.attrTween=function(n,t){function e(n,e){var r=t.call(this,n,e,this.getAttribute(u));return r&&function(n){this.setAttribute(u,r(n))}}function r(n,e){var r=t.call(this,n,e,this.getAttributeNS(u.space,u.local));return r&&function(n){this.setAttributeNS(u.space,u.local,r(n))}}var u=ta.ns.qualify(n);return this.tween("attr."+n,u.local?r:e)},Pl.style=function(n,e,r){function u(){this.style.removeProperty(n)}function i(e){return null==e?u:(e+="",function(){var u,i=t(this).getComputedStyle(this,null).getPropertyValue(n);return i!==e&&(u=mu(i,e),function(t){this.style.setProperty(n,u(t),r)})})}var o=arguments.length;if(3>o){if("string"!=typeof n){2>o&&(e="");for(r in n)this.style(r,n[r],e);return this}r=""}return Zo(this,"style."+n,e,i)},Pl.styleTween=function(n,e,r){function u(u,i){var o=e.call(this,u,i,t(this).getComputedStyle(this,null).getPropertyValue(n));return o&&function(t){this.style.setProperty(n,o(t),r)}}return arguments.length<3&&(r=""),this.tween("style."+n,u)},Pl.text=function(n){return Zo(this,"text",n,Vo)},Pl.remove=function(){var n=this.namespace;return this.each("end.transition",function(){var t;this[n].count<2&&(t=this.parentNode)&&t.removeChild(this)})},Pl.ease=function(n){var t=this.id,e=this.namespace;return arguments.length<1?this.node()[e][t].ease:("function"!=typeof n&&(n=ta.ease.apply(ta,arguments)),Y(this,function(r){r[e][t].ease=n}))},Pl.delay=function(n){var t=this.id,e=this.namespace;return arguments.length<1?this.node()[e][t].delay:Y(this,"function"==typeof n?function(r,u,i){r[e][t].delay=+n.call(r,r.__data__,u,i)}:(n=+n,function(r){r[e][t].delay=n}))},Pl.duration=function(n){var t=this.id,e=this.namespace;return arguments.length<1?this.node()[e][t].duration:Y(this,"function"==typeof n?function(r,u,i){r[e][t].duration=Math.max(1,n.call(r,r.__data__,u,i))}:(n=Math.max(1,n),function(r){r[e][t].duration=n}))},Pl.each=function(n,t){var e=this.id,r=this.namespace;if(arguments.length<2){var u=Rl,i=Tl;try{Tl=e,Y(this,function(t,u,i){Rl=t[r][e],n.call(t,t.__data__,u,i)})}finally{Rl=u,Tl=i}}else Y(this,function(u){var 
i=u[r][e];(i.event||(i.event=ta.dispatch("start","end","interrupt"))).on(n,t)});return this},Pl.transition=function(){for(var n,t,e,r,u=this.id,i=++Ul,o=this.namespace,a=[],c=0,l=this.length;l>c;c++){a.push(n=[]);for(var t=this[c],s=0,f=t.length;f>s;s++)(e=t[s])&&(r=e[o][u],$o(e,s,o,i,{time:r.time,ease:r.ease,delay:r.delay+r.duration,duration:r.duration})),n.push(e)}return Yo(a,o,i)},ta.svg.axis=function(){function n(n){n.each(function(){var n,l=ta.select(this),s=this.__chart__||e,f=this.__chart__=e.copy(),h=null==c?f.ticks?f.ticks.apply(f,a):f.domain():c,g=null==t?f.tickFormat?f.tickFormat.apply(f,a):y:t,p=l.selectAll(".tick").data(h,f),v=p.enter().insert("g",".domain").attr("class","tick").style("opacity",Ca),d=ta.transition(p.exit()).style("opacity",Ca).remove(),m=ta.transition(p.order()).style("opacity",1),M=Math.max(u,0)+o,x=Ui(f),b=l.selectAll(".domain").data([0]),_=(b.enter().append("path").attr("class","domain"),ta.transition(b));v.append("line"),v.append("text");var w,S,k,E,A=v.select("line"),N=m.select("line"),C=p.select("text").text(g),z=v.select("text"),q=m.select("text"),L="top"===r||"left"===r?-1:1;if("bottom"===r||"top"===r?(n=Bo,w="x",k="y",S="x2",E="y2",C.attr("dy",0>L?"0em":".71em").style("text-anchor","middle"),_.attr("d","M"+x[0]+","+L*i+"V0H"+x[1]+"V"+L*i)):(n=Wo,w="y",k="x",S="y2",E="x2",C.attr("dy",".32em").style("text-anchor",0>L?"end":"start"),_.attr("d","M"+L*i+","+x[0]+"H0V"+x[1]+"H"+L*i)),A.attr(E,L*u),z.attr(k,L*M),N.attr(S,0).attr(E,L*u),q.attr(w,0).attr(k,L*M),f.rangeBand){var T=f,R=T.rangeBand()/2;s=f=function(n){return T(n)+R}}else s.rangeBand?s=f:d.call(n,f,s);v.call(n,s,f),m.call(n,f,f)})}var t,e=ta.scale.linear(),r=jl,u=6,i=6,o=3,a=[10],c=null;return n.scale=function(t){return arguments.length?(e=t,n):e},n.orient=function(t){return arguments.length?(r=t in Fl?t+"":jl,n):r},n.ticks=function(){return arguments.length?(a=arguments,n):a},n.tickValues=function(t){return arguments.length?(c=t,n):c},n.tickFormat=function(e){return arguments.length?(t=e,n):t},n.tickSize=function(t){var e=arguments.length;return e?(u=+t,i=+arguments[e-1],n):u},n.innerTickSize=function(t){return arguments.length?(u=+t,n):u},n.outerTickSize=function(t){return arguments.length?(i=+t,n):i},n.tickPadding=function(t){return arguments.length?(o=+t,n):o},n.tickSubdivide=function(){return arguments.length&&n},n};var jl="bottom",Fl={top:1,right:1,bottom:1,left:1};ta.svg.brush=function(){function n(t){t.each(function(){var t=ta.select(this).style("pointer-events","all").style("-webkit-tap-highlight-color","rgba(0,0,0,0)").on("mousedown.brush",i).on("touchstart.brush",i),o=t.selectAll(".background").data([0]);o.enter().append("rect").attr("class","background").style("visibility","hidden").style("cursor","crosshair"),t.selectAll(".extent").data([0]).enter().append("rect").attr("class","extent").style("cursor","move");var a=t.selectAll(".resize").data(v,y);a.exit().remove(),a.enter().append("g").attr("class",function(n){return"resize "+n}).style("cursor",function(n){return Hl[n]}).append("rect").attr("x",function(n){return/[ew]$/.test(n)?-3:null}).attr("y",function(n){return/^[ns]/.test(n)?-3:null}).attr("width",6).attr("height",6).style("visibility","hidden"),a.style("display",n.empty()?"none":null);var c,f=ta.transition(t),h=ta.transition(o);l&&(c=Ui(l),h.attr("x",c[0]).attr("width",c[1]-c[0]),r(f)),s&&(c=Ui(s),h.attr("y",c[0]).attr("height",c[1]-c[0]),u(f)),e(f)})}function 
e(n){n.selectAll(".resize").attr("transform",function(n){return"translate("+f[+/e$/.test(n)]+","+h[+/^s/.test(n)]+")"})}function r(n){n.select(".extent").attr("x",f[0]),n.selectAll(".extent,.n>rect,.s>rect").attr("width",f[1]-f[0])}function u(n){n.select(".extent").attr("y",h[0]),n.selectAll(".extent,.e>rect,.w>rect").attr("height",h[1]-h[0])}function i(){function i(){32==ta.event.keyCode&&(C||(M=null,q[0]-=f[1],q[1]-=h[1],C=2),S())}function v(){32==ta.event.keyCode&&2==C&&(q[0]+=f[1],q[1]+=h[1],C=0,S())}function d(){var n=ta.mouse(b),t=!1;x&&(n[0]+=x[0],n[1]+=x[1]),C||(ta.event.altKey?(M||(M=[(f[0]+f[1])/2,(h[0]+h[1])/2]),q[0]=f[+(n[0]s?(u=r,r=s):u=s),v[0]!=r||v[1]!=u?(e?a=null:o=null,v[0]=r,v[1]=u,!0):void 0}function y(){d(),k.style("pointer-events","all").selectAll(".resize").style("display",n.empty()?"none":null),ta.select("body").style("cursor",null),L.on("mousemove.brush",null).on("mouseup.brush",null).on("touchmove.brush",null).on("touchend.brush",null).on("keydown.brush",null).on("keyup.brush",null),z(),w({type:"brushend"})}var M,x,b=this,_=ta.select(ta.event.target),w=c.of(b,arguments),k=ta.select(b),E=_.datum(),A=!/^(n|s)$/.test(E)&&l,N=!/^(e|w)$/.test(E)&&s,C=_.classed("extent"),z=W(b),q=ta.mouse(b),L=ta.select(t(b)).on("keydown.brush",i).on("keyup.brush",v);if(ta.event.changedTouches?L.on("touchmove.brush",d).on("touchend.brush",y):L.on("mousemove.brush",d).on("mouseup.brush",y),k.interrupt().selectAll("*").interrupt(),C)q[0]=f[0]-q[0],q[1]=h[0]-q[1];else if(E){var T=+/w$/.test(E),R=+/^n/.test(E);x=[f[1-T]-q[0],h[1-R]-q[1]],q[0]=f[T],q[1]=h[R]}else ta.event.altKey&&(M=q.slice());k.style("pointer-events","none").selectAll(".resize").style("display",null),ta.select("body").style("cursor",_.style("cursor")),w({type:"brushstart"}),d()}var o,a,c=E(n,"brushstart","brush","brushend"),l=null,s=null,f=[0,0],h=[0,0],g=!0,p=!0,v=Ol[0];return n.event=function(n){n.each(function(){var n=c.of(this,arguments),t={x:f,y:h,i:o,j:a},e=this.__chart__||t;this.__chart__=t,Tl?ta.select(this).transition().each("start.brush",function(){o=e.i,a=e.j,f=e.x,h=e.y,n({type:"brushstart"})}).tween("brush:brush",function(){var e=yu(f,t.x),r=yu(h,t.y);return o=a=null,function(u){f=t.x=e(u),h=t.y=r(u),n({type:"brush",mode:"resize"})}}).each("end.brush",function(){o=t.i,a=t.j,n({type:"brush",mode:"resize"}),n({type:"brushend"})}):(n({type:"brushstart"}),n({type:"brush",mode:"resize"}),n({type:"brushend"}))})},n.x=function(t){return arguments.length?(l=t,v=Ol[!l<<1|!s],n):l},n.y=function(t){return arguments.length?(s=t,v=Ol[!l<<1|!s],n):s},n.clamp=function(t){return arguments.length?(l&&s?(g=!!t[0],p=!!t[1]):l?g=!!t:s&&(p=!!t),n):l&&s?[g,p]:l?g:s?p:null},n.extent=function(t){var e,r,u,i,c;return arguments.length?(l&&(e=t[0],r=t[1],s&&(e=e[0],r=r[0]),o=[e,r],l.invert&&(e=l(e),r=l(r)),e>r&&(c=e,e=r,r=c),(e!=f[0]||r!=f[1])&&(f=[e,r])),s&&(u=t[0],i=t[1],l&&(u=u[1],i=i[1]),a=[u,i],s.invert&&(u=s(u),i=s(i)),u>i&&(c=u,u=i,i=c),(u!=h[0]||i!=h[1])&&(h=[u,i])),n):(l&&(o?(e=o[0],r=o[1]):(e=f[0],r=f[1],l.invert&&(e=l.invert(e),r=l.invert(r)),e>r&&(c=e,e=r,r=c))),s&&(a?(u=a[0],i=a[1]):(u=h[0],i=h[1],s.invert&&(u=s.invert(u),i=s.invert(i)),u>i&&(c=u,u=i,i=c))),l&&s?[[e,u],[r,i]]:l?[e,r]:s&&[u,i])},n.clear=function(){return n.empty()||(f=[0,0],h=[0,0],o=a=null),n},n.empty=function(){return!!l&&f[0]==f[1]||!!s&&h[0]==h[1]},ta.rebind(n,c,"on")};var 
Hl={n:"ns-resize",e:"ew-resize",s:"ns-resize",w:"ew-resize",nw:"nwse-resize",ne:"nesw-resize",se:"nwse-resize",sw:"nesw-resize"},Ol=[["n","e","s","w","nw","ne","se","sw"],["e","w"],["n","s"],[]],Il=ac.format=gc.timeFormat,Yl=Il.utc,Zl=Yl("%Y-%m-%dT%H:%M:%S.%LZ");Il.iso=Date.prototype.toISOString&&+new Date("2000-01-01T00:00:00.000Z")?Jo:Zl,Jo.parse=function(n){var t=new Date(n);return isNaN(t)?null:t},Jo.toString=Zl.toString,ac.second=Ft(function(n){return new cc(1e3*Math.floor(n/1e3))},function(n,t){n.setTime(n.getTime()+1e3*Math.floor(t))},function(n){return n.getSeconds()}),ac.seconds=ac.second.range,ac.seconds.utc=ac.second.utc.range,ac.minute=Ft(function(n){return new cc(6e4*Math.floor(n/6e4))},function(n,t){n.setTime(n.getTime()+6e4*Math.floor(t))},function(n){return n.getMinutes()}),ac.minutes=ac.minute.range,ac.minutes.utc=ac.minute.utc.range,ac.hour=Ft(function(n){var t=n.getTimezoneOffset()/60;return new cc(36e5*(Math.floor(n/36e5-t)+t))},function(n,t){n.setTime(n.getTime()+36e5*Math.floor(t))},function(n){return n.getHours()}),ac.hours=ac.hour.range,ac.hours.utc=ac.hour.utc.range,ac.month=Ft(function(n){return n=ac.day(n),n.setDate(1),n},function(n,t){n.setMonth(n.getMonth()+t)},function(n){return n.getMonth()}),ac.months=ac.month.range,ac.months.utc=ac.month.utc.range;var Vl=[1e3,5e3,15e3,3e4,6e4,3e5,9e5,18e5,36e5,108e5,216e5,432e5,864e5,1728e5,6048e5,2592e6,7776e6,31536e6],Xl=[[ac.second,1],[ac.second,5],[ac.second,15],[ac.second,30],[ac.minute,1],[ac.minute,5],[ac.minute,15],[ac.minute,30],[ac.hour,1],[ac.hour,3],[ac.hour,6],[ac.hour,12],[ac.day,1],[ac.day,2],[ac.week,1],[ac.month,1],[ac.month,3],[ac.year,1]],$l=Il.multi([[".%L",function(n){return n.getMilliseconds()}],[":%S",function(n){return n.getSeconds()}],["%I:%M",function(n){return n.getMinutes()}],["%I %p",function(n){return n.getHours()}],["%a %d",function(n){return n.getDay()&&1!=n.getDate()}],["%b %d",function(n){return 1!=n.getDate()}],["%B",function(n){return n.getMonth()}],["%Y",Ne]]),Bl={range:function(n,t,e){return ta.range(Math.ceil(n/e)*e,+t,e).map(Ko)},floor:y,ceil:y};Xl.year=ac.year,ac.scale=function(){return Go(ta.scale.linear(),Xl,$l)};var Wl=Xl.map(function(n){return[n[0].utc,n[1]]}),Jl=Yl.multi([[".%L",function(n){return n.getUTCMilliseconds()}],[":%S",function(n){return n.getUTCSeconds()}],["%I:%M",function(n){return n.getUTCMinutes()}],["%I %p",function(n){return n.getUTCHours()}],["%a %d",function(n){return n.getUTCDay()&&1!=n.getUTCDate()}],["%b %d",function(n){return 1!=n.getUTCDate()}],["%B",function(n){return n.getUTCMonth()}],["%Y",Ne]]);Wl.year=ac.year.utc,ac.scale.utc=function(){return Go(ta.scale.linear(),Wl,Jl)},ta.text=At(function(n){return n.responseText}),ta.json=function(n,t){return Nt(n,"application/json",Qo,t)},ta.html=function(n,t){return Nt(n,"text/html",na,t)},ta.xml=At(function(n){return n.responseXML}),"function"==typeof define&&define.amd?define(ta):"object"==typeof module&&module.exports&&(module.exports=ta),this.d3=ta}(); diff --git a/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js new file mode 100644 index 0000000000000..2d9262b972a59 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js @@ -0,0 +1,29 @@ +/* This is a custom version of dagre-d3 on top of v0.4.3. 
The full list of commits can be found at http://github.com/andrewor14/dagre-d3/ */!function(e){if("object"==typeof exports&&"undefined"!=typeof module)module.exports=e();else if("function"==typeof define&&define.amd)define([],e);else{var f;"undefined"!=typeof window?f=window:"undefined"!=typeof global?f=global:"undefined"!=typeof self&&(f=self),f.dagreD3=e()}}(function(){var define,module,exports;return function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o0}},{}],14:[function(require,module,exports){module.exports=intersectNode;function intersectNode(node,point){return node.intersect(point)}},{}],15:[function(require,module,exports){var intersectLine=require("./intersect-line");module.exports=intersectPolygon;function intersectPolygon(node,polyPoints,point){var x1=node.x;var y1=node.y;var intersections=[];var minX=Number.POSITIVE_INFINITY,minY=Number.POSITIVE_INFINITY;polyPoints.forEach(function(entry){minX=Math.min(minX,entry.x);minY=Math.min(minY,entry.y)});var left=x1-node.width/2-minX;var top=y1-node.height/2-minY;for(var i=0;i1){intersections.sort(function(p,q){var pdx=p.x-point.x,pdy=p.y-point.y,distp=Math.sqrt(pdx*pdx+pdy*pdy),qdx=q.x-point.x,qdy=q.y-point.y,distq=Math.sqrt(qdx*qdx+qdy*qdy);return distpMath.abs(dx)*h){if(dy<0){h=-h}sx=dy===0?0:h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=dx===0?0:w*dy/dx}return{x:x+sx,y:y+sy}}},{}],17:[function(require,module,exports){var util=require("../util");module.exports=addHtmlLabel;function addHtmlLabel(root,node){var fo=root.append("foreignObject").attr("width","100000");var div=fo.append("xhtml:div");var label=node.label;switch(typeof label){case"function":div.insert(label);break;case"object":div.insert(function(){return label});break;default:div.html(label)}util.applyStyle(div,node.labelStyle);div.style("display","inline-block");div.style("white-space","nowrap");var w,h;div.each(function(){w=this.clientWidth;h=this.clientHeight});fo.attr("width",w).attr("height",h);return fo}},{"../util":25}],18:[function(require,module,exports){var addTextLabel=require("./add-text-label"),addHtmlLabel=require("./add-html-label");module.exports=addLabel;function addLabel(root,node){var label=node.label;var labelSvg=root.append("g");if(typeof label!=="string"||node.labelType==="html"){addHtmlLabel(labelSvg,node)}else{addTextLabel(labelSvg,node)}var labelBBox=labelSvg.node().getBBox();labelSvg.attr("transform","translate("+-labelBBox.width/2+","+-labelBBox.height/2+")");return labelSvg}},{"./add-html-label":17,"./add-text-label":19}],19:[function(require,module,exports){var util=require("../util");module.exports=addTextLabel;function addTextLabel(root,node){var domNode=root.append("text");var lines=processEscapeSequences(node.label).split("\n");for(var i=0;imaxPadding){maxPadding=child.paddingTop}}return maxPadding}function getRank(g,v){var maxRank=0;var children=g.children(v);for(var i=0;imaxRank){maxRank=thisRank}}return maxRank}function orderByRank(g,nodes){return nodes.sort(function(x,y){return getRank(g,x)-getRank(g,y)})}function edgeToId(e){return escapeId(e.v)+":"+escapeId(e.w)+":"+escapeId(e.name)}var ID_DELIM=/:/g;function escapeId(str){return str?String(str).replace(ID_DELIM,"\\:"):""}function 
applyStyle(dom,styleFn){if(styleFn){dom.attr("style",styleFn)}}function applyClass(dom,classFn,otherClasses){if(classFn){dom.attr("class",classFn).attr("class",otherClasses+" "+dom.attr("class"))}}function applyTransition(selection,g){var graph=g.graph();if(_.isPlainObject(graph)){var transition=graph.transition;if(_.isFunction(transition)){return transition(selection)}}return selection}},{"./lodash":20}],26:[function(require,module,exports){module.exports="0.4.4-pre"},{}],27:[function(require,module,exports){module.exports={graphlib:require("./lib/graphlib"),layout:require("./lib/layout"),debug:require("./lib/debug"),util:{time:require("./lib/util").time,notime:require("./lib/util").notime},version:require("./lib/version")}},{"./lib/debug":32,"./lib/graphlib":33,"./lib/layout":35,"./lib/util":55,"./lib/version":56}],28:[function(require,module,exports){"use strict";var _=require("./lodash"),greedyFAS=require("./greedy-fas");module.exports={run:run,undo:undo};function run(g){var fas=g.graph().acyclicer==="greedy"?greedyFAS(g,weightFn(g)):dfsFAS(g);_.each(fas,function(e){var label=g.edge(e);g.removeEdge(e);label.forwardName=e.name;label.reversed=true;g.setEdge(e.w,e.v,label,_.uniqueId("rev"))});function weightFn(g){return function(e){return g.edge(e).weight}}}function dfsFAS(g){var fas=[],stack={},visited={};function dfs(v){if(_.has(visited,v)){return}visited[v]=true;stack[v]=true;_.each(g.outEdges(v),function(e){if(_.has(stack,e.w)){fas.push(e)}else{dfs(e.w)}});delete stack[v]}_.each(g.nodes(),dfs);return fas}function undo(g){_.each(g.edges(),function(e){var label=g.edge(e);if(label.reversed){g.removeEdge(e);var forwardName=label.forwardName;delete label.reversed;delete label.forwardName;g.setEdge(e.w,e.v,label,forwardName)}})}},{"./greedy-fas":34,"./lodash":36}],29:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports=addBorderSegments;function addBorderSegments(g){function dfs(v){var children=g.children(v),node=g.node(v);if(children.length){_.each(children,dfs)}if(_.has(node,"minRank")){node.borderLeft=[];node.borderRight=[];for(var rank=node.minRank,maxRank=node.maxRank+1;rank0;--i){entry=buckets[i].dequeue();if(entry){results=results.concat(removeNode(g,buckets,zeroIdx,entry,true));break}}}}return results}function removeNode(g,buckets,zeroIdx,entry,collectPredecessors){var results=collectPredecessors?[]:undefined;_.each(g.inEdges(entry.v),function(edge){var weight=g.edge(edge),uEntry=g.node(edge.v);if(collectPredecessors){results.push({v:edge.v,w:edge.w})}uEntry.out-=weight;assignBucket(buckets,zeroIdx,uEntry)});_.each(g.outEdges(entry.v),function(edge){var weight=g.edge(edge),w=edge.w,wEntry=g.node(w);wEntry["in"]-=weight;assignBucket(buckets,zeroIdx,wEntry)});g.removeNode(entry.v);return results}function buildState(g,weightFn){var fasGraph=new Graph,maxIn=0,maxOut=0;_.each(g.nodes(),function(v){fasGraph.setNode(v,{v:v,"in":0,out:0})});_.each(g.edges(),function(e){var prevWeight=fasGraph.edge(e.v,e.w)||0,weight=weightFn(e),edgeWeight=prevWeight+weight;fasGraph.setEdge(e.v,e.w,edgeWeight);maxOut=Math.max(maxOut,fasGraph.node(e.v).out+=weight);maxIn=Math.max(maxIn,fasGraph.node(e.w)["in"]+=weight)});var buckets=_.range(maxOut+maxIn+3).map(function(){return new List});var zeroIdx=maxIn+1;_.each(fasGraph.nodes(),function(v){assignBucket(buckets,zeroIdx,fasGraph.node(v))});return{graph:fasGraph,buckets:buckets,zeroIdx:zeroIdx}}function assignBucket(buckets,zeroIdx,entry){if(!entry.out){buckets[0].enqueue(entry)}else 
if(!entry["in"]){buckets[buckets.length-1].enqueue(entry)}else{buckets[entry.out-entry["in"]+zeroIdx].enqueue(entry)}}},{"./data/list":31,"./graphlib":33,"./lodash":36}],35:[function(require,module,exports){"use strict";var _=require("./lodash"),acyclic=require("./acyclic"),normalize=require("./normalize"),rank=require("./rank"),normalizeRanks=require("./util").normalizeRanks,parentDummyChains=require("./parent-dummy-chains"),removeEmptyRanks=require("./util").removeEmptyRanks,nestingGraph=require("./nesting-graph"),addBorderSegments=require("./add-border-segments"),coordinateSystem=require("./coordinate-system"),order=require("./order"),position=require("./position"),util=require("./util"),Graph=require("./graphlib").Graph;module.exports=layout;function layout(g,opts){var time=opts&&opts.debugTiming?util.time:util.notime;time("layout",function(){var layoutGraph=time(" buildLayoutGraph",function(){return buildLayoutGraph(g)});time(" runLayout",function(){runLayout(layoutGraph,time)});time(" updateInputGraph",function(){updateInputGraph(g,layoutGraph)})})}function runLayout(g,time){time(" makeSpaceForEdgeLabels",function(){makeSpaceForEdgeLabels(g)});time(" removeSelfEdges",function(){removeSelfEdges(g)});time(" acyclic",function(){acyclic.run(g)});time(" nestingGraph.run",function(){nestingGraph.run(g)});time(" rank",function(){rank(util.asNonCompoundGraph(g))});time(" injectEdgeLabelProxies",function(){injectEdgeLabelProxies(g)});time(" removeEmptyRanks",function(){removeEmptyRanks(g)});time(" nestingGraph.cleanup",function(){nestingGraph.cleanup(g)});time(" normalizeRanks",function(){normalizeRanks(g)});time(" assignRankMinMax",function(){assignRankMinMax(g)});time(" removeEdgeLabelProxies",function(){removeEdgeLabelProxies(g)});time(" normalize.run",function(){normalize.run(g)});time(" parentDummyChains",function(){ +parentDummyChains(g)});time(" addBorderSegments",function(){addBorderSegments(g)});time(" order",function(){order(g)});time(" insertSelfEdges",function(){insertSelfEdges(g)});time(" adjustCoordinateSystem",function(){coordinateSystem.adjust(g)});time(" position",function(){position(g)});time(" positionSelfEdges",function(){positionSelfEdges(g)});time(" removeBorderNodes",function(){removeBorderNodes(g)});time(" normalize.undo",function(){normalize.undo(g)});time(" fixupEdgeLabelCoords",function(){fixupEdgeLabelCoords(g)});time(" undoCoordinateSystem",function(){coordinateSystem.undo(g)});time(" translateGraph",function(){translateGraph(g)});time(" assignNodeIntersects",function(){assignNodeIntersects(g)});time(" reversePoints",function(){reversePointsForReversedEdges(g)});time(" acyclic.undo",function(){acyclic.undo(g)})}function updateInputGraph(inputGraph,layoutGraph){_.each(inputGraph.nodes(),function(v){var inputLabel=inputGraph.node(v),layoutLabel=layoutGraph.node(v);if(inputLabel){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y;if(layoutGraph.children(v).length){inputLabel.width=layoutLabel.width;inputLabel.height=layoutLabel.height}}});_.each(inputGraph.edges(),function(e){var inputLabel=inputGraph.edge(e),layoutLabel=layoutGraph.edge(e);inputLabel.points=layoutLabel.points;if(_.has(layoutLabel,"x")){inputLabel.x=layoutLabel.x;inputLabel.y=layoutLabel.y}});inputGraph.graph().width=layoutGraph.graph().width;inputGraph.graph().height=layoutGraph.graph().height}var 
graphNumAttrs=["nodesep","edgesep","ranksep","marginx","marginy"],graphDefaults={ranksep:50,edgesep:20,nodesep:50,rankdir:"tb"},graphAttrs=["acyclicer","ranker","rankdir","align"],nodeNumAttrs=["width","height"],nodeDefaults={width:0,height:0},edgeNumAttrs=["minlen","weight","width","height","labeloffset"],edgeDefaults={minlen:1,weight:1,width:0,height:0,labeloffset:10,labelpos:"r"},edgeAttrs=["labelpos"];function buildLayoutGraph(inputGraph){var g=new Graph({multigraph:true,compound:true}),graph=canonicalize(inputGraph.graph());g.setGraph(_.merge({},graphDefaults,selectNumberAttrs(graph,graphNumAttrs),_.pick(graph,graphAttrs)));_.each(inputGraph.nodes(),function(v){var node=canonicalize(inputGraph.node(v));g.setNode(v,_.defaults(selectNumberAttrs(node,nodeNumAttrs),nodeDefaults));g.setParent(v,inputGraph.parent(v))});_.each(inputGraph.edges(),function(e){var edge=canonicalize(inputGraph.edge(e));g.setEdge(e,_.merge({},edgeDefaults,selectNumberAttrs(edge,edgeNumAttrs),_.pick(edge,edgeAttrs)))});return g}function makeSpaceForEdgeLabels(g){var graph=g.graph();graph.ranksep/=2;_.each(g.edges(),function(e){var edge=g.edge(e);edge.minlen*=2;if(edge.labelpos.toLowerCase()!=="c"){if(graph.rankdir==="TB"||graph.rankdir==="BT"){edge.width+=edge.labeloffset}else{edge.height+=edge.labeloffset}}})}function injectEdgeLabelProxies(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.width&&edge.height){var v=g.node(e.v),w=g.node(e.w),label={rank:(w.rank-v.rank)/2+v.rank,e:e};util.addDummyNode(g,"edge-proxy",label,"_ep")}})}function assignRankMinMax(g){var maxRank=0;_.each(g.nodes(),function(v){var node=g.node(v);if(node.borderTop){node.minRank=g.node(node.borderTop).rank;node.maxRank=g.node(node.borderBottom).rank;maxRank=_.max(maxRank,node.maxRank)}});g.graph().maxRank=maxRank}function removeEdgeLabelProxies(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="edge-proxy"){g.edge(node.e).labelRank=node.rank;g.removeNode(v)}})}function translateGraph(g){var minX=Number.POSITIVE_INFINITY,maxX=0,minY=Number.POSITIVE_INFINITY,maxY=0,graphLabel=g.graph(),marginX=graphLabel.marginx||0,marginY=graphLabel.marginy||0;function getExtremes(attrs){var x=attrs.x,y=attrs.y,w=attrs.width,h=attrs.height;minX=Math.min(minX,x-w/2);maxX=Math.max(maxX,x+w/2);minY=Math.min(minY,y-h/2);maxY=Math.max(maxY,y+h/2)}_.each(g.nodes(),function(v){getExtremes(g.node(v))});_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){getExtremes(edge)}});minX-=marginX;minY-=marginY;_.each(g.nodes(),function(v){var node=g.node(v);node.x-=minX;node.y-=minY});_.each(g.edges(),function(e){var edge=g.edge(e);_.each(edge.points,function(p){p.x-=minX;p.y-=minY});if(_.has(edge,"x")){edge.x-=minX}if(_.has(edge,"y")){edge.y-=minY}});graphLabel.width=maxX-minX+marginX;graphLabel.height=maxY-minY+marginY}function assignNodeIntersects(g){_.each(g.edges(),function(e){var edge=g.edge(e),nodeV=g.node(e.v),nodeW=g.node(e.w),p1,p2;if(!edge.points){edge.points=[];p1=nodeW;p2=nodeV}else{p1=edge.points[0];p2=edge.points[edge.points.length-1]}edge.points.unshift(util.intersectRect(nodeV,p1));edge.points.push(util.intersectRect(nodeW,p2))})}function fixupEdgeLabelCoords(g){_.each(g.edges(),function(e){var edge=g.edge(e);if(_.has(edge,"x")){if(edge.labelpos==="l"||edge.labelpos==="r"){edge.width-=edge.labeloffset}switch(edge.labelpos){case"l":edge.x-=edge.width/2+edge.labeloffset;break;case"r":edge.x+=edge.width/2+edge.labeloffset;break}}})}function reversePointsForReversedEdges(g){_.each(g.edges(),function(e){var 
edge=g.edge(e);if(edge.reversed){edge.points.reverse()}})}function removeBorderNodes(g){_.each(g.nodes(),function(v){if(g.children(v).length){var node=g.node(v),t=g.node(node.borderTop),b=g.node(node.borderBottom),l=g.node(_.last(node.borderLeft)),r=g.node(_.last(node.borderRight));node.width=Math.abs(r.x-l.x);node.height=Math.abs(b.y-t.y);node.x=l.x+node.width/2;node.y=t.y+node.height/2}});_.each(g.nodes(),function(v){if(g.node(v).dummy==="border"){g.removeNode(v)}})}function removeSelfEdges(g){_.each(g.edges(),function(e){if(e.v===e.w){var node=g.node(e.v);if(!node.selfEdges){node.selfEdges=[]}node.selfEdges.push({e:e,label:g.edge(e)});g.removeEdge(e)}})}function insertSelfEdges(g){var layers=util.buildLayerMatrix(g);_.each(layers,function(layer){var orderShift=0;_.each(layer,function(v,i){var node=g.node(v);node.order=i+orderShift;_.each(node.selfEdges,function(selfEdge){util.addDummyNode(g,"selfedge",{width:selfEdge.label.width,height:selfEdge.label.height,rank:node.rank,order:i+ ++orderShift,e:selfEdge.e,label:selfEdge.label},"_se")});delete node.selfEdges})})}function positionSelfEdges(g){_.each(g.nodes(),function(v){var node=g.node(v);if(node.dummy==="selfedge"){var selfNode=g.node(node.e.v),x=selfNode.x+selfNode.width/2,y=selfNode.y,dx=node.x-x,dy=selfNode.height/2;g.setEdge(node.e,node.label);g.removeNode(v);node.label.points=[{x:x+2*dx/3,y:y-dy},{x:x+5*dx/6,y:y-dy},{x:x+dx,y:y},{x:x+5*dx/6,y:y+dy},{x:x+2*dx/3,y:y+dy}];node.label.x=node.x;node.label.y=node.y}})}function selectNumberAttrs(obj,attrs){return _.mapValues(_.pick(obj,attrs),Number)}function canonicalize(attrs){var newAttrs={};_.each(attrs,function(v,k){newAttrs[k.toLowerCase()]=v});return newAttrs}},{"./acyclic":28,"./add-border-segments":29,"./coordinate-system":30,"./graphlib":33,"./lodash":36,"./nesting-graph":37,"./normalize":38,"./order":43,"./parent-dummy-chains":48,"./position":50,"./rank":52,"./util":55}],36:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],37:[function(require,module,exports){var _=require("./lodash"),util=require("./util");module.exports={run:run,cleanup:cleanup};function run(g){var root=util.addDummyNode(g,"root",{},"_root"),depths=treeDepths(g),height=_.max(depths)-1,nodeSep=2*height+1;g.graph().nestingRoot=root;_.each(g.edges(),function(e){g.edge(e).minlen*=nodeSep});var weight=sumWeights(g)+1;_.each(g.children(),function(child){dfs(g,root,nodeSep,weight,height,depths,child)});g.graph().nodeRankFactor=nodeSep}function dfs(g,root,nodeSep,weight,height,depths,v){var children=g.children(v);if(!children.length){if(v!==root){g.setEdge(root,v,{weight:0,minlen:nodeSep})}return}var top=util.addBorderNode(g,"_bt"),bottom=util.addBorderNode(g,"_bb"),label=g.node(v);g.setParent(top,v);label.borderTop=top;g.setParent(bottom,v);label.borderBottom=bottom;_.each(children,function(child){dfs(g,root,nodeSep,weight,height,depths,child);var childNode=g.node(child),childTop=childNode.borderTop?childNode.borderTop:child,childBottom=childNode.borderBottom?childNode.borderBottom:child,thisWeight=childNode.borderTop?weight:2*weight,minlen=childTop!==childBottom?1:height-depths[v]+1;g.setEdge(top,childTop,{weight:thisWeight,minlen:minlen,nestingEdge:true});g.setEdge(childBottom,bottom,{weight:thisWeight,minlen:minlen,nestingEdge:true})});if(!g.parent(v)){g.setEdge(root,top,{weight:0,minlen:height+depths[v]})}}function treeDepths(g){var depths={};function dfs(v,depth){var 
children=g.children(v);if(children&&children.length){_.each(children,function(child){dfs(child,depth+1)})}depths[v]=depth}_.each(g.children(),function(v){dfs(v,1)});return depths}function sumWeights(g){return _.reduce(g.edges(),function(acc,e){return acc+g.edge(e).weight},0)}function cleanup(g){var graphLabel=g.graph();g.removeNode(graphLabel.nestingRoot);delete graphLabel.nestingRoot;_.each(g.edges(),function(e){var edge=g.edge(e);if(edge.nestingEdge){g.removeEdge(e)}})}},{"./lodash":36,"./util":55}],38:[function(require,module,exports){"use strict";var _=require("./lodash"),util=require("./util");module.exports={run:run,undo:undo};function run(g){g.graph().dummyChains=[];_.each(g.edges(),function(edge){normalizeEdge(g,edge)})}function normalizeEdge(g,e){var v=e.v,vRank=g.node(v).rank,w=e.w,wRank=g.node(w).rank,name=e.name,edgeLabel=g.edge(e),labelRank=edgeLabel.labelRank;if(wRank===vRank+1)return;g.removeEdge(e);var dummy,attrs,i;for(i=0,++vRank;vRank0){if(index%2){weightSum+=tree[index+1]}index=index-1>>1;tree[index]+=entry.weight}cc+=entry.weight*weightSum}));return cc}},{"../lodash":36}],43:[function(require,module,exports){"use strict";var _=require("../lodash"),initOrder=require("./init-order"),crossCount=require("./cross-count"),sortSubgraph=require("./sort-subgraph"),buildLayerGraph=require("./build-layer-graph"),addSubgraphConstraints=require("./add-subgraph-constraints"),Graph=require("../graphlib").Graph,util=require("../util");module.exports=order;function order(g){var maxRank=util.maxRank(g),downLayerGraphs=buildLayerGraphs(g,_.range(1,maxRank+1),"inEdges"),upLayerGraphs=buildLayerGraphs(g,_.range(maxRank-1,-1,-1),"outEdges");var layering=initOrder(g);assignOrder(g,layering);var bestCC=Number.POSITIVE_INFINITY,best;for(var i=0,lastBest=0;lastBest<4;++i,++lastBest){sweepLayerGraphs(i%2?downLayerGraphs:upLayerGraphs,i%4>=2);layering=util.buildLayerMatrix(g);var cc=crossCount(g,layering);if(cc=vEntry.barycenter){mergeEntries(vEntry,uEntry)}}}function handleOut(vEntry){return function(wEntry){wEntry["in"].push(vEntry);if(--wEntry.indegree===0){sourceSet.push(wEntry)}}}while(sourceSet.length){var entry=sourceSet.pop();entries.push(entry);_.each(entry["in"].reverse(),handleIn(entry));_.each(entry.out,handleOut(entry))}return _.chain(entries).filter(function(entry){return!entry.merged}).map(function(entry){return _.pick(entry,["vs","i","barycenter","weight"])}).value()}function mergeEntries(target,source){var sum=0,weight=0;if(target.weight){sum+=target.barycenter*target.weight;weight+=target.weight}if(source.weight){sum+=source.barycenter*source.weight;weight+=source.weight}target.vs=source.vs.concat(target.vs);target.barycenter=sum/weight;target.weight=weight;target.i=Math.min(source.i,target.i);source.merged=true}},{"../lodash":36}],46:[function(require,module,exports){var _=require("../lodash"),barycenter=require("./barycenter"),resolveConflicts=require("./resolve-conflicts"),sort=require("./sort");module.exports=sortSubgraph;function sortSubgraph(g,v,cg,biasRight){var movable=g.children(v),node=g.node(v),bl=node?node.borderLeft:undefined,br=node?node.borderRight:undefined,subgraphs={};if(bl){movable=_.filter(movable,function(w){return w!==bl&&w!==br})}var barycenters=barycenter(g,movable);_.each(barycenters,function(entry){if(g.children(entry.v).length){var subgraphResult=sortSubgraph(g,entry.v,cg,biasRight);subgraphs[entry.v]=subgraphResult;if(_.has(subgraphResult,"barycenter")){mergeBarycenters(entry,subgraphResult)}}});var 
entries=resolveConflicts(barycenters,cg);expandSubgraphs(entries,subgraphs);var result=sort(entries,biasRight);if(bl){result.vs=_.flatten([bl,result.vs,br],true);if(g.predecessors(bl).length){var blPred=g.node(g.predecessors(bl)[0]),brPred=g.node(g.predecessors(br)[0]);if(!_.has(result,"barycenter")){result.barycenter=0;result.weight=0}result.barycenter=(result.barycenter*result.weight+blPred.order+brPred.order)/(result.weight+2);result.weight+=2}}return result}function expandSubgraphs(entries,subgraphs){_.each(entries,function(entry){entry.vs=_.flatten(entry.vs.map(function(v){if(subgraphs[v]){return subgraphs[v].vs}return v}),true)})}function mergeBarycenters(target,other){if(!_.isUndefined(target.barycenter)){target.barycenter=(target.barycenter*target.weight+other.barycenter*other.weight)/(target.weight+other.weight);target.weight+=other.weight}else{target.barycenter=other.barycenter;target.weight=other.weight}}},{"../lodash":36,"./barycenter":40,"./resolve-conflicts":45,"./sort":47}],47:[function(require,module,exports){var _=require("../lodash"),util=require("../util");module.exports=sort;function sort(entries,biasRight){var parts=util.partition(entries,function(entry){return _.has(entry,"barycenter")});var sortable=parts.lhs,unsortable=_.sortBy(parts.rhs,function(entry){return-entry.i}),vs=[],sum=0,weight=0,vsIndex=0;sortable.sort(compareWithBias(!!biasRight));vsIndex=consumeUnsortable(vs,unsortable,vsIndex);_.each(sortable,function(entry){vsIndex+=entry.vs.length;vs.push(entry.vs);sum+=entry.barycenter*entry.weight;weight+=entry.weight;vsIndex=consumeUnsortable(vs,unsortable,vsIndex)});var result={vs:_.flatten(vs,true)};if(weight){result.barycenter=sum/weight;result.weight=weight}return result}function consumeUnsortable(vs,unsortable,index){var last;while(unsortable.length&&(last=_.last(unsortable)).i<=index){unsortable.pop();vs.push(last.vs);index++}return index}function compareWithBias(bias){return function(entryV,entryW){if(entryV.barycenterentryW.barycenter){return 1}return!bias?entryV.i-entryW.i:entryW.i-entryV.i}}},{"../lodash":36,"../util":55}],48:[function(require,module,exports){var _=require("./lodash");module.exports=parentDummyChains;function parentDummyChains(g){var postorderNums=postorder(g);_.each(g.graph().dummyChains,function(v){var node=g.node(v),edgeObj=node.edgeObj,pathData=findPath(g,postorderNums,edgeObj.v,edgeObj.w),path=pathData.path,lca=pathData.lca,pathIdx=0,pathV=path[pathIdx],ascending=true;while(v!==edgeObj.w){node=g.node(v);if(ascending){while((pathV=path[pathIdx])!==lca&&g.node(pathV).maxRanklow||lim>postorderNums[parent].lim));lca=parent;parent=w;while((parent=g.parent(parent))!==lca){wPath.push(parent)}return{path:vPath.concat(wPath.reverse()),lca:lca}}function postorder(g){var result={},lim=0;function dfs(v){var low=lim;_.each(g.children(v),dfs);result[v]={low:low,lim:lim++}}_.each(g.children(),dfs);return result}},{"./lodash":36}],49:[function(require,module,exports){"use strict";var _=require("../lodash"),Graph=require("../graphlib").Graph,util=require("../util");module.exports={positionX:positionX,findType1Conflicts:findType1Conflicts,findType2Conflicts:findType2Conflicts,addConflict:addConflict,hasConflict:hasConflict,verticalAlignment:verticalAlignment,horizontalCompaction:horizontalCompaction,alignCoordinates:alignCoordinates,findSmallestWidthAlignment:findSmallestWidthAlignment,balance:balance};function findType1Conflicts(g,layering){var conflicts={};function visitLayer(prevLayer,layer){var 
k0=0,scanPos=0,prevLayerLength=prevLayer.length,lastNode=_.last(layer);_.each(layer,function(v,i){var w=findOtherInnerSegmentNode(g,v),k1=w?g.node(w).order:prevLayerLength;if(w||v===lastNode){_.each(layer.slice(scanPos,i+1),function(scanNode){_.each(g.predecessors(scanNode),function(u){var uLabel=g.node(u),uPos=uLabel.order;if((uPosnextNorthBorder)){addConflict(conflicts,u,v)}})}})}function visitLayer(north,south){var prevNorthPos=-1,nextNorthPos,southPos=0;_.each(south,function(v,southLookahead){if(g.node(v).dummy==="border"){var predecessors=g.predecessors(v);if(predecessors.length){nextNorthPos=g.node(predecessors[0]).order;scan(south,southPos,southLookahead,prevNorthPos,nextNorthPos);southPos=southLookahead;prevNorthPos=nextNorthPos}}scan(south,southPos,south.length,nextNorthPos,north.length)});return south}_.reduce(layering,visitLayer);return conflicts}function findOtherInnerSegmentNode(g,v){if(g.node(v).dummy){return _.find(g.predecessors(v),function(u){return g.node(u).dummy})}}function addConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}var conflictsV=conflicts[v];if(!conflictsV){conflicts[v]=conflictsV={}}conflictsV[w]=true}function hasConflict(conflicts,v,w){if(v>w){var tmp=v;v=w;w=tmp}return _.has(conflicts[v],w)}function verticalAlignment(g,layering,conflicts,neighborFn){var root={},align={},pos={};_.each(layering,function(layer){_.each(layer,function(v,order){root[v]=v;align[v]=v;pos[v]=order})});_.each(layering,function(layer){var prevIdx=-1;_.each(layer,function(v){var ws=neighborFn(v);if(ws.length){ws=_.sortBy(ws,function(w){return pos[w]});var mp=(ws.length-1)/2;for(var i=Math.floor(mp),il=Math.ceil(mp);i<=il;++i){var w=ws[i];if(align[v]===v&&prevIdxwLabel.lim){tailLabel=wLabel;flip=true}var candidates=_.filter(g.edges(),function(edge){return flip===isDescendant(t,t.node(edge.v),tailLabel)&&flip!==isDescendant(t,t.node(edge.w),tailLabel)});return _.min(candidates,function(edge){return slack(g,edge)})}function exchangeEdges(t,g,e,f){var v=e.v,w=e.w;t.removeEdge(v,w);t.setEdge(f.v,f.w,{});initLowLimValues(t);initCutValues(t,g);updateRanks(t,g)}function updateRanks(t,g){var root=_.find(t.nodes(),function(v){return!g.node(v).parent}),vs=preorder(t,root);vs=vs.slice(1);_.each(vs,function(v){var parent=t.node(v).parent,edge=g.edge(v,parent),flipped=false;if(!edge){edge=g.edge(parent,v);flipped=true}g.node(v).rank=g.node(parent).rank+(flipped?edge.minlen:-edge.minlen)})}function isTreeEdge(tree,u,v){return tree.hasEdge(u,v)}function isDescendant(tree,vLabel,rootLabel){return rootLabel.low<=vLabel.lim&&vLabel.lim<=rootLabel.lim}},{"../graphlib":33,"../lodash":36,"../util":55,"./feasible-tree":51,"./util":54}],54:[function(require,module,exports){"use strict";var _=require("../lodash");module.exports={longestPath:longestPath,slack:slack};function longestPath(g){var visited={};function dfs(v){var label=g.node(v);if(_.has(visited,v)){return label.rank}visited[v]=true;var rank=_.min(_.map(g.outEdges(v),function(e){return dfs(e.w)-g.edge(e).minlen}));if(rank===Number.POSITIVE_INFINITY){rank=0}return label.rank=rank}_.each(g.sources(),dfs)}function slack(g,e){return g.node(e.w).rank-g.node(e.v).rank-g.edge(e).minlen}},{"../lodash":36}],55:[function(require,module,exports){"use strict";var 
_=require("./lodash"),Graph=require("./graphlib").Graph;module.exports={addDummyNode:addDummyNode,simplify:simplify,asNonCompoundGraph:asNonCompoundGraph,successorWeights:successorWeights,predecessorWeights:predecessorWeights,intersectRect:intersectRect,buildLayerMatrix:buildLayerMatrix,normalizeRanks:normalizeRanks,removeEmptyRanks:removeEmptyRanks,addBorderNode:addBorderNode,maxRank:maxRank,partition:partition,time:time,notime:notime};function addDummyNode(g,type,attrs,name){var v;do{v=_.uniqueId(name)}while(g.hasNode(v));attrs.dummy=type;g.setNode(v,attrs);return v}function simplify(g){var simplified=(new Graph).setGraph(g.graph());_.each(g.nodes(),function(v){simplified.setNode(v,g.node(v))});_.each(g.edges(),function(e){var simpleLabel=simplified.edge(e.v,e.w)||{weight:0,minlen:1},label=g.edge(e);simplified.setEdge(e.v,e.w,{weight:simpleLabel.weight+label.weight,minlen:Math.max(simpleLabel.minlen,label.minlen)})});return simplified}function asNonCompoundGraph(g){var simplified=new Graph({multigraph:g.isMultigraph()}).setGraph(g.graph());_.each(g.nodes(),function(v){if(!g.children(v).length){simplified.setNode(v,g.node(v))}});_.each(g.edges(),function(e){simplified.setEdge(e,g.edge(e))});return simplified}function successorWeights(g){var weightMap=_.map(g.nodes(),function(v){var sucs={};_.each(g.outEdges(v),function(e){sucs[e.w]=(sucs[e.w]||0)+g.edge(e).weight});return sucs});return _.zipObject(g.nodes(),weightMap)}function predecessorWeights(g){var weightMap=_.map(g.nodes(),function(v){var preds={};_.each(g.inEdges(v),function(e){preds[e.v]=(preds[e.v]||0)+g.edge(e).weight});return preds});return _.zipObject(g.nodes(),weightMap)}function intersectRect(rect,point){var x=rect.x;var y=rect.y;var dx=point.x-x;var dy=point.y-y;var w=rect.width/2;var h=rect.height/2;if(!dx&&!dy){throw new Error("Not possible to find intersection inside of the rectangle")}var sx,sy;if(Math.abs(dy)*w>Math.abs(dx)*h){if(dy<0){h=-h}sx=h*dx/dy;sy=h}else{if(dx<0){w=-w}sx=w;sy=w*dy/dx}return{x:x+sx,y:y+sy}}function buildLayerMatrix(g){var layering=_.map(_.range(maxRank(g)+1),function(){return[]});_.each(g.nodes(),function(v){var node=g.node(v),rank=node.rank;if(!_.isUndefined(rank)){layering[rank][node.order]=v}});return layering}function normalizeRanks(g){var min=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));_.each(g.nodes(),function(v){var node=g.node(v);if(_.has(node,"rank")){node.rank-=min}})}function removeEmptyRanks(g){var offset=_.min(_.map(g.nodes(),function(v){return g.node(v).rank}));var layers=[];_.each(g.nodes(),function(v){var rank=g.node(v).rank-offset;if(!_.has(layers,rank)){layers[rank]=[]}layers[rank].push(v)});var delta=0,nodeRankFactor=g.graph().nodeRankFactor;_.each(layers,function(vs,i){if(_.isUndefined(vs)&&i%nodeRankFactor!==0){--delta}else if(delta){_.each(vs,function(v){g.node(v).rank+=delta})}})}function addBorderNode(g,prefix,rank,order){var node={width:0,height:0};if(arguments.length>=4){node.rank=rank;node.order=order}return addDummyNode(g,"border",node,prefix)}function maxRank(g){return _.max(_.map(g.nodes(),function(v){var rank=g.node(v).rank;if(!_.isUndefined(rank)){return rank}}))}function partition(collection,fn){var result={lhs:[],rhs:[]};_.each(collection,function(value){if(fn(value)){result.lhs.push(value)}else{result.rhs.push(value)}});return result}function time(name,fn){var start=_.now();try{return fn()}finally{console.log(name+" time: "+(_.now()-start)+"ms")}}function notime(name,fn){return 
fn()}},{"./graphlib":33,"./lodash":36}],56:[function(require,module,exports){module.exports="0.7.1"},{}],57:[function(require,module,exports){var lib=require("./lib");module.exports={Graph:lib.Graph,json:require("./lib/json"),alg:require("./lib/alg"),version:lib.version}},{"./lib":73,"./lib/alg":64,"./lib/json":74}],58:[function(require,module,exports){var _=require("../lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{"../lodash":75}],59:[function(require,module,exports){var _=require("../lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{"../lodash":75}],60:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("../lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"../lodash":75,"./dijkstra":61}],61:[function(require,module,exports){var _=require("../lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":71,"../lodash":75}],62:[function(require,module,exports){var _=require("../lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"../lodash":75,"./tarjan":69}],63:[function(require,module,exports){var _=require("../lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":71,"../graph":72,"../lodash":75}],69:[function(require,module,exports){var _=require("../lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{"../lodash":75}],70:[function(require,module,exports){var _=require("../lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{"../lodash":75}],71:[function(require,module,exports){var _=require("../lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var 
keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var index=arr.length;keyIndices[key]=index;arr.push({key:key,priority:priority});this._decrease(index);return true}return false};PriorityQueue.prototype.removeMin=function(){this._swap(0,this._arr.length-1);var min=this._arr.pop();delete this._keyIndices[min.key];this._heapify(0);return min.key};PriorityQueue.prototype.decrease=function(key,priority){var index=this._keyIndices[key];if(priority>this._arr[index].priority){throw new Error("New priority is greater than current priority. "+"Key: "+key+" Old: "+this._arr[index].priority+" New: "+priority)}this._arr[index].priority=priority;this._decrease(index)};PriorityQueue.prototype._heapify=function(i){var arr=this._arr;var l=2*i,r=l+1,largest=i;if(l>1;if(arr[parent].priority1){this.setNode(v,value)}else{this.setNode(v)}},this);return this};Graph.prototype.setNode=function(v,value){if(_.has(this._nodes,v)){if(arguments.length>1){this._nodes[v]=value}return this}this._nodes[v]=arguments.length>1?value:this._defaultNodeLabelFn(v);if(this._isCompound){this._parent[v]=GRAPH_NODE;this._children[v]={};this._children[GRAPH_NODE][v]=true}this._in[v]={};this._preds[v]={};this._out[v]={};this._sucs[v]={};++this._nodeCount;return this};Graph.prototype.node=function(v){return this._nodes[v]};Graph.prototype.hasNode=function(v){return _.has(this._nodes,v)};Graph.prototype.removeNode=function(v){var self=this;if(_.has(this._nodes,v)){var removeEdge=function(e){self.removeEdge(self._edgeObjs[e])};delete this._nodes[v];if(this._isCompound){this._removeFromParentsChildList(v);delete this._parent[v];_.each(this.children(v),function(child){this.setParent(child)},this);delete this._children[v]}_.each(_.keys(this._in[v]),removeEdge);delete this._in[v];delete this._preds[v];_.each(_.keys(this._out[v]),removeEdge);delete this._out[v];delete this._sucs[v];--this._nodeCount}return this};Graph.prototype.setParent=function(v,parent){if(!this._isCompound){throw new Error("Cannot set parent in a non-compound graph")}if(_.isUndefined(parent)){parent=GRAPH_NODE}else{for(var ancestor=parent;!_.isUndefined(ancestor);ancestor=this.parent(ancestor)){if(ancestor===v){throw new Error("Setting "+parent+" as parent of "+v+" would create create a cycle")}}this.setNode(parent)}this.setNode(v);this._removeFromParentsChildList(v);this._parent[v]=parent;this._children[parent][v]=true;return this};Graph.prototype._removeFromParentsChildList=function(v){delete this._children[this._parent[v]][v]};Graph.prototype.parent=function(v){if(this._isCompound){var parent=this._parent[v];if(parent!==GRAPH_NODE){return parent}}};Graph.prototype.children=function(v){if(_.isUndefined(v)){v=GRAPH_NODE}if(this._isCompound){var children=this._children[v];if(children){return _.keys(children)}}else if(v===GRAPH_NODE){return this.nodes()}else if(this.hasNode(v)){return[]}};Graph.prototype.predecessors=function(v){var predsV=this._preds[v];if(predsV){return _.keys(predsV)}};Graph.prototype.successors=function(v){var sucsV=this._sucs[v];if(sucsV){return _.keys(sucsV)}};Graph.prototype.neighbors=function(v){var preds=this.predecessors(v);if(preds){return _.union(preds,this.successors(v))}};Graph.prototype.setDefaultEdgeLabel=function(newDefault){if(!_.isFunction(newDefault)){newDefault=_.constant(newDefault)}this._defaultEdgeLabelFn=newDefault;return this};Graph.prototype.edgeCount=function(){return this._edgeCount};Graph.prototype.edges=function(){return 
_.values(this._edgeObjs)};Graph.prototype.setPath=function(vs,value){var self=this,args=arguments;_.reduce(vs,function(v,w){if(args.length>1){self.setEdge(v,w,value)}else{self.setEdge(v,w)}return w});return this};Graph.prototype.setEdge=function(){var v,w,name,value,valueSpecified=false;if(_.isPlainObject(arguments[0])){v=arguments[0].v;w=arguments[0].w;name=arguments[0].name;if(arguments.length===2){value=arguments[1];valueSpecified=true}}else{v=arguments[0];w=arguments[1];name=arguments[3];if(arguments.length>2){value=arguments[2];valueSpecified=true}}v=""+v;w=""+w;if(!_.isUndefined(name)){name=""+name}var e=edgeArgsToId(this._isDirected,v,w,name);if(_.has(this._edgeLabels,e)){if(valueSpecified){this._edgeLabels[e]=value}return this}if(!_.isUndefined(name)&&!this._isMultigraph){throw new Error("Cannot set a named edge when isMultigraph = false")}this.setNode(v);this.setNode(w);this._edgeLabels[e]=valueSpecified?value:this._defaultEdgeLabelFn(v,w,name);var edgeObj=edgeArgsToObj(this._isDirected,v,w,name);v=edgeObj.v;w=edgeObj.w;Object.freeze(edgeObj);this._edgeObjs[e]=edgeObj;incrementOrInitEntry(this._preds[w],v);incrementOrInitEntry(this._sucs[v],w);this._in[w][e]=edgeObj;this._out[v][e]=edgeObj;this._edgeCount++;return this};Graph.prototype.edge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return this._edgeLabels[e]};Graph.prototype.hasEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name);return _.has(this._edgeLabels,e)};Graph.prototype.removeEdge=function(v,w,name){var e=arguments.length===1?edgeObjToId(this._isDirected,arguments[0]):edgeArgsToId(this._isDirected,v,w,name),edge=this._edgeObjs[e];if(edge){v=edge.v;w=edge.w;delete this._edgeLabels[e];delete this._edgeObjs[e];decrementOrRemoveEntry(this._preds[w],v);decrementOrRemoveEntry(this._sucs[v],w);delete this._in[w][e];delete this._out[v][e];this._edgeCount--}return this};Graph.prototype.inEdges=function(v,u){var inV=this._in[v];if(inV){var edges=_.values(inV);if(!u){return edges}return _.filter(edges,function(edge){return edge.v===u})}};Graph.prototype.outEdges=function(v,w){var outV=this._out[v];if(outV){var edges=_.values(outV);if(!w){return edges}return _.filter(edges,function(edge){return edge.w===w})}};Graph.prototype.nodeEdges=function(v,w){var inEdges=this.inEdges(v,w);if(inEdges){return inEdges.concat(this.outEdges(v,w))}};function incrementOrInitEntry(map,k){if(_.has(map,k)){map[k]++}else{map[k]=1}}function decrementOrRemoveEntry(map,k){if(!--map[k]){delete map[k]}}function edgeArgsToId(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}return v+EDGE_KEY_DELIM+w+EDGE_KEY_DELIM+(_.isUndefined(name)?DEFAULT_EDGE_NAME:name)}function edgeArgsToObj(isDirected,v,w,name){if(!isDirected&&v>w){var tmp=v;v=w;w=tmp}var edgeObj={v:v,w:w};if(name){edgeObj.name=name}return edgeObj}function edgeObjToId(isDirected,edgeObj){return edgeArgsToId(isDirected,edgeObj.v,edgeObj.w,edgeObj.name)}},{"./lodash":75}],73:[function(require,module,exports){module.exports={Graph:require("./graph"),version:require("./version")}},{"./graph":72,"./version":76}],74:[function(require,module,exports){var _=require("./lodash"),Graph=require("./graph");module.exports={write:write,read:read};function write(g){var 
json={options:{directed:g.isDirected(),multigraph:g.isMultigraph(),compound:g.isCompound()},nodes:writeNodes(g),edges:writeEdges(g)};if(!_.isUndefined(g.graph())){json.value=_.clone(g.graph())}return json}function writeNodes(g){return _.map(g.nodes(),function(v){var nodeValue=g.node(v),parent=g.parent(v),node={v:v};if(!_.isUndefined(nodeValue)){node.value=nodeValue}if(!_.isUndefined(parent)){node.parent=parent}return node})}function writeEdges(g){return _.map(g.edges(),function(e){var edgeValue=g.edge(e),edge={v:e.v,w:e.w};if(!_.isUndefined(e.name)){edge.name=e.name}if(!_.isUndefined(edgeValue)){edge.value=edgeValue}return edge})}function read(json){var g=new Graph(json.options).setGraph(json.value);_.each(json.nodes,function(entry){g.setNode(entry.v,entry.value);if(entry.parent){g.setParent(entry.v,entry.parent)}});_.each(json.edges,function(entry){g.setEdge({v:entry.v,w:entry.w,name:entry.name},entry.value)});return g}},{"./graph":72,"./lodash":75}],75:[function(require,module,exports){module.exports=require(20)},{"/Users/andrew/Documents/dev/dagre-d3/lib/lodash.js":20,lodash:77}],76:[function(require,module,exports){module.exports="1.0.1"},{}],77:[function(require,module,exports){(function(global){(function(){var undefined;var arrayPool=[],objectPool=[];var idCounter=0;var keyPrefix=+new Date+"";var largeArraySize=75;var maxPoolSize=40;var whitespace=" \f \ufeff"+"\n\r\u2028\u2029"+" ᠎              ";var reEmptyStringLeading=/\b__p \+= '';/g,reEmptyStringMiddle=/\b(__p \+=) '' \+/g,reEmptyStringTrailing=/(__e\(.*?\)|\b__t\)) \+\n'';/g;var reEsTemplate=/\$\{([^\\}]*(?:\\.[^\\}]*)*)\}/g;var reFlags=/\w*$/;var reFuncName=/^\s*function[ \n\r\t]+\w/;var reInterpolate=/<%=([\s\S]+?)%>/g;var reLeadingSpacesAndZeros=RegExp("^["+whitespace+"]*0+(?=.$)");var reNoMatch=/($^)/;var reThis=/\bthis\b/;var reUnescapedString=/['\n\r\t\u2028\u2029\\]/g;var contextProps=["Array","Boolean","Date","Function","Math","Number","Object","RegExp","String","_","attachEvent","clearTimeout","isFinite","isNaN","parseInt","setTimeout"];var templateCounter=0;var argsClass="[object Arguments]",arrayClass="[object Array]",boolClass="[object Boolean]",dateClass="[object Date]",funcClass="[object Function]",numberClass="[object Number]",objectClass="[object Object]",regexpClass="[object RegExp]",stringClass="[object String]";var cloneableClasses={};cloneableClasses[funcClass]=false;cloneableClasses[argsClass]=cloneableClasses[arrayClass]=cloneableClasses[boolClass]=cloneableClasses[dateClass]=cloneableClasses[numberClass]=cloneableClasses[objectClass]=cloneableClasses[regexpClass]=cloneableClasses[stringClass]=true;var debounceOptions={leading:false,maxWait:0,trailing:false};var descriptor={configurable:false,enumerable:false,value:null,writable:false};var objectTypes={"boolean":false,"function":true,object:true,number:false,string:false,undefined:false};var stringEscapes={"\\":"\\","'":"'","\n":"n","\r":"r"," ":"t","\u2028":"u2028","\u2029":"u2029"};var root=objectTypes[typeof window]&&window||this;var freeExports=objectTypes[typeof exports]&&exports&&!exports.nodeType&&exports;var freeModule=objectTypes[typeof module]&&module&&!module.nodeType&&module;var moduleExports=freeModule&&freeModule.exports===freeExports&&freeExports;var freeGlobal=objectTypes[typeof global]&&global;if(freeGlobal&&(freeGlobal.global===freeGlobal||freeGlobal.window===freeGlobal)){root=freeGlobal}function baseIndexOf(array,value,fromIndex){var index=(fromIndex||0)-1,length=array?array.length:0;while(++index-1?0:-1:cache?0:-1}function 
cachePush(value){var cache=this.cache,type=typeof value;if(type=="boolean"||value==null){cache[value]=true}else{if(type!="number"&&type!="string"){type="object"}var key=type=="number"?value:keyPrefix+value,typeCache=cache[type]||(cache[type]={});if(type=="object"){(typeCache[key]||(typeCache[key]=[])).push(value)}else{typeCache[key]=true}}}function charAtCallback(value){return value.charCodeAt(0)}function compareAscending(a,b){var ac=a.criteria,bc=b.criteria,index=-1,length=ac.length;while(++indexother||typeof value=="undefined"){return 1}if(value/g,evaluate:/<%([\s\S]+?)%>/g,interpolate:reInterpolate,variable:"",imports:{_:lodash}};function baseBind(bindData){var func=bindData[0],partialArgs=bindData[2],thisArg=bindData[4];function bound(){if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(this instanceof bound){var thisBinding=baseCreate(func.prototype),result=func.apply(thisBinding,args||arguments);return isObject(result)?result:thisBinding}return func.apply(thisArg,args||arguments)}setBindData(bound,bindData);return bound}function baseClone(value,isDeep,callback,stackA,stackB){if(callback){var result=callback(value);if(typeof result!="undefined"){return result}}var isObj=isObject(value);if(isObj){var className=toString.call(value);if(!cloneableClasses[className]){return value}var ctor=ctorByClass[className];switch(className){case boolClass:case dateClass:return new ctor(+value);case numberClass:case stringClass:return new ctor(value);case regexpClass:result=ctor(value.source,reFlags.exec(value));result.lastIndex=value.lastIndex;return result}}else{return value}var isArr=isArray(value);if(isDeep){var initedStack=!stackA;stackA||(stackA=getArray());stackB||(stackB=getArray());var length=stackA.length;while(length--){if(stackA[length]==value){return stackB[length]}}result=isArr?ctor(value.length):{}}else{result=isArr?slice(value):assign({},value)}if(isArr){if(hasOwnProperty.call(value,"index")){result.index=value.index}if(hasOwnProperty.call(value,"input")){result.input=value.input}}if(!isDeep){return result}stackA.push(value);stackB.push(result);(isArr?forEach:forOwn)(value,function(objValue,key){result[key]=baseClone(objValue,isDeep,callback,stackA,stackB)});if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseCreate(prototype,properties){return isObject(prototype)?nativeCreate(prototype):{}; + +}if(!nativeCreate){baseCreate=function(){function Object(){}return function(prototype){if(isObject(prototype)){Object.prototype=prototype;var result=new Object;Object.prototype=null}return result||context.Object()}}()}function baseCreateCallback(func,thisArg,argCount){if(typeof func!="function"){return identity}if(typeof thisArg=="undefined"||!("prototype"in func)){return func}var bindData=func.__bindData__;if(typeof bindData=="undefined"){if(support.funcNames){bindData=!func.name}bindData=bindData||!support.funcDecomp;if(!bindData){var source=fnToString.call(func);if(!support.funcNames){bindData=!reFuncName.test(source)}if(!bindData){bindData=reThis.test(source);setBindData(func,bindData)}}}if(bindData===false||bindData!==true&&bindData[1]&1){return func}switch(argCount){case 1:return function(value){return func.call(thisArg,value)};case 2:return function(a,b){return func.call(thisArg,a,b)};case 3:return function(value,index,collection){return func.call(thisArg,value,index,collection)};case 4:return function(accumulator,value,index,collection){return func.call(thisArg,accumulator,value,index,collection)}}return bind(func,thisArg)}function 
baseCreateWrapper(bindData){var func=bindData[0],bitmask=bindData[1],partialArgs=bindData[2],partialRightArgs=bindData[3],thisArg=bindData[4],arity=bindData[5];var isBind=bitmask&1,isBindKey=bitmask&2,isCurry=bitmask&4,isCurryBound=bitmask&8,key=func;function bound(){var thisBinding=isBind?thisArg:this;if(partialArgs){var args=slice(partialArgs);push.apply(args,arguments)}if(partialRightArgs||isCurry){args||(args=slice(arguments));if(partialRightArgs){push.apply(args,partialRightArgs)}if(isCurry&&args.length=largeArraySize&&indexOf===baseIndexOf,result=[];if(isLarge){var cache=createCache(values);if(cache){indexOf=cacheIndexOf;values=cache}else{isLarge=false}}while(++index-1}})}}stackA.pop();stackB.pop();if(initedStack){releaseArray(stackA);releaseArray(stackB)}return result}function baseMerge(object,source,callback,stackA,stackB){(isArray(source)?forEach:forOwn)(source,function(source,key){var found,isArr,result=source,value=object[key];if(source&&((isArr=isArray(source))||isPlainObject(source))){var stackLength=stackA.length;while(stackLength--){if(found=stackA[stackLength]==source){value=stackB[stackLength];break}}if(!found){var isShallow;if(callback){result=callback(value,source);if(isShallow=typeof result!="undefined"){value=result}}if(!isShallow){value=isArr?isArray(value)?value:[]:isPlainObject(value)?value:{}}stackA.push(source);stackB.push(value);if(!isShallow){baseMerge(value,source,callback,stackA,stackB)}}}else{if(callback){result=callback(value,source);if(typeof result=="undefined"){result=source}}if(typeof result!="undefined"){value=result}}object[key]=value})}function baseRandom(min,max){return min+floor(nativeRandom()*(max-min+1))}function baseUniq(array,isSorted,callback){var index=-1,indexOf=getIndexOf(),length=array?array.length:0,result=[];var isLarge=!isSorted&&length>=largeArraySize&&indexOf===baseIndexOf,seen=callback||isLarge?getArray():result;if(isLarge){var cache=createCache(seen);indexOf=cacheIndexOf;seen=cache}while(++index":">",'"':""","'":"'"};var htmlUnescapes=invert(htmlEscapes);var reEscapedHtml=RegExp("("+keys(htmlUnescapes).join("|")+")","g"),reUnescapedHtml=RegExp("["+keys(htmlEscapes).join("")+"]","g");var assign=function(object,source,guard){var index,iterable=object,result=iterable;if(!iterable)return result;var args=arguments,argsIndex=0,argsLength=typeof guard=="number"?2:args.length;if(argsLength>3&&typeof args[argsLength-2]=="function"){var callback=baseCreateCallback(args[--argsLength-1],args[argsLength--],2)}else if(argsLength>2&&typeof args[argsLength-1]=="function"){callback=args[--argsLength]}while(++argsIndex3&&typeof args[length-2]=="function"){var callback=baseCreateCallback(args[--length-1],args[length--],2)}else if(length>2&&typeof args[length-1]=="function"){callback=args[--length]}var sources=slice(arguments,1,length),index=-1,stackA=getArray(),stackB=getArray();while(++index-1}else if(typeof length=="number"){result=(isString(collection)?collection.indexOf(target,fromIndex):indexOf(collection,target,fromIndex))>-1}else{forOwn(collection,function(value){if(++index>=fromIndex){return!(result=value===target)}})}return result}var countBy=createAggregator(function(result,value,key){hasOwnProperty.call(result,key)?result[key]++:result[key]=1});function every(collection,callback,thisArg){var result=true;callback=lodash.createCallback(callback,thisArg,3);var index=-1,length=collection?collection.length:0;if(typeof 
length=="number"){while(++indexresult){result=value}}}else{callback=callback==null&&isString(collection)?charAtCallback:lodash.createCallback(callback,thisArg,3);forEach(collection,function(value,index,collection){var current=callback(value,index,collection);if(current>computed){computed=current;result=value}})}return result}function min(collection,callback,thisArg){var computed=Infinity,result=computed;if(typeof callback!="function"&&thisArg&&thisArg[callback]===collection){callback=null}if(callback==null&&isArray(collection)){var index=-1,length=collection.length;while(++index=largeArraySize&&createCache(argsIndex?args[argsIndex]:seen))}}var array=args[0],index=-1,length=array?array.length:0,result=[];outer:while(++index>>1;callback(array[mid])1?arguments:arguments[0],index=-1,length=array?max(pluck(array,"length")):0,result=Array(length<0?0:length);while(++index2?createWrapper(func,17,slice(arguments,2),null,thisArg):createWrapper(func,1,null,null,thisArg)}function bindAll(object){var funcs=arguments.length>1?baseFlatten(arguments,true,false,1):functions(object),index=-1,length=funcs.length;while(++index2?createWrapper(key,19,slice(arguments,2),null,object):createWrapper(key,3,null,null,object)}function compose(){var funcs=arguments,length=funcs.length;while(length--){if(!isFunction(funcs[length])){throw new TypeError}}return function(){var args=arguments,length=funcs.length;while(length--){args=[funcs[length].apply(this,args)]}return args[0]}}function curry(func,arity){arity=typeof arity=="number"?arity:+arity||func.length;return createWrapper(func,4,null,null,null,arity)}function debounce(func,wait,options){var args,maxTimeoutId,result,stamp,thisArg,timeoutId,trailingCall,lastCalled=0,maxWait=false,trailing=true;if(!isFunction(func)){throw new TypeError}wait=nativeMax(0,wait)||0;if(options===true){var leading=true;trailing=false}else if(isObject(options)){leading=options.leading;maxWait="maxWait"in options&&(nativeMax(wait,options.maxWait)||0);trailing="trailing"in options?options.trailing:trailing}var delayed=function(){var remaining=wait-(now()-stamp);if(remaining<=0){if(maxTimeoutId){clearTimeout(maxTimeoutId)}var isCalled=trailingCall;maxTimeoutId=timeoutId=trailingCall=undefined;if(isCalled){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}}else{timeoutId=setTimeout(delayed,remaining)}};var maxDelayed=function(){if(timeoutId){clearTimeout(timeoutId)}maxTimeoutId=timeoutId=trailingCall=undefined;if(trailing||maxWait!==wait){lastCalled=now();result=func.apply(thisArg,args);if(!timeoutId&&!maxTimeoutId){args=thisArg=null}}};return function(){args=arguments;stamp=now();thisArg=this;trailingCall=trailing&&(timeoutId||!leading);if(maxWait===false){var leadingCall=leading&&!timeoutId}else{if(!maxTimeoutId&&!leading){lastCalled=stamp}var remaining=maxWait-(stamp-lastCalled),isCalled=remaining<=0;if(isCalled){if(maxTimeoutId){maxTimeoutId=clearTimeout(maxTimeoutId)}lastCalled=stamp;result=func.apply(thisArg,args)}else if(!maxTimeoutId){maxTimeoutId=setTimeout(maxDelayed,remaining)}}if(isCalled&&timeoutId){timeoutId=clearTimeout(timeoutId)}else if(!timeoutId&&wait!==maxWait){timeoutId=setTimeout(delayed,wait)}if(leadingCall){isCalled=true;result=func.apply(thisArg,args)}if(isCalled&&!timeoutId&&!maxTimeoutId){args=thisArg=null}return result}}function defer(func){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,1);return setTimeout(function(){func.apply(undefined,args)},1)}function 
delay(func,wait){if(!isFunction(func)){throw new TypeError}var args=slice(arguments,2);return setTimeout(function(){func.apply(undefined,args)},wait)}function memoize(func,resolver){if(!isFunction(func)){throw new TypeError}var memoized=function(){var cache=memoized.cache,key=resolver?resolver.apply(this,arguments):keyPrefix+arguments[0];return hasOwnProperty.call(cache,key)?cache[key]:cache[key]=func.apply(this,arguments)};memoized.cache={};return memoized}function once(func){var ran,result;if(!isFunction(func)){throw new TypeError}return function(){if(ran){return result}ran=true;result=func.apply(this,arguments);func=null;return result}}function partial(func){return createWrapper(func,16,slice(arguments,1))}function partialRight(func){return createWrapper(func,32,null,slice(arguments,1))}function throttle(func,wait,options){var leading=true,trailing=true;if(!isFunction(func)){throw new TypeError}if(options===false){leading=false}else if(isObject(options)){leading="leading"in options?options.leading:leading;trailing="trailing"in options?options.trailing:trailing}debounceOptions.leading=leading;debounceOptions.maxWait=wait;debounceOptions.trailing=trailing;return debounce(func,wait,debounceOptions)}function wrap(value,wrapper){return createWrapper(wrapper,16,[value])}function constant(value){return function(){return value}}function createCallback(func,thisArg,argCount){var type=typeof func;if(func==null||type=="function"){return baseCreateCallback(func,thisArg,argCount)}if(type!="object"){return property(func)}var props=keys(func),key=props[0],a=func[key];if(props.length==1&&a===a&&!isObject(a)){return function(object){var b=object[key];return a===b&&(a!==0||1/a==1/b)}}return function(object){var length=props.length,result=false;while(length--){if(!(result=baseIsEqual(object[props[length]],func[props[length]],null,true))){break}}return result}}function escape(string){return string==null?"":String(string).replace(reUnescapedHtml,escapeHtmlChar)}function identity(value){return value}function mixin(object,source,options){var chain=true,methodNames=source&&functions(source);if(!source||!options&&!methodNames.length){if(options==null){options=source}ctor=lodashWrapper;source=object;object=lodash;methodNames=functions(source)}if(options===false){chain=false}else if(isObject(options)&&"chain"in options){chain=options.chain}var ctor=object,isFunc=isFunction(ctor);forEach(methodNames,function(methodName){var func=object[methodName]=source[methodName];if(isFunc){ctor.prototype[methodName]=function(){var chainAll=this.__chain__,value=this.__wrapped__,args=[value];push.apply(args,arguments);var result=func.apply(object,args);if(chain||chainAll){if(value===result&&isObject(result)){return this}result=new ctor(result);result.__chain__=chainAll}return result}}})}function noConflict(){context._=oldDash;return this}function noop(){}var now=isNative(now=Date.now)&&now||function(){return(new Date).getTime()};var parseInt=nativeParseInt(whitespace+"08")==8?nativeParseInt:function(value,radix){return nativeParseInt(isString(value)?value.replace(reLeadingSpacesAndZeros,""):value,radix||0)};function property(key){return function(object){return object[key]}}function random(min,max,floating){var noMin=min==null,noMax=max==null;if(floating==null){if(typeof min=="boolean"&&noMax){floating=min;min=1}else if(!noMax&&typeof max=="boolean"){floating=max;noMax=true}}if(noMin&&noMax){max=1}min=+min||0;if(noMax){max=min;min=0}else{max=+max||0}if(floating||min%1||max%1){var rand=nativeRandom();return 
nativeMin(min+rand*(max-min+parseFloat("1e-"+((rand+"").length-1))),max)}return baseRandom(min,max)}function result(object,key){if(object){var value=object[key];return isFunction(value)?object[key]():value}}function template(text,data,options){var settings=lodash.templateSettings;text=String(text||"");options=defaults({},options,settings);var imports=defaults({},options.imports,settings.imports),importsKeys=keys(imports),importsValues=values(imports);var isEvaluating,index=0,interpolate=options.interpolate||reNoMatch,source="__p += '";var reDelimiters=RegExp((options.escape||reNoMatch).source+"|"+interpolate.source+"|"+(interpolate===reInterpolate?reEsTemplate:reNoMatch).source+"|"+(options.evaluate||reNoMatch).source+"|$","g");text.replace(reDelimiters,function(match,escapeValue,interpolateValue,esTemplateValue,evaluateValue,offset){interpolateValue||(interpolateValue=esTemplateValue);source+=text.slice(index,offset).replace(reUnescapedString,escapeStringChar);if(escapeValue){source+="' +\n__e("+escapeValue+") +\n'"}if(evaluateValue){isEvaluating=true;source+="';\n"+evaluateValue+";\n__p += '"}if(interpolateValue){source+="' +\n((__t = ("+interpolateValue+")) == null ? '' : __t) +\n'"}index=offset+match.length;return match});source+="';\n";var variable=options.variable,hasVariable=variable;if(!hasVariable){variable="obj";source="with ("+variable+") {\n"+source+"\n}\n"}source=(isEvaluating?source.replace(reEmptyStringLeading,""):source).replace(reEmptyStringMiddle,"$1").replace(reEmptyStringTrailing,"$1;");source="function("+variable+") {\n"+(hasVariable?"":variable+" || ("+variable+" = {});\n")+"var __t, __p = '', __e = _.escape"+(isEvaluating?", __j = Array.prototype.join;\n"+"function print() { __p += __j.call(arguments, '') }\n":";\n")+source+"return __p\n}";var sourceURL="\n/*\n//# sourceURL="+(options.sourceURL||"/lodash/template/source["+templateCounter++ +"]")+"\n*/";try{var result=Function(importsKeys,"return "+source+sourceURL).apply(undefined,importsValues)}catch(e){e.source=source;throw e}if(data){return result(data)}result.source=source;return result}function times(n,callback,thisArg){n=(n=+n)>-1?n:0;var index=-1,result=Array(n);callback=baseCreateCallback(callback,thisArg,1);while(++index1?arguments[1]:{},peg$FAILED={},peg$startRuleFunctions={start:peg$parsestart,graphStmt:peg$parsegraphStmt},peg$startRuleFunction=peg$parsestart,peg$c0=[],peg$c1=peg$FAILED,peg$c2=null,peg$c3="{",peg$c4={type:"literal",value:"{",description:'"{"'},peg$c5="}",peg$c6={type:"literal",value:"}",description:'"}"'},peg$c7=function(strict,type,id,stmts){return{type:type,id:id,strict:strict!==null,stmts:stmts}},peg$c8=";",peg$c9={type:"literal",value:";",description:'";"'},peg$c10=function(first,rest){var result=[first];for(var i=0;i",description:'"->"'},peg$c33=function(rhs,rest){var result=[rhs];if(rest){for(var i=0;ipos){peg$cachedPos=0;peg$cachedPosDetails={line:1,column:1,seenCR:false}}advance(peg$cachedPosDetails,peg$cachedPos,pos);peg$cachedPos=pos}return peg$cachedPosDetails}function peg$fail(expected){if(peg$currPospeg$maxFailPos){peg$maxFailPos=peg$currPos;peg$maxFailExpected=[]}peg$maxFailExpected.push(expected)}function peg$buildException(message,expected,pos){function cleanupExpected(expected){var i=1;expected.sort(function(a,b){if(a.descriptionb.description){return 1}else{return 0}});while(i1?expectedDescs.slice(0,-1).join(", ")+" or "+expectedDescs[expected.length-1]:expectedDescs[0];foundDesc=found?'"'+stringEscape(found)+'"':"end of input";return"Expected "+expectedDesc+" but 
"+foundDesc+" found."}var posDetails=peg$computePosDetails(pos),found=pospeg$currPos){s5=input.charAt(peg$currPos);peg$currPos++}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c110)}}if(s5!==peg$FAILED){s4=[s4,s5];s3=s4}else{peg$currPos=s3;s3=peg$c1}}else{peg$currPos=s3;s3=peg$c1}while(s3!==peg$FAILED){s2.push(s3);s3=peg$currPos;s4=peg$currPos;peg$silentFails++;if(input.substr(peg$currPos,2)===peg$c108){s5=peg$c108;peg$currPos+=2}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c109)}}peg$silentFails--;if(s5===peg$FAILED){s4=peg$c30}else{peg$currPos=s4;s4=peg$c1}if(s4!==peg$FAILED){if(input.length>peg$currPos){s5=input.charAt(peg$currPos);peg$currPos++}else{s5=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c110)}}if(s5!==peg$FAILED){s4=[s4,s5];s3=s4}else{peg$currPos=s3;s3=peg$c1}}else{peg$currPos=s3;s3=peg$c1}}if(s2!==peg$FAILED){if(input.substr(peg$currPos,2)===peg$c108){s3=peg$c108;peg$currPos+=2}else{s3=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c109)}}if(s3!==peg$FAILED){s1=[s1,s2,s3];s0=s1}else{peg$currPos=s0;s0=peg$c1}}else{peg$currPos=s0;s0=peg$c1}}else{peg$currPos=s0;s0=peg$c1}}peg$silentFails--;if(s0===peg$FAILED){s1=peg$FAILED;if(peg$silentFails===0){peg$fail(peg$c101)}}return s0}function peg$parse_(){var s0;s0=peg$parsewhitespace();if(s0===peg$FAILED){s0=peg$parsecomment()}return s0}var _=require("lodash");var directed;peg$result=peg$startRuleFunction();if(peg$result!==peg$FAILED&&peg$currPos===input.length){return peg$result}else{if(peg$result!==peg$FAILED&&peg$currPos":"--",writer=new Writer;if(!g.isMultigraph()){writer.write("strict ")}writer.writeLine((g.isDirected()?"digraph":"graph")+" {");writer.indent();var graphAttrs=g.graph();if(_.isObject(graphAttrs)){_.each(graphAttrs,function(v,k){writer.writeLine(id(k)+"="+id(v)+";")})}writeSubgraph(g,undefined,writer);g.edges().forEach(function(edge){writeEdge(g,edge,ec,writer)});writer.unindent();writer.writeLine("}");return writer.toString()}function writeSubgraph(g,v,writer){var children=g.isCompound()?g.children(v):g.nodes();_.each(children,function(w){if(!g.isCompound()||!g.children(w).length){writeNode(g,w,writer)}else{writer.writeLine("subgraph "+id(w)+" {");writer.indent();if(_.isObject(g.node(w))){_.map(g.node(w),function(val,key){writer.writeLine(id(key)+"="+id(val)+";")})}writeSubgraph(g,w,writer);writer.unindent();writer.writeLine("}")}})}function writeNode(g,v,writer){writer.write(id(v));writeAttrs(g.node(v),writer);writer.writeLine()}function writeEdge(g,edge,ec,writer){var v=edge.v,w=edge.w,attrs=g.edge(edge);writer.write(id(v)+" "+ec+" "+id(w));writeAttrs(attrs,writer);writer.writeLine()}function writeAttrs(attrs,writer){if(_.isObject(attrs)){var attrStrs=_.map(attrs,function(val,key){return id(key)+"="+id(val)});if(attrStrs.length){writer.write(" ["+attrStrs.join(",")+"]")}}}function id(obj){if(typeof obj==="number"||obj.toString().match(UNESCAPED_ID_PATTERN)){return obj}return'"'+obj.toString().replace(/"/g,'\\"')+'"'}function Writer(){this._indent="";this._content="";this._shouldIndent=true}Writer.prototype.INDENT=" ";Writer.prototype.indent=function(){this._indent+=this.INDENT};Writer.prototype.unindent=function(){this._indent=this._indent.slice(this.INDENT.length)};Writer.prototype.writeLine=function(line){this.write((line||"")+"\n");this._shouldIndent=true};Writer.prototype.write=function(str){if(this._shouldIndent){this._shouldIndent=false;this._content+=this._indent}this._content+=str};Writer.prototype.toString=function(){return 
this._content}},{lodash:28}],9:[function(require,module,exports){var _=require("lodash");module.exports=_.clone(require("./lib"));module.exports.json=require("./lib/json");module.exports.alg=require("./lib/alg")},{"./lib":25,"./lib/alg":16,"./lib/json":26,lodash:28}],10:[function(require,module,exports){var _=require("lodash");module.exports=components;function components(g){var visited={},cmpts=[],cmpt;function dfs(v){if(_.has(visited,v))return;visited[v]=true;cmpt.push(v);_.each(g.successors(v),dfs);_.each(g.predecessors(v),dfs)}_.each(g.nodes(),function(v){cmpt=[];dfs(v);if(cmpt.length){cmpts.push(cmpt)}});return cmpts}},{lodash:28}],11:[function(require,module,exports){var _=require("lodash");module.exports=dfs;function dfs(g,vs,order){if(!_.isArray(vs)){vs=[vs]}var acc=[],visited={};_.each(vs,function(v){if(!g.hasNode(v)){throw new Error("Graph does not have node: "+v)}doDfs(g,v,order==="post",visited,acc)});return acc}function doDfs(g,v,postorder,visited,acc){if(!_.has(visited,v)){visited[v]=true;if(!postorder){acc.push(v)}_.each(g.neighbors(v),function(w){doDfs(g,w,postorder,visited,acc)});if(postorder){acc.push(v)}}}},{lodash:28}],12:[function(require,module,exports){var dijkstra=require("./dijkstra"),_=require("lodash");module.exports=dijkstraAll;function dijkstraAll(g,weightFunc,edgeFunc){return _.transform(g.nodes(),function(acc,v){acc[v]=dijkstra(g,v,weightFunc,edgeFunc)},{})}},{"./dijkstra":13,lodash:28}],13:[function(require,module,exports){var _=require("lodash"),PriorityQueue=require("../data/priority-queue");module.exports=dijkstra;var DEFAULT_WEIGHT_FUNC=_.constant(1);function dijkstra(g,source,weightFn,edgeFn){return runDijkstra(g,String(source),weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runDijkstra(g,source,weightFn,edgeFn){var results={},pq=new PriorityQueue,v,vEntry;var updateNeighbors=function(edge){var w=edge.v!==v?edge.v:edge.w,wEntry=results[w],weight=weightFn(edge),distance=vEntry.distance+weight;if(weight<0){throw new Error("dijkstra does not allow negative edge weights. 
"+"Bad edge: "+edge+" Weight: "+weight)}if(distance0){v=pq.removeMin();vEntry=results[v];if(vEntry.distance===Number.POSITIVE_INFINITY){break}edgeFn(v).forEach(updateNeighbors)}return results}},{"../data/priority-queue":23,lodash:28}],14:[function(require,module,exports){var _=require("lodash"),tarjan=require("./tarjan");module.exports=findCycles;function findCycles(g){return _.filter(tarjan(g),function(cmpt){return cmpt.length>1})}},{"./tarjan":21,lodash:28}],15:[function(require,module,exports){var _=require("lodash");module.exports=floydWarshall;var DEFAULT_WEIGHT_FUNC=_.constant(1);function floydWarshall(g,weightFn,edgeFn){return runFloydWarshall(g,weightFn||DEFAULT_WEIGHT_FUNC,edgeFn||function(v){return g.outEdges(v)})}function runFloydWarshall(g,weightFn,edgeFn){var results={},nodes=g.nodes();nodes.forEach(function(v){results[v]={};results[v][v]={distance:0};nodes.forEach(function(w){if(v!==w){results[v][w]={distance:Number.POSITIVE_INFINITY}}});edgeFn(v).forEach(function(edge){var w=edge.v===v?edge.w:edge.v,d=weightFn(edge);results[v][w]={distance:d,predecessor:v}})});nodes.forEach(function(k){var rowK=results[k];nodes.forEach(function(i){var rowI=results[i];nodes.forEach(function(j){var ik=rowI[k];var kj=rowK[j];var ij=rowI[j];var altDistance=ik.distance+kj.distance;if(altDistance0){v=pq.removeMin();if(_.has(parents,v)){result.setEdge(v,parents[v])}else if(init){throw new Error("Input graph is not connected: "+g)}else{init=true}g.nodeEdges(v).forEach(updateNeighbors)}return result}},{"../data/priority-queue":23,"../graph":24,lodash:28}],21:[function(require,module,exports){var _=require("lodash");module.exports=tarjan;function tarjan(g){var index=0,stack=[],visited={},results=[];function dfs(v){var entry=visited[v]={onStack:true,lowlink:index,index:index++};stack.push(v);g.successors(v).forEach(function(w){if(!_.has(visited,w)){dfs(w);entry.lowlink=Math.min(entry.lowlink,visited[w].lowlink)}else if(visited[w].onStack){entry.lowlink=Math.min(entry.lowlink,visited[w].index)}});if(entry.lowlink===entry.index){var cmpt=[],w;do{w=stack.pop();visited[w].onStack=false;cmpt.push(w)}while(v!==w);results.push(cmpt)}}g.nodes().forEach(function(v){if(!_.has(visited,v)){dfs(v)}});return results}},{lodash:28}],22:[function(require,module,exports){var _=require("lodash");module.exports=topsort;topsort.CycleException=CycleException;function topsort(g){var visited={},stack={},results=[];function visit(node){if(_.has(stack,node)){throw new CycleException}if(!_.has(visited,node)){stack[node]=true;visited[node]=true;_.each(g.predecessors(node),visit);delete stack[node];results.push(node)}}_.each(g.sinks(),visit);if(_.size(visited)!==g.nodeCount()){throw new CycleException}return results}function CycleException(){}},{lodash:28}],23:[function(require,module,exports){var _=require("lodash");module.exports=PriorityQueue;function PriorityQueue(){this._arr=[];this._keyIndices={}}PriorityQueue.prototype.size=function(){return this._arr.length};PriorityQueue.prototype.keys=function(){return this._arr.map(function(x){return x.key})};PriorityQueue.prototype.has=function(key){return _.has(this._keyIndices,key)};PriorityQueue.prototype.priority=function(key){var index=this._keyIndices[key];if(index!==undefined){return this._arr[index].priority}};PriorityQueue.prototype.min=function(){if(this.size()===0){throw new Error("Queue underflow")}return this._arr[0].key};PriorityQueue.prototype.add=function(key,priority){var keyIndices=this._keyIndices;key=String(key);if(!_.has(keyIndices,key)){var arr=this._arr;var 
[minified dagre-d3.min.js bundle (graphlib + lodash) omitted]
diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
[index line and first hunk header lost in extraction; the change swaps the ascending/descending sort-arrow glyphs]
-      sortrevind.innerHTML = stIsIE ? ' 5' : ' ▴';
+      sortrevind.innerHTML = stIsIE ? ' 5' : ' ▾';
       this.appendChild(sortrevind);
       return;
     }
@@ -113,7 +113,7 @@ sorttable = {
       this.removeChild(document.getElementById('sorttable_sortrevind'));
       sortfwdind = document.createElement('span');
       sortfwdind.id = "sorttable_sortfwdind";
-      sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▾';
+      sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▴';
       this.appendChild(sortfwdind);
       return;
     }
@@ -134,7 +134,7 @@ sorttable = {
       this.className += ' sorttable_sorted';
       sortfwdind = document.createElement('span');
       sortfwdind.id = "sorttable_sortfwdind";
-      sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▾';
+      sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▴';
       this.appendChild(sortfwdind);
       // build an array to sort. This is a Schwartzian transform thing,
diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css
new file mode 100644
index 0000000000000..3b4ae2ed354b8
--- /dev/null
+++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.css
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#dag-viz-graph a, #dag-viz-graph a:hover { + text-decoration: none; +} + +#dag-viz-graph .label { + font-weight: normal; + text-shadow: none; +} + +#dag-viz-graph svg path { + stroke: #444; + stroke-width: 1.5px; +} + +#dag-viz-graph svg g.cluster rect { + stroke-width: 1px; +} + +#dag-viz-graph div#empty-dag-viz-message { + margin: 15px; +} + +/* Job page specific styles */ + +#dag-viz-graph svg.job marker#marker-arrow path { + fill: #333; + stroke-width: 0px; +} + +#dag-viz-graph svg.job g.cluster rect { + fill: #A0DFFF; + stroke: #3EC0FF; + stroke-width: 1px; +} + +#dag-viz-graph svg.job g.cluster.skipped rect { + fill: #D6D6D6; + stroke: #B7B7B7; + stroke-width: 1px; +} + +#dag-viz-graph svg.job g.cluster.stage rect { + fill: #FFFFFF; + stroke: #FF99AC; + stroke-width: 1px; +} + +#dag-viz-graph svg.job g.cluster.stage.skipped rect { + stroke: #ADADAD; + stroke-width: 1px; +} + +#dag-viz-graph svg.job g#cross-stage-edges path { + fill: none; +} + +#dag-viz-graph svg.job g.cluster text { + fill: #333; +} + +#dag-viz-graph svg.job g.cluster.skipped text { + fill: #666; +} + +#dag-viz-graph svg.job g.node circle { + fill: #444; +} + +#dag-viz-graph svg.job g.node.cached circle { + fill: #A3F545; + stroke: #52C366; + stroke-width: 2px; +} + +/* Stage page specific styles */ + +#dag-viz-graph svg.stage g.cluster rect { + fill: #A0DFFF; + stroke: #3EC0FF; + stroke-width: 1px; +} + +#dag-viz-graph svg.stage g.cluster.stage rect { + fill: #FFFFFF; + stroke: #FFA6B6; + stroke-width: 1px; +} + +#dag-viz-graph svg.stage g.node g.label text tspan { + fill: #333; +} + +#dag-viz-graph svg.stage g.cluster text { + fill: #333; +} + +#dag-viz-graph svg.stage g.node rect { + fill: #C3EBFF; + stroke: #3EC0FF; + stroke-width: 1px; +} + +#dag-viz-graph svg.stage g.node.cached rect { + fill: #B3F5C5; + stroke: #52C366; + stroke-width: 2px; +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js new file mode 100644 index 0000000000000..e96af8768daa0 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -0,0 +1,492 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains the logic to render the RDD DAG visualization in the UI. 
+ * + * This DAG describes the relationships between + * (1) an RDD and its dependencies, + * (2) an RDD and its operation scopes, and + * (3) an RDD's operation scopes and the stage / job hierarchy + * + * An operation scope is a general, named code block that instantiates RDDs + * (e.g. filter, textFile, reduceByKey). An operation scope can be nested inside + * of other scopes if the corresponding RDD operation invokes other such operations + * (for more detail, see o.a.s.rdd.RDDOperationScope). + * + * A stage may include one or more operation scopes if the RDD operations are + * streamlined into one stage (e.g. rdd.map(...).filter(...).flatMap(...)). + * On the flip side, an operation scope may also include one or many stages, + * or even jobs if the RDD operation is higher level than Spark's scheduling + * primitives (e.g. take, any SQL query). + * + * In the visualization, an RDD is expressed as a node, and its dependencies + * as directed edges (from parent to child). operation scopes, stages, and + * jobs are expressed as clusters that may contain one or many nodes. These + * clusters may be nested inside of each other in the scenarios described + * above. + * + * The visualization is rendered in an SVG contained in "div#dag-viz-graph", + * and its input data is expected to be populated in "div#dag-viz-metadata" + * by Spark's UI code. This is currently used only on the stage page and on + * the job page. + * + * This requires jQuery, d3, and dagre-d3. Note that we use a custom release + * of dagre-d3 (http://github.com/andrewor14/dagre-d3) for some specific + * functionality. For more detail, please track the changes in that project + * since it was forked (commit 101503833a8ce5fe369547f6addf3e71172ce10b). + */ + +var VizConstants = { + svgMarginX: 16, + svgMarginY: 16, + stageSep: 40, + graphPrefix: "graph_", + nodePrefix: "node_", + clusterPrefix: "cluster_" +}; + +var JobPageVizConstants = { + clusterLabelSize: 12, + stageClusterLabelSize: 14, + rankSep: 40 +}; + +var StagePageVizConstants = { + clusterLabelSize: 14, + stageClusterLabelSize: 14, + rankSep: 40 +}; + +/* + * Show or hide the RDD DAG visualization. + * + * The graph is only rendered the first time this is called. + * This is the narrow interface called from the Scala UI code. + */ +function toggleDagViz(forJob) { + var arrowSelector = ".expand-dag-viz-arrow"; + $(arrowSelector).toggleClass('arrow-closed'); + $(arrowSelector).toggleClass('arrow-open'); + var shouldShow = $(arrowSelector).hasClass("arrow-open"); + if (shouldShow) { + var shouldRender = graphContainer().select("*").empty(); + if (shouldRender) { + renderDagViz(forJob); + } + graphContainer().style("display", "block"); + } else { + // Save the graph for later so we don't have to render it again + graphContainer().style("display", "none"); + } +} + +/* + * Render the RDD DAG visualization. + * + * Input DOM hierarchy: + * div#dag-viz-metadata > + * div.stage-metadata > + * div.[dot-file | incoming-edge | outgoing-edge] + * + * Output DOM hierarchy: + * div#dag-viz-graph > + * svg > + * g.cluster_stage_[stageId] + * + * Note that the input metadata is populated by o.a.s.ui.UIUtils.showDagViz. + * Any changes in the input format here must be reflected there. + */ +function renderDagViz(forJob) { + + // If there is not a dot file to render, fail fast and report error + var jobOrStage = forJob ? 
"job" : "stage"; + if (metadataContainer().empty() || + metadataContainer().selectAll("div").empty()) { + var message = + "No visualization information available for this " + jobOrStage + "!
" + + "If this is an old " + jobOrStage + ", its visualization metadata may have been " + + "cleaned up over time.
You may consider increasing the value of "; + if (forJob) { + message += "spark.ui.retainedJobs and spark.ui.retainedStages."; + } else { + message += "spark.ui.retainedStages"; + } + graphContainer().append("div").attr("id", "empty-dag-viz-message").html(message); + return; + } + + // Render + var svg = graphContainer().append("svg").attr("class", jobOrStage); + if (forJob) { + renderDagVizForJob(svg); + } else { + renderDagVizForStage(svg); + } + + // Find cached RDDs and mark them as such + metadataContainer().selectAll(".cached-rdd").each(function(v) { + var nodeId = VizConstants.nodePrefix + d3.select(this).text(); + svg.selectAll("g." + nodeId).classed("cached", true); + }); + + resizeSvg(svg); +} + +/* Render the RDD DAG visualization on the stage page. */ +function renderDagVizForStage(svgContainer) { + var metadata = metadataContainer().select(".stage-metadata"); + var dot = metadata.select(".dot-file").text(); + var containerId = VizConstants.graphPrefix + metadata.attr("stage-id"); + var container = svgContainer.append("g").attr("id", containerId); + renderDot(dot, container, false); + + // Round corners on rectangles + svgContainer + .selectAll("rect") + .attr("rx", "5") + .attr("ry", "5"); +} + +/* + * Render the RDD DAG visualization on the job page. + * + * Due to limitations in dagre-d3, each stage is rendered independently so that + * we have more control on how to position them. Unfortunately, this means we + * cannot rely on dagre-d3 to render edges that cross stages and must render + * these manually on our own. + */ +function renderDagVizForJob(svgContainer) { + var crossStageEdges = []; + + // Each div.stage-metadata contains the information needed to generate the graph + // for a stage. This includes the DOT file produced from the appropriate UI listener, + // any incoming and outgoing edges, and any cached RDDs that belong to this stage. + metadataContainer().selectAll(".stage-metadata").each(function(d, i) { + var metadata = d3.select(this); + var dot = metadata.select(".dot-file").text(); + var stageId = metadata.attr("stage-id"); + var containerId = VizConstants.graphPrefix + stageId; + var isSkipped = metadata.attr("skipped") == "true"; + var container; + if (isSkipped) { + container = svgContainer + .append("g") + .attr("id", containerId) + .attr("skipped", "true"); + } else { + // Link each graph to the corresponding stage page (TODO: handle stage attempts) + // Use the link from the stage table so it also works for the history server + var attemptId = 0 + var stageLink = d3.select("#stage-" + stageId + "-" + attemptId) + .select("a.name-link") + .attr("href") + "&expandDagViz=true"; + container = svgContainer + .append("a") + .attr("xlink:href", stageLink) + .append("g") + .attr("id", containerId); + } + + // Now we need to shift the container for this stage so it doesn't overlap with + // existing ones, taking into account the position and width of the last stage's + // container. We do not need to do this for the first stage of this job. 
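[Worked example, illustrative only and not part of the patch: if the previous stage's cluster rect sits at absolute x = 200 and is 300 px wide, the offset computed below is 200 + 300 + 40 (VizConstants.stageSep) = 540, so this stage's container is translated 540 px to the right.]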
+ if (i > 0) { + var existingStages = svgContainer.selectAll("g.cluster.stage") + if (!existingStages.empty()) { + var lastStage = d3.select(existingStages[0].pop()); + var lastStageWidth = toFloat(lastStage.select("rect").attr("width")); + var lastStagePosition = getAbsolutePosition(lastStage); + var offset = lastStagePosition.x + lastStageWidth + VizConstants.stageSep; + container.attr("transform", "translate(" + offset + ", 0)"); + } + } + + // Actually render the stage + renderDot(dot, container, true); + + // Mark elements as skipped if appropriate. Unfortunately we need to mark all + // elements instead of the parent container because of CSS override rules. + if (isSkipped) { + container.selectAll("g").classed("skipped", true); + } + + // Round corners on rectangles + container + .selectAll("rect") + .attr("rx", "4") + .attr("ry", "4"); + + // If there are any incoming edges into this graph, keep track of them to render + // them separately later. Note that we cannot draw them now because we need to + // put these edges in a separate container that is on top of all stage graphs. + metadata.selectAll(".incoming-edge").each(function(v) { + var edge = d3.select(this).text().split(","); // e.g. 3,4 => [3, 4] + crossStageEdges.push(edge); + }); + }); + + addTooltipsForRDDs(svgContainer); + drawCrossStageEdges(crossStageEdges, svgContainer); +} + +/* Render the dot file as an SVG in the given container. */ +function renderDot(dot, container, forJob) { + var escaped_dot = dot + .replace(/&lt;/g, "<") + .replace(/&gt;/g, ">") + .replace(/&quot;/g, "\""); + var g = graphlibDot.read(escaped_dot); + var renderer = new dagreD3.render(); + preprocessGraphLayout(g, forJob); + renderer(container, g); + + // Find the stage cluster and mark it for styling and post-processing + container.selectAll("g.cluster[name*=\"Stage\"]").classed("stage", true); +} + +/* -------------------- * + * | Helper functions | * + * -------------------- */ + +// Helper d3 accessors +function graphContainer() { return d3.select("#dag-viz-graph"); } +function metadataContainer() { return d3.select("#dag-viz-metadata"); } + +/* + * Helper function to pre-process the graph layout. + * This step is necessary for certain styles that affect the positioning + * and sizes of graph elements, e.g. padding, font style, shape. + */ +function preprocessGraphLayout(g, forJob) { + var nodes = g.nodes(); + for (var i = 0; i < nodes.length; i++) { + var isCluster = g.children(nodes[i]).length > 0; + if (!isCluster) { + var node = g.node(nodes[i]); + if (forJob) { + // Do not display RDD name on job page + node.shape = "circle"; + node.labelStyle = "font-size: 0px"; + } else { + node.labelStyle = "font-size: 12px"; + } + node.padding = "5"; + } + } + // Curve the edges + var edges = g.edges(); + for (var j = 0; j < edges.length; j++) { + var edge = g.edge(edges[j]); + edge.lineInterpolate = "basis"; + } + // Adjust vertical separation between nodes + if (forJob) { + g.graph().rankSep = JobPageVizConstants.rankSep; + } else { + g.graph().rankSep = StagePageVizConstants.rankSep; + } +} + +/* + * Helper function to size the SVG appropriately such that all elements are displayed. + * This assumes that all outermost elements are clusters (rectangles).
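[Worked example, illustrative only and not part of the patch: if the leftmost outer cluster rect starts at absolute x = 30, the rightmost ends at x = 630, the topmost starts at y = 20 and the bottommost ends at y = 420, then with VizConstants.svgMarginX = svgMarginY = 16 the resizeSvg function below sets viewBox = "14 4 632 432" and sizes the SVG to 632 x 432 (start = min - margin, extent = span + 2 * margin).]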
+ */ +function resizeSvg(svg) { + var allClusters = svg.selectAll("g.cluster rect")[0]; + var startX = -VizConstants.svgMarginX + + toFloat(d3.min(allClusters, function(e) { + return getAbsolutePosition(d3.select(e)).x; + })); + var startY = -VizConstants.svgMarginY + + toFloat(d3.min(allClusters, function(e) { + return getAbsolutePosition(d3.select(e)).y; + })); + var endX = VizConstants.svgMarginX + + toFloat(d3.max(allClusters, function(e) { + var t = d3.select(e); + return getAbsolutePosition(t).x + toFloat(t.attr("width")); + })); + var endY = VizConstants.svgMarginY + + toFloat(d3.max(allClusters, function(e) { + var t = d3.select(e); + return getAbsolutePosition(t).y + toFloat(t.attr("height")); + })); + var width = endX - startX; + var height = endY - startY; + svg.attr("viewBox", startX + " " + startY + " " + width + " " + height) + .attr("width", width) + .attr("height", height); +} + +/* + * (Job page only) Helper function to draw edges that cross stage boundaries. + * We need to do this manually because we render each stage separately in dagre-d3. + */ +function drawCrossStageEdges(edges, svgContainer) { + if (edges.length == 0) { + return; + } + // Draw the paths first + var edgesContainer = svgContainer.append("g").attr("id", "cross-stage-edges"); + for (var i = 0; i < edges.length; i++) { + var fromRDDId = edges[i][0]; + var toRDDId = edges[i][1]; + connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer); + } + // Now draw the arrows by borrowing the arrow marker generated by dagre-d3 + var dagreD3Marker = svgContainer.select("g.edgePaths marker"); + if (!dagreD3Marker.empty()) { + svgContainer + .append(function() { return dagreD3Marker.node().cloneNode(true); }) + .attr("id", "marker-arrow"); + svgContainer.selectAll("g > path").attr("marker-end", "url(#marker-arrow)"); + svgContainer.selectAll("g.edgePaths def").remove(); // We no longer need these + } +} + +/* + * (Job page only) Helper function to compute the absolute + * position of the specified element in our graph. + */ +function getAbsolutePosition(d3selection) { + if (d3selection.empty()) { + throw "Attempted to get absolute position of an empty selection."; + } + var obj = d3selection; + var _x = toFloat(obj.attr("x")) || 0; + var _y = toFloat(obj.attr("y")) || 0; + while (!obj.empty()) { + var transformText = obj.attr("transform"); + if (transformText) { + var translate = d3.transform(transformText).translate; + _x += toFloat(translate[0]); + _y += toFloat(translate[1]); + } + // Climb upwards to find how our parents are translated + obj = d3.select(obj.node().parentNode); + // Stop when we've reached the graph container itself + if (obj.node() == graphContainer().node()) { + break; + } + } + return { x: _x, y: _y }; +} + +/* (Job page only) Helper function to connect two RDDs with a curved edge. */ +function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) { + var fromNodeId = VizConstants.nodePrefix + fromRDDId; + var toNodeId = VizConstants.nodePrefix + toRDDId; + var fromPos = getAbsolutePosition(svgContainer.select("g." + fromNodeId)); + var toPos = getAbsolutePosition(svgContainer.select("g." + toNodeId)); + + // On the job page, RDDs are rendered as dots (circles). When rendering the path, + // we need to account for the radii of these circles. Otherwise the arrow heads + // will bleed into the circle itself. + var delta = toFloat(svgContainer + .select("g.node." 
+ toNodeId) + .select("circle") + .attr("r")); + if (fromPos.x < toPos.x) { + fromPos.x += delta; + toPos.x -= delta; + } else if (fromPos.x > toPos.x) { + fromPos.x -= delta; + toPos.x += delta; + } + + var points; + if (fromPos.y == toPos.y) { + // If they are on the same rank, curve the middle part of the edge + // upward a little to avoid interference with things in between + // e.g. _______ + // _____/ \_____ + points = [ + [fromPos.x, fromPos.y], + [fromPos.x + (toPos.x - fromPos.x) * 0.2, fromPos.y], + [fromPos.x + (toPos.x - fromPos.x) * 0.3, fromPos.y - 20], + [fromPos.x + (toPos.x - fromPos.x) * 0.7, fromPos.y - 20], + [fromPos.x + (toPos.x - fromPos.x) * 0.8, toPos.y], + [toPos.x, toPos.y] + ]; + } else { + // Otherwise, draw a curved edge that flattens out on both ends + // e.g. _____ + // / + // | + // _____/ + points = [ + [fromPos.x, fromPos.y], + [fromPos.x + (toPos.x - fromPos.x) * 0.4, fromPos.y], + [fromPos.x + (toPos.x - fromPos.x) * 0.6, toPos.y], + [toPos.x, toPos.y] + ]; + } + + var line = d3.svg.line().interpolate("basis"); + edgesContainer.append("path").datum(points).attr("d", line); +} + +/* (Job page only) Helper function to add tooltips for RDDs. */ +function addTooltipsForRDDs(svgContainer) { + svgContainer.selectAll("g.node").each(function() { + var node = d3.select(this); + var tooltipText = node.attr("name"); + if (tooltipText) { + node.select("circle") + .attr("data-toggle", "tooltip") + .attr("data-placement", "bottom") + .attr("title", tooltipText); + } + // Link tooltips for all nodes that belong to the same RDD + node.on("mouseenter", function() { triggerTooltipForRDD(node, true); }); + node.on("mouseleave", function() { triggerTooltipForRDD(node, false); }); + }); + + $("[data-toggle=tooltip]") + .filter("g.node circle") + .tooltip({ container: "body", trigger: "manual" }); +} + +/* + * (Job page only) Helper function to show or hide tooltips for all nodes + * in the graph that refer to the same RDD the specified node represents. + */ +function triggerTooltipForRDD(d3node, show) { + var classes = d3node.node().classList; + for (var i = 0; i < classes.length; i++) { + var clazz = classes[i]; + var isRDDClass = clazz.indexOf(VizConstants.nodePrefix) == 0; + if (isRDDClass) { + graphContainer().selectAll("g." + clazz).each(function() { + var circle = d3.select(this).select("circle").node(); + var showOrHide = show ? "show" : "hide"; + $(circle).tooltip(showOrHide); + }); + } + } +} + +/* Helper function to convert attributes to numeric values. 
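[Illustrative only, not part of the patch: for the toFloat helper defined just below, toFloat("12.5px") strips the trailing "px" and returns the number 12.5, while a falsy input such as null or "" is returned unchanged.]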
*/ +function toFloat(f) { + if (f) { + return parseFloat(f.toString().replace(/px$/, "")); + } else { + return f; + } +} + diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css index 35ef14e5aaf1a..0f400461c5293 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.css @@ -19,10 +19,74 @@ div#application-timeline, div#job-timeline { margin-bottom: 30px; } -#application-timeline div.legend-area { +#application-timeline div.legend-area, +#job-timeline div.legend-area { margin-top: 5px; } +#task-assignment-timeline div.legend-area { + width: 574px; +} + +#task-assignment-timeline .legend-area > svg { + width: 100%; + height: 55px; +} + +#task-assignment-timeline div.item.range { + padding: 0px; + height: 26px; + border-width: 0; +} + +.task-assignment-timeline-content { + width: 100%; +} + +.task-assignment-timeline-duration-bar { + width: 100%; + height: 26px; +} + +rect.scheduler-delay-proportion { + fill: #80B1D3; + stroke: #6B94B0; +} + +rect.deserialization-time-proportion { + fill: #FB8072; + stroke: #D26B5F; +} + +rect.shuffle-read-time-proportion { + fill: #FDB462; + stroke: #D39651; +} + +rect.executor-runtime-proportion { + fill: #B3DE69; + stroke: #95B957; +} + +rect.shuffle-write-time-proportion { + fill: #FFED6F; + stroke: #D5C65C; +} + +rect.serialization-time-proportion { + fill: #BC80BD; + stroke: #9D6B9E; +} + +rect.getting-result-time-proportion { + fill: #8DD3C7; + stroke: #75B0A6; +} + +.vis.timeline { + line-height: 14px; +} + .vis.timeline div.content { width: 100%; } @@ -32,48 +96,55 @@ div#application-timeline, div#job-timeline { } .vis.timeline .item.stage.succeeded { - background-color: #D5DDF6; + background-color: #A0DFFF; + border-color: #3EC0FF; } .vis.timeline .item.stage.succeeded.selected { - background-color: #D5DDF6; - border-color: #97B0F8; - z-index: auto; + background-color: #A0DFFF; + border-color: #3EC0FF; + z-index: auto; } .legend-area rect.completed-stage-legend { - fill: #D5DDF6; - stroke: #97B0F8; + fill: #A0DFFF; + stroke: #3EC0FF; } .vis.timeline .item.stage.failed { - background-color: #FF5475; + background-color: #FFA1B0; + border-color: #FF4D6D; } .vis.timeline .item.stage.failed.selected { - background-color: #FF5475; - border-color: #97B0F8; - z-index: auto; + background-color: #FFA1B0; + border-color: #FF4D6D; + z-index: auto; } .legend-area rect.failed-stage-legend { - fill: #FF5475; - stroke: #97B0F8; + fill: #FFA1B0; + stroke: #FF4D6D; } .vis.timeline .item.stage.running { - background-color: #FDFFCA; + background-color: #A2FCC0; + border-color: #36F572; } .vis.timeline .item.stage.running.selected { - background-color: #FDFFCA; - border-color: #97B0F8; - z-index: auto; + background-color: #A2FCC0; + border-color: #36F572; + z-index: auto; } .legend-area rect.active-stage-legend { - fill: #FDFFCA; - stroke: #97B0F8; + fill: #A2FCC0; + stroke: #36F572; +} + +.vis.timeline .foreground { + cursor: move; } .vis.timeline .item.job { @@ -81,76 +152,81 @@ div#application-timeline, div#job-timeline { } .vis.timeline .item.job.succeeded { - background-color: #D5DDF6; + background-color: #A0DFFF; + border-color: #3EC0FF; } .vis.timeline .item.job.succeeded.selected { - background-color: #D5DDF6; - border-color: #97B0F8; - z-index: auto; + background-color: #A0DFFF; + border-color: #3EC0FF; + z-index: auto; } .legend-area rect.succeeded-job-legend { - 
fill: #D5DDF6; - stroke: #97B0F8; + fill: #A0DFFF; + stroke: #3EC0FF; } .vis.timeline .item.job.failed { - background-color: #FF5475; + background-color: #FFA1B0; + border-color: #FF4D6D; } .vis.timeline .item.job.failed.selected { - background-color: #FF5475; - border-color: #97B0F8; - z-index: auto; + background-color: #FFA1B0; + border-color: #FF4D6D; + z-index: auto; } .legend-area rect.failed-job-legend { - fill: #FF5475; - stroke: #97B0F8; + fill: #FFA1B0; + stroke: #FF4D6D; } .vis.timeline .item.job.running { - background-color: #FDFFCA; + background-color: #A2FCC0; + border-color: #36F572; } .vis.timeline .item.job.running.selected { - background-color: #FDFFCA; - border-color: #97B0F8; - z-index: auto; + background-color: #A2FCC0; + border-color: #36F572; + z-index: auto; } .legend-area rect.running-job-legend { - fill: #FDFFCA; - stroke: #97B0F8; + fill: #A2FCC0; + stroke: #36F572; } .vis.timeline .item.executor.added { - background-color: #D5DDF6; + background-color: #A0DFFF; + border-color: #3EC0FF; } .legend-area rect.executor-added-legend { - fill: #D5DDF6; - stroke: #97B0F8; + fill: #A0DFFF; + stroke: #3EC0FF; } .vis.timeline .item.executor.removed { - background-color: #EBCA59; + background-color: #FFA1B0; + border-color: #FF4D6D; } .legend-area rect.executor-removed-legend { - fill: #EBCA59; - stroke: #97B0F8; + fill: #FFA1B0; + stroke: #FF4D6D; } .vis.timeline .item.executor.selected { - border-color: #FFC200; - background-color: #FFF785; + background-color: #A2FCC0; + border-color: #36F572; z-index: 2; } -tr.corresponding-item-hover>td, tr.corresponding-item-hover>th { - background-color: #FFE1FA !important; +tr.corresponding-item-hover > td, tr.corresponding-item-hover > th { + background-color: #D6FFE4 !important; } #application-timeline.collapsed { @@ -161,15 +237,24 @@ tr.corresponding-item-hover>td, tr.corresponding-item-hover>th { display: none; } +#task-assignment-timeline.collapsed { + display: none; +} + .control-panel { margin-bottom: 5px; } -span.expand-application-timeline, span.expand-job-timeline { +.control-panel input[type="checkbox"] { + margin: 0; +} + +span.expand-application-timeline, span.expand-job-timeline, +span.expand-task-assignment-timeline { cursor: pointer; } -.control-panel input+span { +.control-panel input + span { cursor: pointer; } @@ -180,3 +265,17 @@ span.expand-application-timeline, span.expand-job-timeline { .vis.timeline .item .tooltip-inner { max-width: unset !important; } + +.vispanel.center { + font-size: 12px; + line-height: 12px; +} + +.legend-area text { + fill: #4D4D4D; +} + +.additional-metrics ul { + list-style: none; + margin-left: 15px; +} diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index e4a891d47f035..ca74ef9d7e94e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -39,23 +39,24 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime) { function setupJobEventAction() { $(".item.range.job.application-timeline-object").each(function() { - var getJobId = function(baseElem) { + var getSelectorForJobEntry = function(baseElem) { var jobIdText = $($(baseElem).find(".application-timeline-content")[0]).text(); var jobId = jobIdText.match("\\(Job (\\d+)\\)")[1]; - return jobId; + return "#job-" + jobId; }; $(this).click(function() { - window.location.href = "job/?id=" + getJobId(this); + var jobPagePath 
= $(getSelectorForJobEntry(this)).find("a.name-link").attr("href") + window.location.href = jobPagePath }); $(this).hover( function() { - $("#job-" + getJobId(this)).addClass("corresponding-item-hover"); + $(getSelectorForJobEntry(this)).addClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("show"); }, function() { - $("#job-" + getJobId(this)).removeClass("corresponding-item-hover"); + $(getSelectorForJobEntry(this)).removeClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("hide"); } ); @@ -97,32 +98,24 @@ function drawJobTimeline(groupArray, eventObjArray, startTime) { function setupStageEventAction() { $(".item.range.stage.job-timeline-object").each(function() { - var getStageIdAndAttempt = function(baseElem) { + var getSelectorForStageEntry = function(baseElem) { var stageIdText = $($(baseElem).find(".job-timeline-content")[0]).text(); var stageIdAndAttempt = stageIdText.match("\\(Stage (\\d+\\.\\d+)\\)")[1].split("."); - return stageIdAndAttempt; + return "#stage-" + stageIdAndAttempt[0] + "-" + stageIdAndAttempt[1]; }; $(this).click(function() { - var idAndAttempt = getStageIdAndAttempt(this); - var id = idAndAttempt[0]; - var attempt = idAndAttempt[1]; - window.location.href = "../../stages/stage/?id=" + id + "&attempt=" + attempt; + var stagePagePath = $(getSelectorForStageEntry(this)).find("a.name-link").attr("href") + window.location.href = stagePagePath }); $(this).hover( function() { - var idAndAttempt = getStageIdAndAttempt(this); - var id = idAndAttempt[0]; - var attempt = idAndAttempt[1]; - $("#stage-" + id + "-" + attempt).addClass("corresponding-item-hover"); + $(getSelectorForStageEntry(this)).addClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("show"); }, function() { - var idAndAttempt = getStageIdAndAttempt(this); - var id = idAndAttempt[0]; - var attempt = idAndAttempt[1]; - $("#stage-" + id + "-" + attempt).removeClass("corresponding-item-hover"); + $(getSelectorForStageEntry(this)).removeClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("hide"); } ); @@ -140,6 +133,57 @@ function drawJobTimeline(groupArray, eventObjArray, startTime) { }); } +function drawTaskAssignmentTimeline(groupArray, eventObjArray, minLaunchTime, maxFinishTime) { + var groups = new vis.DataSet(groupArray); + var items = new vis.DataSet(eventObjArray); + var container = $("#task-assignment-timeline")[0] + var options = { + groupOrder: function(a, b) { + return a.value - b.value + }, + editable: false, + align: 'left', + selectable: false, + showCurrentTime: false, + min: minLaunchTime, + max: maxFinishTime, + zoomable: false + }; + + var taskTimeline = new vis.Timeline(container) + taskTimeline.setOptions(options); + taskTimeline.setGroups(groups); + taskTimeline.setItems(items); + + // If a user zooms while a tooltip is displayed, the user may zoom such that the cursor is no + // longer over the task that the tooltip corresponds to. So, when a user zooms, we should hide + // any currently displayed tooltips. 
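[Illustrative aside, not part of the patch.] The lines below use jQuery's delegated form of .on(), which takes an events map plus a descendant selector so the handlers also fire for timeline contents created after binding. A minimal standalone sketch of that pattern, with hypothetical element names:

    // Bind on a stable parent; ".item-content" children may be added later by the timeline.
    var hoveredItem = null;
    $("#some-container").on({
      "mouseenter": function() { hoveredItem = this; },
      "mouseleave": function() { hoveredItem = null; }
    }, ".item-content");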
+ var currentDisplayedTooltip = null; + $("#task-assignment-timeline").on({ + "mouseenter": function() { + currentDisplayedTooltip = this; + }, + "mouseleave": function() { + currentDisplayedTooltip = null; + } + }, ".task-assignment-timeline-content"); + taskTimeline.on("rangechange", function(prop) { + if (currentDisplayedTooltip !== null) { + $(currentDisplayedTooltip).tooltip("hide"); + } + }); + + setupZoomable("#task-assignment-timeline-zoom-lock", taskTimeline); + + $("span.expand-task-assignment-timeline").click(function() { + $("#task-assignment-timeline").toggleClass("collapsed"); + + // Switch the class of the arrow from open to closed. + $(this).find(".expand-task-assignment-timeline-arrow").toggleClass("arrow-open"); + $(this).find(".expand-task-assignment-timeline-arrow").toggleClass("arrow-closed"); + }); +} + function setupExecutorEventAction() { $(".item.box.executor").each(function () { $(this).hover( @@ -154,15 +198,15 @@ function setupExecutorEventAction() { } function setupZoomable(id, timeline) { - $(id + '>input[type="checkbox"]').click(function() { + $(id + ' > input[type="checkbox"]').click(function() { if (this.checked) { - timeline.setOptions({zoomable: false}); - } else { timeline.setOptions({zoomable: true}); + } else { + timeline.setOptions({zoomable: false}); } }); - $(id + ">span").click(function() { + $(id + " > span").click(function() { $(this).parent().find('input:checkbox').trigger('click'); }); } diff --git a/core/src/main/resources/org/apache/spark/ui/static/vis.map b/core/src/main/resources/org/apache/spark/ui/static/vis.map deleted file mode 100644 index 9be8b618989b7..0000000000000 --- a/core/src/main/resources/org/apache/spark/ui/static/vis.map +++ /dev/null @@ -1 +0,0 @@ -{"version":3,"file":"vis.map","sources":["./dist/vis.js"],"names":["root","factory","exports","module","define","amd","this","modules","__webpack_require__","moduleId","installedModules","id","loaded","call","m","c","p","util","DOMutil","DataSet","DataView","Queue","Graph3d","graph3d","Camera","Filter","Point2d","Point3d","Slider","StepNumber","Timeline","Graph2d","timeline","DateUtil","DataStep","Range","stack","TimeStep","components","items","Item","BackgroundItem","BoxItem","PointItem","RangeItem","Component","CurrentTime","CustomTime","DataAxis","GraphGroup","Group","BackgroundGroup","ItemSet","Legend","LineGraph","TimeAxis","Network","network","Edge","Groups","Images","Node","Popup","dotparser","gephiParser","Graph","Error","moment","hammer","Hammer","isNumber","object","Number","isString","String","isDate","Date","match","ASPDateRegex","exec","isNaN","parse","isDataTable","google","visualization","DataTable","randomUUID","S4","Math","floor","random","toString","extend","a","i","len","arguments","length","other","prop","hasOwnProperty","selectiveExtend","props","Array","isArray","selectiveDeepExtend","b","TypeError","constructor","Object","undefined","deepExtend","selectiveNotDeepExtend","indexOf","equalArray","convert","type","Boolean","valueOf","isMoment","toDate","getType","toISOString","value","getAbsoluteLeft","elem","getBoundingClientRect","left","getAbsoluteTop","top","addClassName","className","classes","split","push","join","removeClassName","index","splice","forEach","callback","toArray","array","updateProperty","key","addEventListener","element","action","listener","useCapture","navigator","userAgent","attachEvent","removeEventListener","detachEvent","preventDefault","event","window","returnValue","getTarget","target","srcElement","nodeType","parentNode","option","
asBoolean","defaultValue","asNumber","asString","asSize","asElement","hexToRGB","hex","shorthandRegex","replace","r","g","result","parseInt","RGBToHex","red","green","blue","slice","parseColor","color","isValidRGB","rgb","substr","isValidHex","hsv","hexToHSV","lighterColorHSV","h","s","v","min","darkerColorHSV","darkerColorHex","HSVToHex","lighterColorHex","background","border","highlight","hover","RGBToHSV","minRGB","maxRGB","max","d","hue","saturation","cssUtil","cssText","styles","style","trim","parts","keys","map","addCssText","currentStyles","newStyles","removeCssText","removeStyles","HSVToRGB","f","q","t","isOk","test","selectiveBridgeObject","fields","referenceObject","objectTo","create","bridgeObject","mergeOptions","mergeTarget","options","enabled","binarySearchCustom","orderedItems","searchFunction","field","field2","maxIterations","iteration","low","high","middle","item","searchResult","binarySearchValue","sidePreference","prevValue","nextValue","easeInOutQuad","start","end","duration","change","easingFunctions","linear","easeInQuad","easeOutQuad","easeInCubic","easeOutCubic","easeInOutCubic","easeInQuart","easeOutQuart","easeInOutQuart","easeInQuint","easeOutQuint","easeInOutQuint","prepareElements","JSONcontainer","elementType","redundant","used","cleanupElements","removeChild","getSVGElement","svgContainer","shift","document","createElementNS","appendChild","getDOMElement","DOMContainer","insertBefore","createElement","drawPoint","x","y","group","point","drawPoints","setAttributeNS","size","drawBar","width","height","rect","data","_options","_data","_fieldId","fieldId","_type","_subscribers","add","setOptions","prototype","queue","_queue","destroy","on","subscribers","subscribe","off","filter","unsubscribe","_trigger","params","senderId","concat","subscriber","addedIds","me","_addItem","columns","_getColumnNames","row","rows","getNumberOfRows","col","cols","getValue","update","updatedIds","updatedData","addOrUpdate","_updateItem","get","ids","firstType","returnType","allowedValues","itemId","_getItem","order","_sort","_filterFields","_appendRow","getIds","getDataSet","mappedItems","filteredItem","name","sort","av","bv","remove","removedId","removedIds","_remove","clear","maxField","itemField","minField","distinct","values","fieldType","count","exists","types","raw","converted","JSON","stringify","dataTable","getNumberOfColumns","getColumnId","getColumnLabel","addRow","setValue","_ids","_onEvent","apply","setData","viewOptions","getArguments","defaultFilter","dataSet","added","updated","removed","delay","Infinity","_timeout","_extended","_flushIfNeeded","flush","methods","original","method","args","fn","context","entry","clearTimeout","setTimeout","container","SyntaxError","containerElement","margin","defaultXCenter","defaultYCenter","xLabel","yLabel","zLabel","passValueFn","xValueLabel","yValueLabel","zValueLabel","filterLabel","legendLabel","STYLE","DOT","showPerspective","showGrid","keepAspectRatio","showShadow","showGrayBottom","showTooltip","verticalRatio","animationInterval","animationPreload","camera","eye","dataPoints","colX","colY","colZ","colValue","colFilter","xMin","xStep","xMax","yMin","yStep","yMax","zMin","zStep","zMax","valueMin","valueMax","xBarWidth","yBarWidth","colorAxis","colorGrid","colorDot","colorDotBorder","getMouseX","clientX","targetTouches","getMouseY","clientY","Emitter","_setScale","scale","z","xCenter","yCenter","zCenter","setArmLocation","_convert3Dto2D","point3d","translation","_convertPointToTranslation","_convertTranslationToScreen","ax","ay",
"az","cx","getCameraLocation","cy","cz","sinTx","sin","getCameraRotation","cosTx","cos","sinTy","cosTy","sinTz","cosTz","dx","dy","dz","bx","by","ex","ey","ez","getArmLength","xcenter","frame","canvas","clientWidth","ycenter","_setBackgroundColor","backgroundColor","fill","stroke","strokeWidth","borderColor","borderWidth","borderStyle","BAR","BARCOLOR","BARSIZE","DOTLINE","DOTCOLOR","DOTSIZE","GRID","LINE","SURFACE","_getStyleNumber","styleName","_determineColumnIndexes","counter","column","getDistinctValues","distinctValues","getColumnRange","minMax","_dataInitialize","rawData","_onChange","dataFilter","setOnLoadCallback","redraw","withBars","defaultXBarWidth","dataX","defaultYBarWidth","dataY","xRange","defaultXMin","defaultXMax","defaultXStep","yRange","defaultYMin","defaultYMax","defaultYStep","zRange","defaultZMin","defaultZMax","defaultZStep","valueRange","defaultValueMin","defaultValueMax","_getDataPoints","obj","sortNumber","dataMatrix","xIndex","yIndex","trans","screen","bottom","pointRight","pointTop","pointCross","hasChildNodes","firstChild","position","overflow","noCanvas","fontWeight","padding","innerHTML","onmousedown","_onMouseDown","ontouchstart","_onTouchStart","onmousewheel","_onWheel","ontooltip","_onTooltip","onkeydown","setSize","_resizeCanvas","clientHeight","animationStart","slider","play","animationStop","stop","_resizeCenter","charAt","parseFloat","setCameraPosition","pos","horizontal","vertical","setArmRotation","distance","setArmLength","getCameraPosition","getArmRotation","_readData","_redrawFilter","animationAutoStart","cameraPosition","styleNumber","tooltip","showAnimationControls","_redrawSlider","_redrawClear","_redrawAxis","_redrawDataGrid","_redrawDataLine","_redrawDataBar","_redrawDataDot","_redrawInfo","_redrawLegend","ctx","getContext","clearRect","widthMin","widthMax","dotSize","right","lineWidth","font","ymin","ymax","_hsv2rgb","strokeStyle","beginPath","moveTo","lineTo","strokeRect","fillStyle","closePath","gridLineLen","step","getCurrent","next","textAlign","textBaseline","fillText","label","visible","setValues","setPlayInterval","onchange","getIndex","selectValue","setOnChangeCallback","lineStyle","getLabel","getSelectedValue","from","to","prettyStep","text","xText","yText","zText","offset","xOffset","yOffset","xMin2d","xMax2d","gridLenX","gridLenY","textMargin","armAngle","H","S","V","R","G","B","C","Hi","X","abs","cross","topSideVisible","zAvg","transBottom","dist","sortDepth","aDiff","subtract","bDiff","crossproduct","crossProduct","radius","arc","PI","j","surface","corners","xWidth","yWidth","surfaces","center","avg","transCenter","diff","leftButtonDown","_onMouseUp","which","button","touchDown","startMouseX","startMouseY","startStart","startEnd","startArmRotation","cursor","onmousemove","_onMouseMove","onmouseup","diffX","diffY","horizontalNew","verticalNew","snapAngle","snapValue","round","parameters","emit","boundingRect","mouseX","mouseY","tooltipTimeout","_hideTooltip","dataPoint","_dataPointFromXY","_showTooltip","ontouchmove","_onTouchMove","ontouchend","_onTouchEnd","delta","wheelDelta","detail","oldLength","newLength","_insideTriangle","triangle","sign","as","bs","cs","distMax","closestDataPoint","closestDist","triangle1","triangle2","distX","distY","sqrt","content","line","dot","dom","borderRadius","boxShadow","borderLeft","contentWidth","offsetWidth","contentHeight","offsetHeight","lineHeight","dotWidth","dotHeight","armLocation","armRotation","armLength","cameraLocation","cameraRotation","calculateCameraOrientation","rot","graph","on
LoadCallback","loadInBackground","isLoaded","getLoadedProgress","getColumn","getValues","dataView","progress","sub","sum","prev","bar","MozBorderRadius","slide","onclick","togglePlay","onChangeCallback","playTimeout","playInterval","playLoop","setIndex","playNext","interval","clearInterval","getPlayInterval","setPlayLoop","doLoop","onChange","indexToLeft","startClientX","startSlideX","leftToIndex","_start","_end","_step","precision","_current","setRange","setStep","calculatePrettyStep","log10","log","LN10","step1","pow","step2","step5","toPrecision","getStep","groups","forthArgument","defaultOptions","autoResize","orientation","maxHeight","minHeight","_create","body","domProps","emitter","bind","hiddenDates","snap","toScreen","_toScreen","toGlobalScreen","_toGlobalScreen","toTime","_toTime","toGlobalTime","_toGlobalTime","range","timeAxis","currentTime","customTime","itemSet","itemsData","groupsData","setGroups","setItems","Core","newDataSet","initialLoad","dataRange","_getDataRange","setWindow","animate","fit","setSelection","focus","getSelection","itemData","e","getItemRange","dataset","minItem","maxStartItem","maxEndItem","linegraph","getLegend","groupId","isGroupVisible","visibility","convertHiddenOptions","repeat","dateItem","updateHiddenDates","centerContainer","totalRange","pixelTime","startDate","endDate","_d","runUntil","clone","day","dayOfYear","year","dayOffset","date","month","console","removeDuplicates","startHidden","isHidden","endHidden","rangeStart","rangeEnd","hidden","startToFront","endToFront","_applyRange","safeDates","printDates","dates","stepOverHiddenDates","timeStep","previousTime","stepInHidden","currentValue","current","newValue","switchedYear","switchedMonth","switchedDay","time","conversion","getHiddenDurationBetween","correctTimeForHidden","hiddenDuration","totalDuration","partialDuration","accumulatedHiddenDuration","getAccumulatedHiddenDuration","newTime","getHiddenDurationBefore","timeOffset","requiredDuration","previousPoint","snapAwayFromHidden","direction","correctionEnabled","minimumStep","containerHeight","customRange","alignZeros","autoScale","stepIndex","marginStart","marginEnd","deadSpace","majorSteps","minorSteps","setMinimumStep","setFirst","safeSize","minimumStepValue","orderOfMagnitude","minorStepIdx","magnitudefactor","solutionFound","stepSize","niceStart","niceEnd","roundToMinor","marginRange","rounded","hasNext","previous","decimals","exp","cnt","isMajor","now","hours","minutes","seconds","milliseconds","deltaDifference","scaleOffset","moveable","zoomable","zoomMin","zoomMax","touch","animateTimer","_onDragStart","_onDrag","_onDragEnd","_onHold","_onMouseWheel","_onTouch","_onPinch","validateDirection","getPointer","byUser","_cancelAnimation","initStart","initEnd","initTime","anyChanged","dragging","done","changed","newStart","newEnd","getRange","totalHidden","previousDelta","allowDragging","deltaX","deltaY","diffRange","safeStart","safeEnd","pointer","pageX","pageY","pointerDate","_pointerToDate","zoom","centerDate","hiddenDurationBefore","hiddenDurationAfter","move","EPSILON","orderByStart","orderByEnd","aTime","bTime","force","iMax","axis","collidingItem","jj","collision","nostack","subgroups","newTop","subgroup","format","FORMAT","minorLabels","millisecond","second","minute","hour","weekday","majorLabels","setFormat","defaultFormat","first","setFullYear","getFullYear","setMonth","setDate","setHours","setMinutes","setSeconds","setMilliseconds","getMilliseconds","getSeconds","getMinutes","getHours","getDate","getMonth","setScale","newScale","
newStep","setAutoScale","enable","stepYear","stepMonth","stepDay","stepHour","stepMinute","stepSecond","stepMillisecond","getLabelMinor","getLabelMajor","getClassName","even","today","isSame","currentWeek","currentMonth","currentYear","locale","lang","toLowerCase","_isResized","resized","_previousWidth","_previousHeight","showCurrentTime","locales","parent","backgroundVertical","title","toUpperCase","substring","currentTimeTimer","setCurrentTime","getCurrentTime","showCustomTime","eventParams","drag","setCustomTime","getCustomTime","stopPropagation","svg","linegraphOptions","showMinorLabels","showMajorLabels","icons","majorLinesOffset","minorLinesOffset","labelOffsetX","labelOffsetY","iconWidth","linegraphSVG","DOMelements","lines","labels","conversionFactor","minWidth","stepPixels","stepPixelsForced","zeroCrossing","lineOffset","master","svgElements","iconsRemoved","amountOfGroups","lineContainer","scrollTop","addGroup","graphOptions","updateGroup","removeGroup","hide","show","display","_redrawGroupIcons","iconHeight","iconOffset","drawIcon","_cleanupIcons","backgroundHorizontal","activeGroups","_calculateCharSize","minorLabelHeight","minorCharHeight","majorLabelHeight","majorCharHeight","minorLineWidth","minorLineHeight","majorLineWidth","majorLineHeight","_redrawLabels","_redrawTitle","amountOfSteps","stepDifference","zeroStepDifference","valueAtZero","marginStartPos","maxLabelSize","_redrawLabel","_redrawLine","titleWidth","titleCharHeight","convertValue","invertedValue","convertedValue","characterHeight","largestWidth","majorCharWidth","minorCharWidth","textMinor","createTextNode","measureCharMinor","textMajor","measureCharMajor","textTitle","measureCharTitle","titleCharWidth","groupsUsingDefaultStyles","usingDefaultStyle","zeroPosition","Line","Bar","Points","setZeroPosition","catmullRom","parametrization","alpha","SVGcontainer","path","fillPath","fillHeight","outline","shaded","barWidth","bar1Height","bar2Height","icon","yAxisOrientation","getYRange","groupData","draw","framework","subgroupIndex","subgroupOrderer","subgroupOrder","visibleItems","byStart","byEnd","checkRangedItems","inner","foreground","marker","Element","getLabelWidth","restack","_updateVisibleItems","markerHeight","lastMarkerHeight","dirty","displayed","_calculateHeight","offsetTop","offsetLeft","ii","repositionY","resetSubgroups","labelSet","setParent","orderSubgroups","_checkIfVisible","sortArray","sortField","removeFromDataSet","removeItem","startArray","endArray","oldVisibleItems","visibleItemsLookup","lowerBound","upperBound","_checkIfVisibleWithReference","initialPosByStart","_traceVisible","initialPosByEnd","repositionX","initialPos","breakCondition","isVisible","align","groupOrder","selectable","editable","updateTime","onAdd","onUpdate","onMove","onRemove","onMoving","itemOptions","itemListeners","_onAdd","_onUpdate","_onRemove","groupListeners","_onAddGroups","_onUpdateGroups","_onRemoveGroups","groupIds","selection","stackDirty","touchParams","UNGROUPED","BACKGROUND","box","_updateUngrouped","backgroundGroup","isFirst","_onSelectItem","_onMultiSelectItem","_onAddItem","addCallback","Function","markDirty","unselect","select","getVisibleItems","rawVisibleItems","_deselect","_orderGroups","visibleInterval","zoomed","lastVisibleInterval","lastWidth","firstGroup","_firstGroup","firstMargin","nonFirstMargin","groupMargin","groupResized","firstGroupIndex","firstGroupId","ungrouped","_getGroupId","getLabelSet","oldItemsData","getItems","_order","getGroups","_getType","_removeItem","groupOptions","oldGroupId","oldG
roup","_constructByEndArray","itemFromTarget","dragLeftItem","dragRightItem","itemProps","selected","initialX","newProps","initial","groupFromTarget","_updateItemProps","_moveToGroup","changes","ctrlKey","srcEvent","shiftKey","oldSelection","newSelection","xAbs","newItem","_getItemRange","_item","itemSetFromTarget","side","iconSize","iconSpacing","textArea","scrollableHeight","drawLegendIcons","getComputedStyle","paddingTop","defaultGroup","sampling","graphHeight","barChart","handleOverlap","dataAxis","legend","abortedGraphUpdate","updateSVGheight","updateSVGheightOnResize","lastStart","COUNTER","BarGraphFunctions","yAxisLeft","yAxisRight","legendLeft","legendRight","_updateAllGroupData","_updateGroup","groupsContent","ungroupedCounter","forceGraphUpdate","_updateGraph","rangePerPixelInv","preprocessedGroupData","processedGroupData","groupRanges","changeCalled","minDate","maxDate","_getRelevantData","_applySampling","_convertXcoordinates","_getYRanges","_updateYAxis","MAX_CYCLES","_convertYcoordinates","dataContainer","guess","increment","amountOfPoints","xDistance","pointsPerPixel","ceil","sampledData","barCombinedDataLeft","barCombinedDataRight","getStackedBarYRange","minVal","maxVal","yAxisLeftUsed","yAxisRightUsed","minLeft","minRight","maxLeft","maxRight","ignore","_toggleAxisVisiblity","drawIcons","axisUsed","datapoints","xValue","yValue","extractedData","svgHeight","majorTexts","minorTexts","lineTop","parentChanged","foregroundNextSibling","nextSibling","backgroundNextSibling","_repaintLabels","timeLabelsize","cur","prevLine","xPrev","xFirstMajorLabel","_repaintMinorText","_repaintMajorText","_repaintMajorLine","_repaintMinorLine","leftTime","leftText","widthText","arr","pop","childNodes","nodeValue","_repaintDeleteButton","anchor","deleteButton","_updateContents","template","_updateTitle","removeAttribute","_updateDataAttributes","dataAttributes","attributes","setAttribute","_updateStyle","emptyContent","baseClassName","onTop","itemSubgroup","itemSetHeight","marginLeft","maxWidth","_repaintDragLeft","_repaintDragRight","contentLeft","parentWidth","boxWidth","dragLeft","dragRight","_determineBrowserMethod","_initializeMixinLoaders","renderRefreshRate","renderTimestep","renderTime","physicsTime","runDoubleSpeed","physicsDiscreteStepsize","initializing","triggerFunctions","edit","editEdge","connect","del","nodes","mass","radiusMin","radiusMax","shape","image","fontColor","fontSize","fontFace","fontFill","fontStrokeWidth","fontStrokeColor","level","borderWidthSelected","edges","widthSelectionMultiplier","hoverWidth","labelAlignment","arrowScaleFactor","dash","gap","altLength","inheritColor","configurePhysics","physics","barnesHut","thetaInverted","gravitationalConstant","centralGravity","springLength","springConstant","damping","repulsion","nodeDistance","hierarchicalRepulsion","clustering","initialMaxNodes","clusterThreshold","reduceToNodes","chainThreshold","clusterEdgeThreshold","sectorThreshold","screenSizeThreshold","fontSizeMultiplier","maxFontSize","forceAmplification","distanceAmplification","edgeGrowth","nodeScaling","maxNodeSizeIncrements","activeAreaBoxSize","clusterLevelDifference","navigation","keyboard","speed","dataManipulation","initiallyVisible","hierarchicalLayout","levelSeparation","nodeSpacing","layout","freezeForStabilization","smoothCurves","dynamic","roundness","maxVelocity","minVelocity","stabilize","stabilizationIterations","zoomExtentOnStabilize","dragNetwork","dragNodes","hideEdgesOnDrag","hideNodesOnDrag","constants","pixelRatio","hoverObj","controlNodesActiv
e","navigationHammers","existing","_new","animationSpeed","animationEasingFunction","easingTime","sourceScale","targetScale","sourceTranslation","targetTranslation","lockedOnNodeId","lockedOnNodeOffset","touchTime","images","setOnloadCallback","_redraw","xIncrement","yIncrement","zoomIncrement","_loadPhysicsSystem","_loadSectorSystem","_loadClusterSystem","_loadSelectionSystem","_loadHierarchySystem","_setTranslation","freezeSimulation","cachedFunctions","startedStabilization","stabilized","draggingNodes","calculationNodes","calculationNodeIndices","nodeIndices","canvasTopLeft","canvasBottomRight","pointerPosition","areaCenter","previousScale","nodesData","edgesData","nodesListeners","_addNodes","_updateNodes","_removeNodes","edgesListeners","_addEdges","_updateEdges","_removeEdges","moving","timer","_setupHierarchicalLayout","zoomExtent","startWithClustering","keycharm","hammerUtil","MixinLoader","Activator","browserType","requiresTimeout","_getScriptPath","scripts","getElementsByTagName","src","_getRange","node","minY","maxY","minX","maxX","nodeId","boundingBox","_findCenter","animationOptions","initialZoom","disableStart","zoomLevel","numberOfNodes","factor","yDistance","xZoomLevel","yZoomLevel","animation","_updateNodeIndexList","_clearNodeIndexList","idx","_createManipulatorBar","dotData","DOTToGraph","gephi","gephiData","parseGephi","_setNodes","_setEdges","_putDataInSector","_resetLevels","_stabilize","onEdit","onEditEdge","onConnect","onDelete","editMode","newColorObj","groupname","clickToUse","activator","_createKeyBinds","_loadNavigationControls","_loadManipulationSystem","_configureSmoothCurves","devicePixelRatio","webkitBackingStorePixelRatio","mozBackingStorePixelRatio","msBackingStorePixelRatio","oBackingStorePixelRatio","backingStorePixelRatio","setTransform","pinch","set","_onTap","_onDoubleTap","onTouch","_onMouseMoveTitle","reset","isActive","_moveUp","_yStopMoving","_moveDown","_moveLeft","_xStopMoving","_moveRight","_zoomIn","_stopZoom","_zoomOut","_deleteSelected","_cleanupPhysicsConfiguration","_recursiveDOMDelete","DOMobject","_getPointer","pinched","_getScale","_handleTouch","_handleDragStart","_getNodeAt","_getTranslation","isSelected","_selectObject","nodeIds","objectId","selectionObj","xFixed","yFixed","_handleOnDrag","releaseNode","_XconvertDOMtoCanvas","_XconvertCanvasToDOM","_YconvertDOMtoCanvas","_YconvertCanvasToDOM","_handleDragEnd","_handleTap","_handleDoubleTap","_handleOnHold","_onRelease","_handleOnRelease","_zoom","scaleOld","preScaleDragPointer","DOMtoCanvas","scaleFrac","tx","ty","updateClustersDefault","postScaleDragPointer","canvasToDOM","popupObj","_checkHidePopup","checkShow","_checkShowPopup","popupTimer","edgeId","_getEdgeAt","_hoverObject","_blurObject","lastPopupNode","nodeUnderCursor","overlappingNodes","isOverlappingWith","getTitle","overlappingEdges","edge","connected","popup","setPosition","setText","emitEvent","oldWidth","oldHeight","oldNodesData","_updateSelection","angle","_updateCalculationNodes","_reconnectEdges","_updateValueRange","updateLabels","changedData","setProperties","properties","oldEdgesData","oldEdge","disconnect","showInternalIds","_createBezierNodes","via","sectors","dynamicEdges","setValueRange","w","save","translate","_doInAllSectors","restore","offsetX","offsetY","_drawNodes","alwaysShow","setScaleAndPos","inArea","sMax","_drawEdges","_drawControlNodes","_freezeDefinedNodes","_physicsTick","_restoreFrozenNodes","fixedData","_isMoving","vmin","isMoving","_discreteStepNodes","nodesPresent","discreteStepLimited","discre
teStep","vminCorrected","_revertPhysicsState","revertPosition","_revertPhysicsTick","_doInAllActiveSectors","_doInSupportSector","mainMovingStatus","supportMovingStatus","mainMoving","_animationStep","_handleNavigation","startTime","renderStartTime","requestAnimationFrame","mozRequestAnimationFrame","webkitRequestAnimationFrame","msRequestAnimationFrame","iterations","toggleFreeze","parentEdgeId","internalMultiplier","positionBezierNode","mixin","storePosition","storePositions","dataArray","allowedToMoveX","allowedToMoveY","getPositions","focusOnNode","nodePosition","lockedOnNode","easingFunction","animateView","locked","_transitionRedraw","viewCenter","distanceFromCenter","_classicRedraw","_lockedRedraw","active","getScale","getCenterCoordinates","getBoundingBox","networkConstants","fromId","toId","widthSelected","labelDimensions","yLine","dirtyLabel","fromBackup","toBackup","originalFromId","originalToId","widthFixed","lengthFixed","controlNodesEnabled","controlNodes","positions","connectedNode","_drawLine","_drawArrow","_drawArrowCenter","_drawDashLine","attachEdge","detachEdge","xFrom","yFrom","xTo","yTo","xObj","yObj","_getDistanceToEdge","_getColor","colorObj","_getLineWidth","_line","midpointX","midpointY","_pointOnLine","_label","resize","_circle","_pointOnCircle","networkScaleInv","_getViaCoordinates","xVia","yVia","quadraticCurveTo","lineCount","measureText","_rotateForLabelAlignment","_drawLabelRect","_drawLabelText","angleInDegrees","atan2","rotate","lineMargin","fillRect","lineJoin","strokeText","setLineDash","pattern","lineDashOffset","lineCap","dashedLine","percentage","arrow","_pointOnBezier","_findBorderPosition","distanceToBorder","distanceToNodes","difference","threshold","arrowPos","guidePos","edgeSegmentLength","toBorderDist","toBorderPoint","x1","y1","x2","y2","x3","y3","lastX","lastY","minDistance","_getDistanceToLine","px","py","something","u","nodeIdFrom","nodeIdTo","getControlNodeFromPosition","getControlNodeToPosition","_enableControlNodes","_disableControlNodes","_getSelectedControlNode","fromDistance","toDistance","_restoreControlNodes","controlnodeFromPos","fromBorderDist","fromBorderPoint","controlnodeToPos","defaultIndex","DEFAULT","imageBroken","load","url","brokenUrl","img","Image","onload","onerror","error","imagelist","grouplist","reroutedEdges","fontDrawThreshold","horizontalAlignLeft","verticalAlignTop","baseRadiusValue","radiusFixed","preassignedLevel","hierarchyEnumerated","fx","fy","vx","vy","previousState","resetCluster","dynamicEdgesLength","clusterSession","clusterSizeWidthFactor","clusterSizeHeightFactor","clusterSizeRadiusFactor","growthIndicator","networkScale","formationScale","clusterSize","containedNodes","containedEdges","clusterSessions","originalLabel","triggerFunction","groupObj","imageObj","brokenImage","_drawDatabase","_resizeDatabase","_drawBox","_resizeBox","_drawCircle","_resizeCircle","_drawEllipse","_resizeEllipse","_drawImage","_resizeImage","_drawCircularImage","_resizeCircularImage","_drawText","_resizeText","_drawDot","_resizeShape","_drawSquare","_drawTriangle","_drawTriangleDown","_drawStar","_reset","clearSizeCache","_setForce","_addForce","storeState","isFixed","velocity","getDistance","_drawImageAtPosition","globalAlpha","drawImage","_drawImageLabel","getTextSize","_swapToImageResizeWhenImageLoaded","diameter","centerX","centerY","_drawRawCircle","circle","clip","textSize","clusterLineWidth","selectionLineWidth","roundRect","database","defaultSize","ellipse","_drawShape","radiusMultiplier","baseline","labelUnderNode","inV
iew","clearVelocity","updateVelocity","massBeforeClustering","energyBefore","styleAttr","fontFamily","WebkitBorderRadius","whiteSpace","parseDOT","parseGraph","nextPreview","isAlphaNumeric","regexAlphaNumeric","merge","o","addNode","graphs","attr","addEdge","createEdge","getToken","tokenType","TOKENTYPE","NULL","token","isComment","DELIMITER","c2","DELIMITERS","IDENTIFIER","newSyntaxError","UNKNOWN","chop","strict","parseStatements","parseStatement","subgraph","parseSubgraph","parseEdge","parseAttributeStatement","parseNodeStatement","subgraphs","parseAttributeList","message","maxLength","forEach2","array1","array2","elem1","elem2","graphData","dotNode","graphNode","convertEdge","dotEdge","graphEdge","subEdge","{","}","[","]",";","=",",","->","--","gephiJSON","allowedToMove","gEdges","gNodes","gEdge","source","gNode","propagating","onMouseWheel","leftContainer","rightContainer","shadowTop","shadowBottom","shadowTopLeft","shadowBottomLeft","shadowTopRight","shadowBottomRight","_redrawTimer","touchAction","listeners","events","scrollTopMin","redrawCount","_initAutoResize","component","_stopAutoResize","what","getWindow","borderRootHeight","borderRootWidth","autoHeight","centerWidth","_updateScrollTop","visibilityTop","visibilityBottom","MAX_REDRAWS","repaint","_startAutoResize","_onResize","lastHeight","watchTimer","setInterval","_setScrollTop","_getScrollTop","inputHandler","onRelease","isFinal","offTouch","offRelease","custom","back","editNode","addDescription","edgeDescription","editEdgeDescription","createEdgeError","deleteClusterError","CanvasRenderingContext2D","square","s2","ir","triangleDown","star","n","r2d","kappa","ox","oy","xe","ye","xm","ym","bezierCurveTo","wEllipse","hEllipse","ymb","yeb","xt","yt","xi","yi","xl","yl","xr","yr","dashArray","dashLength","dashCount","slope","distRemaining","dashIndex","_catmullRom","_linear","dFill","_catmullRomUniform","p0","p1","p2","p3","bp1","bp2","normalization","d1","d2","d3","A","N","M","d3powA","d2powA","d3pow2A","d2pow2A","d1pow2A","d1powA","Bargraph","barCombinedData","coreDistance","drawData","combinedData","intersections","barPoints","_getDataIntersections","heightOffset","_getSafeDrawData","nextKey","amount","resolved","prevKey","accumulated","groupLabel","_getStackedBarYRange","xpos","PhysicsMixin","ClusterMixin","SectorsMixin","SelectionMixin","ManipulationMixin","NavigationMixin","HierarchicalLayoutMixin","_loadMixin","sourceVariable","mixinFunction","_clearMixin","_loadSelectedForceSolver","_loadPhysicsConfiguration","hubThreshold","activeSector","drawingNode","blockConnectingEdgeSelection","forceAppendSelection","manipulationDiv","editModeDiv","closeDiv","_cleanNavigation","_loadNavigationElements","overlay","prevent_default","_onTapOverlay","windowHammer","_hasParent","deactivate","escListener","activate","unbind","_callbacks","once","self","removeListener","removeAllListeners","callbacks","cb","hasListeners","__WEBPACK_AMD_DEFINE_RESULT__","global","dfl","hasOwnProp","defaultParsingFlags","empty","unusedTokens","unusedInput","charsLeftOver","nullInput","invalidMonth","invalidFormat","userInvalidated","iso","printMsg","msg","suppressDeprecationWarnings","warn","deprecate","firstTime","deprecateSimple","deprecations","padToken","func","leftZeroFill","ordinalizeToken","period","localeData","ordinal","monthDiff","anchor2","adjust","wholeMonthDiff","meridiemFixWrap","meridiem","isPm","meridiemHour","isPM","Locale","Moment","config","skipOverflow","checkOverflow","copyConfig","updateInProgress","updateOffset","Duration","normalizedI
nput","normalizeObjectUnits","years","quarters","quarter","months","weeks","week","days","_milliseconds","_days","_months","_locale","_bubble","val","_isAMomentObject","_i","_f","_l","_strict","_tzm","_isUTC","_offset","_pf","momentProperties","absRound","number","targetLength","forceSign","output","positiveMomentsDifference","base","res","isAfter","momentsDifference","makeAs","isBefore","createAdder","dur","tmp","addOrSubtractDurationFromMoment","mom","isAdding","setTime","rawSetter","rawGetter","rawMonthSetter","input","compareArrays","dontConvert","lengthDiff","diffs","toInt","normalizeUnits","units","lowered","unitAliases","camelFunctions","inputObject","normalizedProp","makeList","setter","getter","results","utc","argumentForCoercion","coercedNumber","isFinite","daysInMonth","UTC","getUTCDate","weeksInYear","dow","doy","weekOfYear","daysInYear","isLeapYear","_a","MONTH","DATE","YEAR","HOUR","MINUTE","SECOND","MILLISECOND","_overflowDayOfYear","isValid","_isValid","getTime","bigHour","normalizeLocale","chooseLocale","names","loadLocale","oldLocale","hasModule","code","model","local","removeFormattingTokens","makeFormatFunction","formattingTokens","formatTokenFunctions","formatMoment","expandFormat","formatFunctions","invalidDate","replaceLongDateFormatTokens","longDateFormat","localFormattingTokens","lastIndex","getParseRegexForToken","parseTokenOneDigit","parseTokenThreeDigits","parseTokenFourDigits","parseTokenOneToFourDigits","parseTokenSignedNumber","parseTokenSixDigits","parseTokenOneToSixDigits","parseTokenTwoDigits","parseTokenOneToThreeDigits","parseTokenWord","_meridiemParse","parseTokenOffsetMs","parseTokenTimestampMs","parseTokenTimezone","parseTokenT","parseTokenDigits","parseTokenOneOrTwoDigits","_ordinalParse","_ordinalParseLenient","RegExp","regexpEscape","unescapeFormat","utcOffsetFromString","string","possibleTzMatches","tzChunk","parseTimezoneChunker","addTimeToArrayFromToken","datePartArray","monthsParse","_dayOfYear","parseTwoDigitYear","_meridiem","_useUTC","weekdaysParse","_w","invalidWeekday","dayOfYearFromWeekInfo","weekYear","temp","GG","W","E","_week","gg","dayOfYearFromWeeks","dateFromConfig","currentDate","yearToUse","currentDateArray","makeUTCDate","getUTCMonth","_nextDay","makeDate","setUTCMinutes","getUTCMinutes","dateFromObject","getUTCFullYear","makeDateFromStringAndFormat","ISO_8601","parseISO","parsedInput","tokens","skipped","stringLength","totalParsedInputLength","matched","p4","makeDateFromStringAndArray","tempConfig","bestMoment","scoreToBeat","currentScore","NaN","score","l","isoRegex","isoDates","isoTimes","makeDateFromString","createFromInputFallback","makeDateFromInput","aspNetJsonRegex","ms","setUTCFullYear","parseWeekday","substituteTimeAgo","withoutSuffix","isFuture","relativeTime","posNegDuration","relativeTimeThresholds","firstDayOfWeek","firstDayOfWeekOfYear","adjustedMoment","daysToDayOfWeek","daysToAdd","getUTCDay","makeMoment","invalid","preparse","pickBy","moments","dayOfMonth","unit","makeAccessor","keepTime","daysToYears","yearsToDays","makeDurationGetter","makeGlobal","shouldDeprecate","ender","oldGlobalMoment","globalScope","VERSION","aspNetTimeSpanJsonRegex","isoDurationRegex","isoFormat","unitMillisecondFactors","Milliseconds","Seconds","Minutes","Hours","Days","Months","Years","D","Q","DDD","dayofyear","isoweekday","isoweek","weekyear","isoweekyear","ordinalizeTokens","paddedTokens","MMM","monthsShort","MMMM","dd","weekdaysMin","ddd","weekdaysShort","dddd","weekdays","isoWeek","YY","YYYY","YYYYY","YYYYYY","gggg","ggggg","isoWee
kYear","GGGG","GGGGG","isoWeekday","SS","SSS","SSSS","Z","utcOffset","ZZ","zoneAbbr","zz","zoneName","unix","lists","DDDD","_monthsShort","monthName","regex","_monthsParse","_longMonthsParse","_shortMonthsParse","_weekdays","_weekdaysShort","_weekdaysMin","weekdayName","_weekdaysParse","_longDateFormat","LTS","LT","L","LL","LLL","LLLL","isLower","_calendar","sameDay","nextDay","nextWeek","lastDay","lastWeek","sameElse","calendar","_relativeTime","future","past","mm","hh","MM","yy","pastFuture","_ordinal","postformat","firstDayOfYear","_invalidDate","ret","parseIso","diffRes","isDuration","inp","version","relativeTimeThreshold","limit","defineLocale","_abbr","abbr","langData","flags","parseZone","isDSTShifted","parsingFlags","invalidAt","keepLocalTime","_dateUtcOffset","inputString","asFloat","that","zoneDiff","humanize","fromNow","sod","startOf","isDST","getDay","endOf","inputMs","isBetween","zone","localAdjust","_changeInProgress","isLocal","isUtcOffset","isUtc","hasAlignedHourOffset","isoWeeksInYear","weekInfo","newLocaleData","getTimezoneOffset","isoWeeks","toJSON","isUTC","withSuffix","toIsoString","asSeconds","asMilliseconds","asMinutes","asHours","asDays","asWeeks","asMonths","asYears","ordinalParse","require","noGlobal","__WEBPACK_AMD_DEFINE_FACTORY__","__WEBPACK_AMD_DEFINE_ARRAY__","_exportFunctions","_bound","keydown","keyup","_keys","fromCharCode","down","handleEvent","up","keyCode","bound","bindAll","getKey","newBindings","_firstTarget","propagatedHandler","_handled","stopped","firstTarget","_handlers","Manager","PropagatingHammer","_on","_off","_destroy","handler","exportName","setTimeoutContext","timeout","bindFn","invokeArrayArg","arg","each","iterator","dest","inherit","child","childP","baseP","_super","boolOrFn","TYPE_FUNCTION","ifUndefined","val1","val2","addEventListeners","splitStr","removeEventListeners","hasParent","inStr","str","find","inArray","findByKey","uniqueArray","prefixed","property","prefix","camelProp","VENDOR_PREFIXES","uniqueId","_uniqueId","getWindowForElement","doc","ownerDocument","defaultView","parentWindow","Input","manager","inputTarget","domHandler","ev","init","createInputInstance","Type","inputClass","SUPPORT_POINTER_EVENTS","PointerEventInput","SUPPORT_ONLY_TOUCH","TouchInput","SUPPORT_TOUCH","TouchMouseInput","MouseInput","eventType","pointersLen","pointers","changedPointersLen","changedPointers","INPUT_START","INPUT_END","INPUT_CANCEL","session","computeInputData","recognize","prevInput","pointersLength","firstInput","simpleCloneInputData","firstMultiple","offsetCenter","getCenter","timeStamp","deltaTime","getAngle","computeDeltaXY","offsetDirection","getDirection","rotation","getRotation","computeIntervalInputData","offsetDelta","prevDelta","velocityX","velocityY","last","lastInterval","COMPUTE_INTERVAL","getVelocity","DIRECTION_NONE","DIRECTION_LEFT","DIRECTION_RIGHT","DIRECTION_UP","DIRECTION_DOWN","PROPS_XY","PROPS_CLIENT_XY","evEl","MOUSE_ELEMENT_EVENTS","evWin","MOUSE_WINDOW_EVENTS","allow","pressed","POINTER_ELEMENT_EVENTS","POINTER_WINDOW_EVENTS","store","pointerEvents","SingleTouchInput","evTarget","SINGLE_TOUCH_TARGET_EVENTS","SINGLE_TOUCH_WINDOW_EVENTS","started","normalizeSingleTouches","all","touches","changedTouches","TOUCH_TARGET_EVENTS","targetIds","getTouches","allTouches","INPUT_MOVE","identifier","changedTargetTouches","mouse","TouchAction","cleanTouchActions","actions","TOUCH_ACTION_NONE","hasPanX","TOUCH_ACTION_PAN_X","hasPanY","TOUCH_ACTION_PAN_Y","TOUCH_ACTION_MANIPULATION","TOUCH_ACTION_AUTO","Recognizer","defaults","stat
e","STATE_POSSIBLE","simultaneous","requireFail","stateStr","STATE_CANCELLED","STATE_ENDED","STATE_CHANGED","STATE_BEGAN","directionStr","getRecognizerByNameIfManager","otherRecognizer","recognizer","AttrRecognizer","PanRecognizer","pX","pY","PinchRecognizer","PressRecognizer","_timer","_input","RotateRecognizer","SwipeRecognizer","TapRecognizer","pTime","pCenter","recognizers","preset","handlers","toggleCssProps","recognizeWith","requireFailure","cssProps","triggerDomEvent","gestureEvent","createEvent","initEvent","gesture","dispatchEvent","TEST_ELEMENT","MOBILE_REGEX","INPUT_TYPE_TOUCH","INPUT_TYPE_PEN","INPUT_TYPE_MOUSE","INPUT_TYPE_KINECT","DIRECTION_HORIZONTAL","DIRECTION_VERTICAL","DIRECTION_ALL","MOUSE_INPUT_MAP","mousedown","mousemove","mouseup","pointerType","POINTER_INPUT_MAP","pointerdown","pointermove","pointerup","pointercancel","pointerout","IE10_POINTER_TYPE_ENUM",2,3,4,5,"MSPointerEvent","removePointer","eventTypeNormalized","isTouch","storeIndex","pointerId","SINGLE_TOUCH_INPUT_MAP","touchstart","touchmove","touchend","touchcancel","TOUCH_INPUT_MAP","inputEvent","inputData","isMouse","PREFIXED_TOUCH_ACTION","NATIVE_TOUCH_ACTION","TOUCH_ACTION_COMPUTE","compute","getTouchAction","preventDefaults","prevented","hasNone","preventSrc","STATE_RECOGNIZED","STATE_FAILED","dropRecognizeWith","dropRequireFailure","hasRequireFailures","canRecognizeWith","withState","tryEmit","canEmit","inputDataClone","process","attrTest","optionPointers","isRecognized","directionTest","hasMoved","inOut","validPointers","validMovement","validTime","taps","posThreshold","validTouchTime","failTimeout","validInterval","validMultiTap","tapCount","domEvents","userSelect","touchSelect","touchCallout","contentZooming","userDrag","tapHighlightColor","STOP","FORCED_STOP","curRecognizer","Tap","Pan","Swipe","Pinch","Rotate","Press","clusterToFit","maxNumberOfNodes","reposition","maxLevels","forceAggregateHubs","normalizeClusterLevels","increaseClusterLevel","repositionNodes","openCluster","isMovingBeforeClustering","_nodeInActiveArea","_sector","_addSector","decreaseClusterLevel","_expandClusterNode","_updateDynamicEdges","updateClusters","zoomDirection","recursive","doNotStart","amountOfNodes","_collapseSector","_formClusters","_openClusters","_openClustersBySize","_aggregateHubs","handleChains","chainPercentage","_getChainFraction","_reduceAmountOfChains","_getHubSize","_formClustersByHub","openAll","containedNodeId","childNode","_expelChildFromParent","_unselectAll","_releaseContainedEdges","_connectEdgeBackToChild","_validateEdges","othersPresent","childNodeId","_repositionBezierNodes","_formClustersByZoom","_forceClustersByZoom","minLength","_addToCluster","_clusterToSmallestNeighbour","smallestNeighbour","smallestNeighbourNode","neighbour","onlyEqual","_formClusterFromHub","hubNode","absorptionSizeOffset","allowCluster","edgesIdarray","amountOfInitialEdges","_addToContainedEdges","_connectEdgeToCluster","_containCircularEdgesFromNode","massBefore","correction","edgeToId","edgeFromId","k","_addToReroutedEdges","maxLevel","minLevel","clusterLevel","targetLevel","average","averageSquared","hubCounter","largestHub","variance","standardDeviation","fraction","reduceAmount","chains","total","_switchToSector","sectorId","sectorType","_switchToActiveSector","_switchToFrozenSector","_switchToSupportSector","_loadLatestSector","_previousSector","_setActiveSector","newId","_forgetLastSector","_createNewSector","_deleteActiveSector","_deleteFrozenSector","_freezeSector","_activateSector","_mergeThisWithFrozen","_colla
pseThisToSingleCluster","sector","unqiueIdentifier","previousSector","runFunction","argument","returnValues","_doInAllFrozenSectors","_drawSectorNodes","_drawAllSectorNodes","_getNodesOverlappingWith","_getAllNodesOverlappingWith","_pointerToPositionObject","positionObject","_getEdgesOverlappingWith","_getAllEdgesOverlappingWith","_addToSelection","_addToHover","_removeFromSelection","doNotTrigger","_unselectClusters","_getSelectedNodeCount","_getSelectedNode","_getSelectedEdge","_getSelectedEdgeCount","_getSelectedObjectCount","_selectionIsEmpty","_clusterInSelection","_selectConnectedEdges","_hoverConnectedEdges","_unselectConnectedEdges","append","highlightEdges","overrideSelectable","DOM","_manipulationReleaseOverload","_navigationReleaseOverload","getSelectedNodes","edgeIds","getSelectedEdges","idArray","selectNodes","RangeError","selectEdges","_clearManipulatorBar","manipulationDOM","_restoreOverloadedFunctions","functionName","_toggleEditMode","toolbar","boundFunction","edgeBeingEdited","selectedControlNode","_createAddNodeToolbar","_createAddEdgeToolbar","_editNode","_createEditEdgeToolbar","_addNode","_handleConnect","_finishConnect","_selectControlNode","_controlNodeDrag","_releaseControlNode","newNode","_editEdge","alert","supportNodes","targetNode","connectionEdge","connectFromId","_createEdge","defaultData","finalizedData","sourceNodeId","targetNodeId","selectedNodes","selectedEdges","navigationDivs","navigationDivActions","_stopMovement","_zoomExtent","hubsize","definedLevel","undefinedLevel","_changeConstants","_determineLevels","_determineLevelsDirected","distribution","_getDistribution","_placeNodesByHierarchy","minPos","_placeBranchNodes","maxCount","_setLevel","firstNode","_setLevelDirected","parentId","parentLevel","nodeMoved","_restoreNodes","graphToggleSmoothCurves","graph_toggleSmooth","getElementById","graphRepositionNodes","showValueOfRange","graphGenerateOptions","optionsSpecific","radioButton1","radioButton2","checked","backupConstants","optionsDiv","switchConfigurations","radioButton","querySelector","tableId","table","constantsVariableName","valueId","rangeValue","_overWriteGraphConstants","RepulsionMixin","HierarchialRepulsionMixin","BarnesHutMixin","_toggleBarnesHut","barnesHutTree","_initializeForceCalculation","_calculateForces","_calculateGravitationalForces","_calculateNodeForces","_calculateSpringForcesWithSupport","_calculateHierarchicalSpringForces","_calculateSpringForces","supportNodeId","gravity","gravityForce","edgeLength","springForce","combinedClusterSize","node1","node2","node3","_calculateSpringForce","physicsConfiguration","hierarchicalLayoutDirections","parentElement","rangeElement","radioButton3","graph_repositionNodes","graph_generateOptions","dynamicSmoothCurves","nameArray","webpackContext","req","resolve","repulsingForce","a_base","minimumDistance","steepness","springFx","springFy","totalFx","totalFy","correctionFx","correctionFy","nodeCount","_formBarnesHutTree","_getForceContribution","children","NW","NE","SW","SE","parentBranch","childrenCount","centerOfMass","calcSize","MAX_VALUE","sizeDiff","minimumTreeSize","rootSize","halfRootSize","_splitBranch","_placeInTree","_updateBranchMass","totalMass","totalMassInv","biggestSize","skipMassUpdate","_placeInRegion","region","containedNode","_insertRegion","childSize","_drawTree","_drawBranch","branch","webpackPolyfill","paths","__webpack_amd_options__"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;AAyBA,cAEA,SAA2CA,EAAMC,GAC1B,gBAAZC,UAA0C,gBAAXC,QACxCA,OAAOD,QAAUD,IACQ,kBAAXG,SAAyBA,OAAOC,IAC9CD,OAAO
H,GACmB,gBAAZC,SACdA,QAAa,IAAID,IAEjBD,EAAU,IAAIC,KACbK,KAAM,WACT,MAAgB,UAAUC,GAKhB,QAASC,GAAoBC,GAG5B,GAAGC,EAAiBD,GACnB,MAAOC,GAAiBD,GAAUP,OAGnC,IAAIC,GAASO,EAAiBD,IAC7BP,WACAS,GAAIF,EACJG,QAAQ,EAUT,OANAL,GAAQE,GAAUI,KAAKV,EAAOD,QAASC,EAAQA,EAAOD,QAASM,GAG/DL,EAAOS,QAAS,EAGTT,EAAOD,QAvBf,GAAIQ,KAqCJ,OATAF,GAAoBM,EAAIP,EAGxBC,EAAoBO,EAAIL,EAGxBF,EAAoBQ,EAAI,GAGjBR,EAAoB,KAK/B,SAASL,EAAQD,EAASM,GAG9BN,EAAQe,KAAOT,EAAoB,GACnCN,EAAQgB,QAAUV,EAAoB,GAGtCN,EAAQiB,QAAUX,EAAoB,GACtCN,EAAQkB,SAAWZ,EAAoB,GACvCN,EAAQmB,MAAQb,EAAoB,GAGpCN,EAAQoB,QAAUd,EAAoB,GACtCN,EAAQqB,SACNC,OAAQhB,EAAoB,GAC5BiB,OAAQjB,EAAoB,GAC5BkB,QAASlB,EAAoB,GAC7BmB,QAASnB,EAAoB,IAC7BoB,OAAQpB,EAAoB,IAC5BqB,WAAYrB,EAAoB,KAIlCN,EAAQ4B,SAAWtB,EAAoB,IACvCN,EAAQ6B,QAAUvB,EAAoB,IACtCN,EAAQ8B,UACNC,SAAUzB,EAAoB,IAC9B0B,SAAU1B,EAAoB,IAC9B2B,MAAO3B,EAAoB,IAC3B4B,MAAO5B,EAAoB,IAC3B6B,SAAU7B,EAAoB,IAE9B8B,YACEC,OACEC,KAAMhC,EAAoB,IAC1BiC,eAAgBjC,EAAoB,IACpCkC,QAASlC,EAAoB,IAC7BmC,UAAWnC,EAAoB,IAC/BoC,UAAWpC,EAAoB,KAGjCqC,UAAWrC,EAAoB,IAC/BsC,YAAatC,EAAoB,IACjCuC,WAAYvC,EAAoB,IAChCwC,SAAUxC,EAAoB,IAC9ByC,WAAYzC,EAAoB,IAChC0C,MAAO1C,EAAoB,IAC3B2C,gBAAiB3C,EAAoB,IACrC4C,QAAS5C,EAAoB,IAC7B6C,OAAQ7C,EAAoB,IAC5B8C,UAAW9C,EAAoB,IAC/B+C,SAAU/C,EAAoB,MAKlCN,EAAQsD,QAAUhD,EAAoB,IACtCN,EAAQuD,SACNC,KAAMlD,EAAoB,IAC1BmD,OAAQnD,EAAoB,IAC5BoD,OAAQpD,EAAoB,IAC5BqD,KAAMrD,EAAoB,IAC1BsD,MAAOtD,EAAoB,IAC3BuD,UAAWvD,EAAoB,IAC/BwD,YAAaxD,EAAoB,KAInCN,EAAQ+D,MAAQ,WACd,KAAM,IAAIC,OAAM,+EAIlBhE,EAAQiE,OAAS3D,EAAoB,IACrCN,EAAQkE,OAAS5D,EAAoB,IACrCN,EAAQmE,OAAS7D,EAAoB,KAKjC,SAASL,EAAQD,EAASM,GAM9B,GAAI2D,GAAS3D,EAAoB,GAOjCN,GAAQoE,SAAW,SAASC,GAC1B,MAAQA,aAAkBC,SAA2B,gBAAVD,IAQ7CrE,EAAQuE,SAAW,SAASF,GAC1B,MAAQA,aAAkBG,SAA2B,gBAAVH,IAQ7CrE,EAAQyE,OAAS,SAASJ,GACxB,GAAIA,YAAkBK,MACpB,OAAO,CAEJ,IAAI1E,EAAQuE,SAASF,GAAS,CAEjC,GAAIM,GAAQC,EAAaC,KAAKR,EAC9B,IAAIM,EACF,OAAO,CAEJ,KAAKG,MAAMJ,KAAKK,MAAMV,IACzB,OAAO,EAIX,OAAO,GAQTrE,EAAQgF,YAAc,SAASX,GAC7B,MAA4B,mBAAb,SACVY,OAAoB,eACpBA,OAAOC,cAAuB,WAC9Bb,YAAkBY,QAAOC,cAAcC,WAQ9CnF,EAAQoF,WAAa,WACnB,GAAIC,GAAK,WACP,MAAOC,MAAKC,MACQ,MAAhBD,KAAKE,UACPC,SAAS,IAGb,OACIJ,KAAOA,IAAO,IACVA,IAAO,IACPA,IAAO,IACPA,IAAO,IACPA,IAAOA,IAAOA,KAWxBrF,EAAQ0F,OAAS,SAAUC,GACzB,IAAK,GAAIC,GAAI,EAAGC,EAAMC,UAAUC,OAAYF,EAAJD,EAASA,IAAK,CACpD,GAAII,GAAQF,UAAUF,EACtB,KAAK,GAAIK,KAAQD,GACXA,EAAME,eAAeD,KACvBN,EAAEM,GAAQD,EAAMC,IAKtB,MAAON,IAWT3F,EAAQmG,gBAAkB,SAAUC,EAAOT,GACzC,IAAKU,MAAMC,QAAQF,GACjB,KAAM,IAAIpC,OAAM,uDAGlB,KAAK,GAAI4B,GAAI,EAAGA,EAAIE,UAAUC,OAAQH,IAGpC,IAAK,GAFDI,GAAQF,UAAUF,GAEb9E,EAAI,EAAGA,EAAIsF,EAAML,OAAQjF,IAAK,CACrC,GAAImF,GAAOG,EAAMtF,EACbkF,GAAME,eAAeD,KACvBN,EAAEM,GAAQD,EAAMC,IAItB,MAAON,IAWT3F,EAAQuG,oBAAsB,SAAUH,EAAOT,EAAGa,GAEhD,GAAIH,MAAMC,QAAQE,GAChB,KAAM,IAAIC,WAAU,yCAEtB,KAAK,GAAIb,GAAI,EAAGA,EAAIE,UAAUC,OAAQH,IAEpC,IAAK,GADDI,GAAQF,UAAUF,GACb9E,EAAI,EAAGA,EAAIsF,EAAML,OAAQjF,IAAK,CACrC,GAAImF,GAAOG,EAAMtF,EACjB,IAAIkF,EAAME,eAAeD,GACvB,GAAIO,EAAEP,IAASO,EAAEP,GAAMS,cAAgBC,OACrBC,SAAZjB,EAAEM,KACJN,EAAEM,OAEAN,EAAEM,GAAMS,cAAgBC,OAC1B3G,EAAQ6G,WAAWlB,EAAEM,GAAOO,EAAEP,IAG9BN,EAAEM,GAAQO,EAAEP,OAET,CAAA,GAAII,MAAMC,QAAQE,EAAEP,IACzB,KAAM,IAAIQ,WAAU,yCAEpBd,GAAEM,GAAQO,EAAEP,IAMpB,MAAON,IAWT3F,EAAQ8G,uBAAyB,SAAUV,EAAOT,EAAGa,GAEnD,GAAIH,MAAMC,QAAQE,GAChB,KAAM,IAAIC,WAAU,yCAEtB,KAAK,GAAIR,KAAQO,GACf,GAAIA,EAAEN,eAAeD,IACQ,IAAvBG,EAAMW,QAAQd,GAChB,GAAIO,EAAEP,IAASO,EAAEP,GAAMS,cAAgBC,OACrBC,SAAZjB,EAAEM,KACJN,EAAEM,OAEAN,EAAEM,GAAMS,cAAgBC,OAC1B3G,EAAQ6G,WAAWlB,EAAEM,GAAOO,EAAEP,IAG9BN,EAAEM,GAAQO,EAAEP,OAET,CAAA,GAAII,MAAMC,QAAQE,EAAEP,IACzB,KAAM,IAAIQ,WAAU,yCAEpBd,GAAEM,GAAQO,EAAEP,GAKpB,MAAON,IAST3F,EAAQ6G,WAAa,SAA
SlB,EAAGa,GAE/B,GAAIH,MAAMC,QAAQE,GAChB,KAAM,IAAIC,WAAU,yCAGtB,KAAK,GAAIR,KAAQO,GACf,GAAIA,EAAEN,eAAeD,GACnB,GAAIO,EAAEP,IAASO,EAAEP,GAAMS,cAAgBC,OACrBC,SAAZjB,EAAEM,KACJN,EAAEM,OAEAN,EAAEM,GAAMS,cAAgBC,OAC1B3G,EAAQ6G,WAAWlB,EAAEM,GAAOO,EAAEP,IAG9BN,EAAEM,GAAQO,EAAEP,OAET,CAAA,GAAII,MAAMC,QAAQE,EAAEP,IACzB,KAAM,IAAIQ,WAAU,yCAEpBd,GAAEM,GAAQO,EAAEP,GAIlB,MAAON,IAUT3F,EAAQgH,WAAa,SAAUrB,EAAGa,GAChC,GAAIb,EAAEI,QAAUS,EAAET,OAAQ,OAAO,CAEjC,KAAK,GAAIH,GAAI,EAAGC,EAAMF,EAAEI,OAAYF,EAAJD,EAASA,IACvC,GAAID,EAAEC,IAAMY,EAAEZ,GAAI,OAAO,CAG3B,QAAO,GAYT5F,EAAQiH,QAAU,SAAS5C,EAAQ6C,GACjC,GAAIvC,EAEJ,IAAeiC,SAAXvC,EACF,MAAOuC,OAET,IAAe,OAAXvC,EACF,MAAO,KAGT,KAAK6C,EACH,MAAO7C,EAET,IAAsB,gBAAT6C,MAAwBA,YAAgB1C,SACnD,KAAM,IAAIR,OAAM,wBAIlB,QAAQkD,GACN,IAAK,UACL,IAAK,UACH,MAAOC,SAAQ9C,EAEjB,KAAK,SACL,IAAK,SACH,MAAOC,QAAOD,EAAO+C,UAEvB,KAAK,SACL,IAAK,SACH,MAAO5C,QAAOH,EAEhB,KAAK,OACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAO,IAAIK,MAAKL,EAElB,IAAIA,YAAkBK,MACpB,MAAO,IAAIA,MAAKL,EAAO+C,UAEpB,IAAInD,EAAOoD,SAAShD,GACvB,MAAO,IAAIK,MAAKL,EAAO+C,UAEzB,IAAIpH,EAAQuE,SAASF,GAEnB,MADAM,GAAQC,EAAaC,KAAKR,GACtBM,EAEK,GAAID,MAAKJ,OAAOK,EAAM,KAGtBV,EAAOI,GAAQiD,QAIxB,MAAM,IAAItD,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,gBAGZ,KAAK,SACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAOJ,GAAOI,EAEhB,IAAIA,YAAkBK,MACpB,MAAOT,GAAOI,EAAO+C,UAElB,IAAInD,EAAOoD,SAAShD,GACvB,MAAOJ,GAAOI,EAEhB,IAAIrE,EAAQuE,SAASF,GAEnB,MADAM,GAAQC,EAAaC,KAAKR,GAGjBJ,EAFLU,EAEYL,OAAOK,EAAM,IAGbN,EAIhB,MAAM,IAAIL,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,gBAGZ,KAAK,UACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAO,IAAIK,MAAKL,EAEb,IAAIA,YAAkBK,MACzB,MAAOL,GAAOmD,aAEX,IAAIvD,EAAOoD,SAAShD,GACvB,MAAOA,GAAOiD,SAASE,aAEpB,IAAIxH,EAAQuE,SAASF,GAExB,MADAM,GAAQC,EAAaC,KAAKR,GACtBM,EAEK,GAAID,MAAKJ,OAAOK,EAAM,KAAK6C,cAG3B,GAAI9C,MAAKL,GAAQmD,aAI1B,MAAM,IAAIxD,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,mBAGZ,KAAK,UACH,GAAIrE,EAAQoE,SAASC,GACnB,MAAO,SAAWA,EAAS,IAExB,IAAIA,YAAkBK,MACzB,MAAO,SAAWL,EAAO+C,UAAY,IAElC,IAAIpH,EAAQuE,SAASF,GAAS,CACjCM,EAAQC,EAAaC,KAAKR,EAC1B,IAAIoD,EAQJ,OALEA,GAFE9C,EAEM,GAAID,MAAKJ,OAAOK,EAAM,KAAKyC,UAG3B,GAAI1C,MAAKL,GAAQ+C,UAEpB,SAAWK,EAAQ,KAG1B,KAAM,IAAIzD,OACN,iCAAmChE,EAAQuH,QAAQlD,GAC/C,mBAGZ,SACE,KAAM,IAAIL,OAAM,iBAAmBkD,EAAO,MAOhD,IAAItC,GAAe,qBAOnB5E,GAAQuH,QAAU,SAASlD,GACzB,GAAI6C,SAAc7C,EAElB,OAAY,UAAR6C,EACY,MAAV7C,EACK,OAELA,YAAkB8C,SACb,UAEL9C,YAAkBC,QACb,SAELD,YAAkBG,QACb,SAEL6B,MAAMC,QAAQjC,GACT,QAELA,YAAkBK,MACb,OAEF,SAEQ,UAARwC,EACA,SAEQ,WAARA,EACA,UAEQ,UAARA,EACA,SAGFA,GASTlH,EAAQ0H,gBAAkB,SAASC,GACjC,MAAOA,GAAKC,wBAAwBC,MAStC7H,EAAQ8H,eAAiB,SAASH,GAChC,MAAOA,GAAKC,wBAAwBG,KAQtC/H,EAAQgI,aAAe,SAASL,EAAMM,GACpC,GAAIC,GAAUP,EAAKM,UAAUE,MAAM,IACD,KAA9BD,EAAQnB,QAAQkB,KAClBC,EAAQE,KAAKH,GACbN,EAAKM,UAAYC,EAAQG,KAAK,OASlCrI,EAAQsI,gBAAkB,SAASX,EAAMM,GACvC,GAAIC,GAAUP,EAAKM,UAAUE,MAAM,KAC/BI,EAAQL,EAAQnB,QAAQkB,EACf,KAATM,IACFL,EAAQM,OAAOD,EAAO,GACtBZ,EAAKM,UAAYC,EAAQG,KAAK,OAalCrI,EAAQyI,QAAU,SAASpE,EAAQqE,GACjC,GAAI9C,GACAC,CACJ,IAAIQ,MAAMC,QAAQjC,GAEhB,IAAKuB,EAAI,EAAGC,EAAMxB,EAAO0B,OAAYF,EAAJD,EAASA,IACxC8C,EAASrE,EAAOuB,GAAIA,EAAGvB,OAKzB,KAAKuB,IAAKvB,GACJA,EAAO6B,eAAeN,IACxB8C,EAASrE,EAAOuB,GAAIA,EAAGvB,IAY/BrE,EAAQ2I,QAAU,SAAStE,GACzB,GAAIuE,KAEJ,KAAK,GAAI3C,KAAQ5B,GACXA,EAAO6B,eAAeD,IAAO2C,EAAMR,KAAK/D,EAAO4B,GAGrD,OAAO2C,IAUT5I,EAAQ6I,eAAiB,SAASxE,EAAQyE,EAAKrB,GAC7C,MAAIpD,GAAOyE,KAASrB,GAClBpD,EAAOyE,GAAOrB,GACP,IAGA,GAYXzH,EAAQ+I,iBAAmB,SAASC,EAASC,EAAQC,EAAUC,GACzDH,EAAQD,kBACSnC,SAAfuC,IACFA,GAAa,GAEA,eAAXF,GAA2BG,UAAUC,UAAUtC,QAAQ,YAAc,IACvEkC,EAAS,kBAGXD,EAAQD,iBAAiBE,EAAQC,EAAUC,IAE3CH,EAAQM,YAAY,KAAOL,EAAQC,IAWvClJ,EAAQuJ,oBAAsB,SAASP,EAASC,EAAQC,EAAUC,GAC5DH,EAAQO
,qBAES3C,SAAfuC,IACFA,GAAa,GAEA,eAAXF,GAA2BG,UAAUC,UAAUtC,QAAQ,YAAc,IACvEkC,EAAS,kBAGXD,EAAQO,oBAAoBN,EAAQC,EAAUC,IAG9CH,EAAQQ,YAAY,KAAOP,EAAQC,IAOvClJ,EAAQyJ,eAAiB,SAAUC,GAC5BA,IACHA,EAAQC,OAAOD,OAEbA,EAAMD,eACRC,EAAMD,iBAGNC,EAAME,aAAc,GASxB5J,EAAQ6J,UAAY,SAASH,GAEtBA,IACHA,EAAQC,OAAOD,MAGjB,IAAII,EAcJ,OAZIJ,GAAMI,OACRA,EAASJ,EAAMI,OAERJ,EAAMK,aACbD,EAASJ,EAAMK,YAGMnD,QAAnBkD,EAAOE,UAA4C,GAAnBF,EAAOE,WAEzCF,EAASA,EAAOG,YAGXH,GAGT9J,EAAQkK,UAQRlK,EAAQkK,OAAOC,UAAY,SAAU1C,EAAO2C,GAK1C,MAJoB,kBAAT3C,KACTA,EAAQA,KAGG,MAATA,EACe,GAATA,EAGH2C,GAAgB,MASzBpK,EAAQkK,OAAOG,SAAW,SAAU5C,EAAO2C,GAKzC,MAJoB,kBAAT3C,KACTA,EAAQA,KAGG,MAATA,EACKnD,OAAOmD,IAAU2C,GAAgB,KAGnCA,GAAgB,MASzBpK,EAAQkK,OAAOI,SAAW,SAAU7C,EAAO2C,GAKzC,MAJoB,kBAAT3C,KACTA,EAAQA,KAGG,MAATA,EACKjD,OAAOiD,GAGT2C,GAAgB,MASzBpK,EAAQkK,OAAOK,OAAS,SAAU9C,EAAO2C,GAKvC,MAJoB,kBAAT3C,KACTA,EAAQA,KAGNzH,EAAQuE,SAASkD,GACZA,EAEAzH,EAAQoE,SAASqD,GACjBA,EAAQ,KAGR2C,GAAgB,MAU3BpK,EAAQkK,OAAOM,UAAY,SAAU/C,EAAO2C,GAK1C,MAJoB,kBAAT3C,KACTA,EAAQA,KAGHA,GAAS2C,GAAgB,MASlCpK,EAAQyK,SAAW,SAASC,GAE1B,GAAIC,GAAiB,kCACrBD,GAAMA,EAAIE,QAAQD,EAAgB,SAAS/J,EAAGiK,EAAGC,EAAGtE,GAChD,MAAOqE,GAAIA,EAAIC,EAAIA,EAAItE,EAAIA,GAE/B,IAAIuE,GAAS,4CAA4ClG,KAAK6F,EAC9D,OAAOK,IACHF,EAAGG,SAASD,EAAO,GAAI,IACvBD,EAAGE,SAASD,EAAO,GAAI,IACvBvE,EAAGwE,SAASD,EAAO,GAAI,KACvB,MAWN/K,EAAQiL,SAAW,SAASC,EAAIC,EAAMC,GACpC,MAAO,MAAQ,GAAK,KAAOF,GAAO,KAAOC,GAAS,GAAKC,GAAM3F,SAAS,IAAI4F,MAAM,IASlFrL,EAAQsL,WAAa,SAASC,GAC5B,GAAI1K,EACJ,IAAIb,EAAQuE,SAASgH,GAAQ,CAC3B,GAAIvL,EAAQwL,WAAWD,GAAQ,CAC7B,GAAIE,GAAMF,EAAMG,OAAO,GAAGA,OAAO,EAAEH,EAAMxF,OAAO,GAAGoC,MAAM,IACzDoD,GAAQvL,EAAQiL,SAASQ,EAAI,GAAGA,EAAI,GAAGA,EAAI,IAE7C,GAAIzL,EAAQ2L,WAAWJ,GAAQ,CAC7B,GAAIK,GAAM5L,EAAQ6L,SAASN,GACvBO,GAAmBC,EAAEH,EAAIG,EAAEC,EAAU,IAARJ,EAAII,EAASC,EAAE3G,KAAK4G,IAAI,EAAU,KAARN,EAAIK,IAC3DE,GAAmBJ,EAAEH,EAAIG,EAAEC,EAAE1G,KAAK4G,IAAI,EAAU,KAARN,EAAIK,GAAUA,EAAQ,GAANL,EAAIK,GAC5DG,EAAkBpM,EAAQqM,SAASF,EAAeJ,EAAGI,EAAeJ,EAAGI,EAAeF,GACtFK,EAAkBtM,EAAQqM,SAASP,EAAgBC,EAAED,EAAgBE,EAAEF,EAAgBG,EAE3FpL,IACE0L,WAAYhB,EACZiB,OAAOJ,EACPK,WACEF,WAAWD,EACXE,OAAOJ,GAETM,OACEH,WAAWD,EACXE,OAAOJ,QAKXvL,IACE0L,WAAWhB,EACXiB,OAAOjB,EACPkB,WACEF,WAAWhB,EACXiB,OAAOjB,GAETmB,OACEH,WAAWhB,EACXiB,OAAOjB,QAMb1K,MACAA,EAAE0L,WAAahB,EAAMgB,YAAc,QACnC1L,EAAE2L,OAASjB,EAAMiB,QAAU3L,EAAE0L,WAEzBvM,EAAQuE,SAASgH,EAAMkB,WACzB5L,EAAE4L,WACAD,OAAQjB,EAAMkB,UACdF,WAAYhB,EAAMkB,YAIpB5L,EAAE4L,aACF5L,EAAE4L,UAAUF,WAAahB,EAAMkB,WAAalB,EAAMkB,UAAUF,YAAc1L,EAAE0L,WAC5E1L,EAAE4L,UAAUD,OAASjB,EAAMkB,WAAalB,EAAMkB,UAAUD,QAAU3L,EAAE2L,QAGlExM,EAAQuE,SAASgH,EAAMmB,OACzB7L,EAAE6L,OACAF,OAAQjB,EAAMmB,MACdH,WAAYhB,EAAMmB,QAIpB7L,EAAE6L,SACF7L,EAAE6L,MAAMH,WAAahB,EAAMmB,OAASnB,EAAMmB,MAAMH,YAAc1L,EAAE0L,WAChE1L,EAAE6L,MAAMF,OAASjB,EAAMmB,OAASnB,EAAMmB,MAAMF,QAAU3L,EAAE2L,OAI5D,OAAO3L,IAYTb,EAAQ2M,SAAW,SAASzB,EAAIC,EAAMC,GACpCF,GAAQ,IAAKC,GAAY,IAAKC,GAAU,GACxC,IAAIwB,GAAStH,KAAK4G,IAAIhB,EAAI5F,KAAK4G,IAAIf,EAAMC,IACrCyB,EAASvH,KAAKwH,IAAI5B,EAAI5F,KAAKwH,IAAI3B,EAAMC,GAGzC,IAAIwB,GAAUC,EACZ,OAAQd,EAAE,EAAEC,EAAE,EAAEC,EAAEW,EAIpB,IAAIG,GAAK7B,GAAK0B,EAAUzB,EAAMC,EAASA,GAAMwB,EAAU1B,EAAIC,EAAQC,EAAKF,EACpEa,EAAKb,GAAK0B,EAAU,EAAMxB,GAAMwB,EAAU,EAAI,EAC9CI,EAAM,IAAIjB,EAAIgB,GAAGF,EAASD,IAAS,IACnCK,GAAcJ,EAASD,GAAQC,EAC/BpF,EAAQoF,CACZ,QAAQd,EAAEiB,EAAIhB,EAAEiB,EAAWhB,EAAExE,GAG/B,IAAIyF,IAEF/E,MAAO,SAAUgF,GACf,GAAIC,KAWJ,OATAD,GAAQhF,MAAM,KAAKM,QAAQ,SAAU4E,GACnC,GAAoB,IAAhBA,EAAMC,OAAc,CACtB,GAAIC,GAAQF,EAAMlF,MAAM,KACpBW,EAAMyE,EAAM,GAAGD,OACf7F,EAAQ8F,EAAM,GAAGD,MACrBF,GAAOtE,GAAOrB,KAIX2F,GAIT/E,KAAM,SAAU+E,GACd,MAAOzG,QAAO6G,KAAKJ,GACdK,IAAI,SAAU3E,
GACb,MAAOA,GAAM,KAAOsE,EAAOtE,KAE5BT,KAAK,OASdrI,GAAQ0N,WAAa,SAAU1E,EAASmE,GACtC,GAAIQ,GAAgBT,EAAQ/E,MAAMa,EAAQqE,MAAMF,SAC5CS,EAAYV,EAAQ/E,MAAMgF,GAC1BC,EAASpN,EAAQ0F,OAAOiI,EAAeC,EAE3C5E,GAAQqE,MAAMF,QAAUD,EAAQ7E,KAAK+E,IAQvCpN,EAAQ6N,cAAgB,SAAU7E,EAASmE,GACzC,GAAIC,GAASF,EAAQ/E,MAAMa,EAAQqE,MAAMF,SACrCW,EAAeZ,EAAQ/E,MAAMgF,EAEjC,KAAK,GAAIrE,KAAOgF,GACVA,EAAa5H,eAAe4C,UACvBsE,GAAOtE,EAIlBE,GAAQqE,MAAMF,QAAUD,EAAQ7E,KAAK+E,IAWvCpN,EAAQ+N,SAAW,SAAShC,EAAGC,EAAGC,GAChC,GAAIpB,GAAGC,EAAGtE,EAENZ,EAAIN,KAAKC,MAAU,EAAJwG,GACfiC,EAAQ,EAAJjC,EAAQnG,EACZ9E,EAAImL,GAAK,EAAID,GACbiC,EAAIhC,GAAK,EAAI+B,EAAIhC,GACjBkC,EAAIjC,GAAK,GAAK,EAAI+B,GAAKhC,EAE3B,QAAQpG,EAAI,GACV,IAAK,GAAGiF,EAAIoB,EAAGnB,EAAIoD,EAAG1H,EAAI1F,CAAG,MAC7B,KAAK,GAAG+J,EAAIoD,EAAGnD,EAAImB,EAAGzF,EAAI1F,CAAG,MAC7B,KAAK,GAAG+J,EAAI/J,EAAGgK,EAAImB,EAAGzF,EAAI0H,CAAG,MAC7B,KAAK,GAAGrD,EAAI/J,EAAGgK,EAAImD,EAAGzH,EAAIyF,CAAG,MAC7B,KAAK,GAAGpB,EAAIqD,EAAGpD,EAAIhK,EAAG0F,EAAIyF,CAAG,MAC7B,KAAK,GAAGpB,EAAIoB,EAAGnB,EAAIhK,EAAG0F,EAAIyH,EAG5B,OAAQpD,EAAEvF,KAAKC,MAAU,IAAJsF,GAAUC,EAAExF,KAAKC,MAAU,IAAJuF,GAAUtE,EAAElB,KAAKC,MAAU,IAAJiB,KAGrExG,EAAQqM,SAAW,SAASN,EAAGC,EAAGC,GAChC,GAAIR,GAAMzL,EAAQ+N,SAAShC,EAAGC,EAAGC,EACjC,OAAOjM,GAAQiL,SAASQ,EAAIZ,EAAGY,EAAIX,EAAGW,EAAIjF,IAG5CxG,EAAQ6L,SAAW,SAASnB,GAC1B,GAAIe,GAAMzL,EAAQyK,SAASC,EAC3B,OAAO1K,GAAQ2M,SAASlB,EAAIZ,EAAGY,EAAIX,EAAGW,EAAIjF,IAG5CxG,EAAQ2L,WAAa,SAASjB,GAC5B,GAAIyD,GAAO,qCAAqCC,KAAK1D,EACrD,OAAOyD,IAGTnO,EAAQwL,WAAa,SAASC,GAC5BA,EAAMA,EAAIb,QAAQ,IAAI,GACtB,IAAIuD,GAAO,wCAAwCC,KAAK3C,EACxD,OAAO0C,IAUTnO,EAAQqO,sBAAwB,SAASC,EAAQC,GAC/C,GAA8B,gBAAnBA,GAA6B,CAEtC,IAAK,GADDC,GAAW7H,OAAO8H,OAAOF,GACpB3I,EAAI,EAAGA,EAAI0I,EAAOvI,OAAQH,IAC7B2I,EAAgBrI,eAAeoI,EAAO1I,KACC,gBAA9B2I,GAAgBD,EAAO1I,MAChC4I,EAASF,EAAO1I,IAAM5F,EAAQ0O,aAAaH,EAAgBD,EAAO1I,KAIxE,OAAO4I,GAGP,MAAO,OAWXxO,EAAQ0O,aAAe,SAASH,GAC9B,GAA8B,gBAAnBA,GAA6B,CACtC,GAAIC,GAAW7H,OAAO8H,OAAOF,EAC7B,KAAK,GAAI3I,KAAK2I,GACRA,EAAgBrI,eAAeN,IACA,gBAAtB2I,GAAgB3I,KACzB4I,EAAS5I,GAAK5F,EAAQ0O,aAAaH,EAAgB3I,IAIzD,OAAO4I,GAGP,MAAO,OAcXxO,EAAQ2O,aAAe,SAAUC,EAAaC,EAAS3E,GACrD,GAAwBtD,SAApBiI,EAAQ3E,GACV,GAA8B,iBAAnB2E,GAAQ3E,GACjB0E,EAAY1E,GAAQ4E,QAAUD,EAAQ3E,OAEnC,CACH0E,EAAY1E,GAAQ4E,SAAU,CAC9B,KAAK,GAAI7I,KAAQ4I,GAAQ3E,GACnB2E,EAAQ3E,GAAQhE,eAAeD,KACjC2I,EAAY1E,GAAQjE,GAAQ4I,EAAQ3E,GAAQjE,MAmBtDjG,EAAQ+O,mBAAqB,SAASC,EAAcC,EAAgBC,EAAOC,GAMzE,IALA,GAAIC,GAAgB,IAChBC,EAAY,EACZC,EAAM,EACNC,EAAOP,EAAajJ,OAAS,EAEnBwJ,GAAPD,GAA2BF,EAAZC,GAA2B,CAC/C,GAAIG,GAASlK,KAAKC,OAAO+J,EAAMC,GAAQ,GAEnCE,EAAOT,EAAaQ,GACpB/H,EAAoBb,SAAXuI,EAAwBM,EAAKP,GAASO,EAAKP,GAAOC,GAE3DO,EAAeT,EAAexH,EAClC,IAAoB,GAAhBiI,EACF,MAAOF,EAEgB,KAAhBE,EACPJ,EAAME,EAAS,EAGfD,EAAOC,EAAS,EAGlBH,IAGF,MAAO,IAeTrP,EAAQ2P,kBAAoB,SAASX,EAAclF,EAAQoF,EAAOU,GAOhE,IANA,GAIIC,GAAWpI,EAAOqI,EAAWN,EAJ7BJ,EAAgB,IAChBC,EAAY,EACZC,EAAM,EACNC,EAAOP,EAAajJ,OAAS,EAGnBwJ,GAAPD,GAA2BF,EAAZC,GAA2B,CAO/C,GALAG,EAASlK,KAAKC,MAAM,IAAKgK,EAAKD,IAC9BO,EAAYb,EAAa1J,KAAKwH,IAAI,EAAE0C,EAAS,IAAIN,GACjDzH,EAAYuH,EAAaQ,GAAQN,GACjCY,EAAYd,EAAa1J,KAAK4G,IAAI8C,EAAajJ,OAAO,EAAEyJ,EAAS,IAAIN,GAEjEzH,GAASqC,EACX,MAAO0F,EAEJ,IAAgB1F,EAAZ+F,GAAsBpI,EAAQqC,EACrC,MAAyB,UAAlB8F,EAA6BtK,KAAKwH,IAAI,EAAE0C,EAAS,GAAKA,CAE1D,IAAY1F,EAARrC,GAAkBqI,EAAYhG,EACrC,MAAyB,UAAlB8F,EAA6BJ,EAASlK,KAAK4G,IAAI8C,EAAajJ,OAAO,EAAEyJ,EAAS,EAGzE1F,GAARrC,EACF6H,EAAME,EAAS,EAGfD,EAAOC,EAAS,EAGpBH,IAIF,MAAO,IAYTrP,EAAQ+P,cAAgB,SAAU7B,EAAG8B,EAAOC,EAAKC,GAC/C,GAAIC,GAASF,EAAMD,CAEnB,OADA9B,IAAKgC,EAAS,EACN,EAAJhC,EAAciC,EAAO,EAAEjC,EAAEA,EAAI8B,GACjC9B,KACQiC,EAAO,GAAKjC,GAAGA,EAAE,GAAK,GAAK8B,IAUrChQ,EAAQoQ,iBAENC,OAAQ,SAAUnC,GAChB,MA
AOA,IAGToC,WAAY,SAAUpC,GACpB,MAAOA,GAAIA,GAGbqC,YAAa,SAAUrC,GACrB,MAAOA,IAAK,EAAIA,IAGlB6B,cAAe,SAAU7B,GACvB,MAAW,GAAJA,EAAS,EAAIA,EAAIA,EAAI,IAAM,EAAI,EAAIA,GAAKA,GAGjDsC,YAAa,SAAUtC,GACrB,MAAOA,GAAIA,EAAIA,GAGjBuC,aAAc,SAAUvC,GACtB,QAAUA,EAAKA,EAAIA,EAAI,GAGzBwC,eAAgB,SAAUxC,GACxB,MAAW,GAAJA,EAAS,EAAIA,EAAIA,EAAIA,GAAKA,EAAI,IAAM,EAAIA,EAAI,IAAM,EAAIA,EAAI,GAAK,GAGxEyC,YAAa,SAAUzC,GACrB,MAAOA,GAAIA,EAAIA,EAAIA,GAGrB0C,aAAc,SAAU1C,GACtB,MAAO,MAAOA,EAAKA,EAAIA,EAAIA,GAG7B2C,eAAgB,SAAU3C,GACxB,MAAW,GAAJA,EAAS,EAAIA,EAAIA,EAAIA,EAAIA,EAAI,EAAI,IAAOA,EAAKA,EAAIA,EAAIA,GAG9D4C,YAAa,SAAU5C,GACrB,MAAOA,GAAIA,EAAIA,EAAIA,EAAIA,GAGzB6C,aAAc,SAAU7C,GACtB,MAAO,KAAOA,EAAKA,EAAIA,EAAIA,EAAIA,GAGjC8C,eAAgB,SAAU9C,GACxB,MAAW,GAAJA,EAAS,GAAKA,EAAIA,EAAIA,EAAIA,EAAIA,EAAI,EAAI,KAAQA,EAAKA,EAAIA,EAAIA,EAAIA,KAMtE,SAASjO,EAAQD,GASrBA,EAAQiR,gBAAkB,SAASC,GAEjC,IAAK,GAAIC,KAAeD,GAClBA,EAAchL,eAAeiL,KAC/BD,EAAcC,GAAaC,UAAYF,EAAcC,GAAaE,KAClEH,EAAcC,GAAaE,UAYjCrR,EAAQsR,gBAAkB,SAASJ,GAEjC,IAAK,GAAIC,KAAeD,GACtB,GAAIA,EAAchL,eAAeiL,IAC3BD,EAAcC,GAAaC,UAAW,CACxC,IAAK,GAAIxL,GAAI,EAAGA,EAAIsL,EAAcC,GAAaC,UAAUrL,OAAQH,IAC/DsL,EAAcC,GAAaC,UAAUxL,GAAGqE,WAAWsH,YAAYL,EAAcC,GAAaC,UAAUxL,GAEtGsL,GAAcC,GAAaC,eAgBnCpR,EAAQwR,cAAgB,SAAUL,EAAaD,EAAeO,GAC5D,GAAIzI,EAqBJ,OAnBIkI,GAAchL,eAAeiL,GAE3BD,EAAcC,GAAaC,UAAUrL,OAAS,GAChDiD,EAAUkI,EAAcC,GAAaC,UAAU,GAC/CF,EAAcC,GAAaC,UAAUM,UAIrC1I,EAAU2I,SAASC,gBAAgB,6BAA8BT,GACjEM,EAAaI,YAAY7I,KAK3BA,EAAU2I,SAASC,gBAAgB,6BAA8BT,GACjED,EAAcC,IAAgBE,QAAUD,cACxCK,EAAaI,YAAY7I,IAE3BkI,EAAcC,GAAaE,KAAKjJ,KAAKY,GAC9BA,GAcThJ,EAAQ8R,cAAgB,SAAUX,EAAaD,EAAea,EAAcC,GAC1E,GAAIhJ,EA+BJ,OA7BIkI,GAAchL,eAAeiL,GAE3BD,EAAcC,GAAaC,UAAUrL,OAAS,GAChDiD,EAAUkI,EAAcC,GAAaC,UAAU,GAC/CF,EAAcC,GAAaC,UAAUM,UAIrC1I,EAAU2I,SAASM,cAAcd,GACZvK,SAAjBoL,EACFD,EAAaC,aAAahJ,EAASgJ,GAGnCD,EAAaF,YAAY7I,KAM7BA,EAAU2I,SAASM,cAAcd,GACjCD,EAAcC,IAAgBE,QAAUD,cACnBxK,SAAjBoL,EACFD,EAAaC,aAAahJ,EAASgJ,GAGnCD,EAAaF,YAAY7I,IAG7BkI,EAAcC,GAAaE,KAAKjJ,KAAKY,GAC9BA,GAkBThJ,EAAQkS,UAAY,SAASC,EAAGC,EAAGC,EAAOnB,EAAeO,GACvD,GAAIa,EAmBJ,OAlBsC,UAAlCD,EAAMxD,QAAQ0D,WAAWlF,OAC3BiF,EAAQtS,EAAQwR,cAAc,SAASN,EAAcO,GACrDa,EAAME,eAAe,KAAM,KAAML,GACjCG,EAAME,eAAe,KAAM,KAAMJ,GACjCE,EAAME,eAAe,KAAM,IAAK,GAAMH,EAAMxD,QAAQ0D,WAAWE,QAG/DH,EAAQtS,EAAQwR,cAAc,OAAON,EAAcO,GACnDa,EAAME,eAAe,KAAM,IAAKL,EAAI,GAAIE,EAAMxD,QAAQ0D,WAAWE,MACjEH,EAAME,eAAe,KAAM,IAAKJ,EAAI,GAAIC,EAAMxD,QAAQ0D,WAAWE,MACjEH,EAAME,eAAe,KAAM,QAASH,EAAMxD,QAAQ0D,WAAWE,MAC7DH,EAAME,eAAe,KAAM,SAAUH,EAAMxD,QAAQ0D,WAAWE,OAGzB7L,SAApCyL,EAAMxD,QAAQ0D,WAAWnF,QAC1BkF,EAAME,eAAe,KAAM,QAASH,EAAMA,MAAMxD,QAAQ0D,WAAWnF,QAErEkF,EAAME,eAAe,KAAM,QAASH,EAAMpK,UAAY,UAC/CqK,GAUTtS,EAAQ0S,QAAU,SAAUP,EAAGC,EAAGO,EAAOC,EAAQ3K,EAAWiJ,EAAeO,GACzE,GAAc,GAAVmB,EAAa,CACF,EAATA,IACFA,GAAU,GACVR,GAAKQ,EAEP,IAAIC,GAAO7S,EAAQwR,cAAc,OAAON,EAAeO,EACvDoB,GAAKL,eAAe,KAAM,IAAKL,EAAI,GAAMQ,GACzCE,EAAKL,eAAe,KAAM,IAAKJ,GAC/BS,EAAKL,eAAe,KAAM,QAASG,GACnCE,EAAKL,eAAe,KAAM,SAAUI,GACpCC,EAAKL,eAAe,KAAM,QAASvK,MAMnC,SAAShI,EAAQD,EAASM,GAgD9B,QAASW,GAAS6R,EAAMjE,GActB,IAZIiE,GAASzM,MAAMC,QAAQwM,IAAU/R,EAAKiE,YAAY8N,KACpDjE,EAAUiE,EACVA,EAAO,MAGT1S,KAAK2S,SAAWlE,MAChBzO,KAAK4S,SACL5S,KAAK6S,SAAW7S,KAAK2S,SAASG,SAAW,KACzC9S,KAAK+S,SAID/S,KAAK2S,SAAS7L,KAChB,IAAK,GAAIgI,KAAS9O,MAAK2S,SAAS7L,KAC9B,GAAI9G,KAAK2S,SAAS7L,KAAKhB,eAAegJ,GAAQ,CAC5C,GAAIzH,GAAQrH,KAAK2S,SAAS7L,KAAKgI,EAE7B9O,MAAK+S,MAAMjE,GADA,QAATzH,GAA4B,WAATA,GAA+B,WAATA,EACvB,OAGAA,EAO5B,GAAIrH,KAAK2S,SAAS9L,QAChB,KAAM,IAAIjD,OAAM,sDAGlB5D,MAAKgT,gBAGDN,GACF1S,KAAKiT,IAAIP,GAGX1S,KAAKkT,WAAWzE,GAtFlB,GAAI9N,GAAOT,EAAoB,GAC3Ba,EAAQb,EAAoB,EAiGhCW,GAAQsS,UAAUD,WAAa
,SAASzE,GAClCA,GAA6BjI,SAAlBiI,EAAQ2E,QACjB3E,EAAQ2E,SAAU,EAEhBpT,KAAKqT,SACPrT,KAAKqT,OAAOC,gBACLtT,MAAKqT,SAKTrT,KAAKqT,SACRrT,KAAKqT,OAAStS,EAAMuE,OAAOtF,MACzBwK,SAAU,MAAO,SAAU,aAIF,gBAAlBiE,GAAQ2E,OACjBpT,KAAKqT,OAAOH,WAAWzE,EAAQ2E,UAevCvS,EAAQsS,UAAUI,GAAK,SAASjK,EAAOhB,GACrC,GAAIkL,GAAcxT,KAAKgT,aAAa1J,EAC/BkK,KACHA,KACAxT,KAAKgT,aAAa1J,GAASkK,GAG7BA,EAAYxL,MACVM,SAAUA,KAKdzH,EAAQsS,UAAUM,UAAY5S,EAAQsS,UAAUI,GAOhD1S,EAAQsS,UAAUO,IAAM,SAASpK,EAAOhB,GACtC,GAAIkL,GAAcxT,KAAKgT,aAAa1J,EAChCkK,KACFxT,KAAKgT,aAAa1J,GAASkK,EAAYG,OAAO,SAAU7K,GACtD,MAAQA,GAASR,UAAYA,MAMnCzH,EAAQsS,UAAUS,YAAc/S,EAAQsS,UAAUO,IASlD7S,EAAQsS,UAAUU,SAAW,SAAUvK,EAAOwK,EAAQC,GACpD,GAAa,KAATzK,EACF,KAAM,IAAI1F,OAAM,yBAGlB,IAAI4P,KACAlK,KAAStJ,MAAKgT,eAChBQ,EAAcA,EAAYQ,OAAOhU,KAAKgT,aAAa1J,KAEjD,KAAOtJ,MAAKgT,eACdQ,EAAcA,EAAYQ,OAAOhU,KAAKgT,aAAa,MAGrD,KAAK,GAAIxN,GAAI,EAAGA,EAAIgO,EAAY7N,OAAQH,IAAK,CAC3C,GAAIyO,GAAaT,EAAYhO,EACzByO,GAAW3L,UACb2L,EAAW3L,SAASgB,EAAOwK,EAAQC,GAAY,QAYrDlT,EAAQsS,UAAUF,IAAM,SAAUP,EAAMqB,GACtC,GACI1T,GADA6T,KAEAC,EAAKnU,IAET,IAAIiG,MAAMC,QAAQwM,GAEhB,IAAK,GAAIlN,GAAI,EAAGC,EAAMiN,EAAK/M,OAAYF,EAAJD,EAASA,IAC1CnF,EAAK8T,EAAGC,SAAS1B,EAAKlN,IACtB0O,EAASlM,KAAK3H,OAGb,IAAIM,EAAKiE,YAAY8N,GAGxB,IAAK,GADD2B,GAAUrU,KAAKsU,gBAAgB5B,GAC1B6B,EAAM,EAAGC,EAAO9B,EAAK+B,kBAAyBD,EAAND,EAAYA,IAAO,CAElE,IAAK,GADDlF,MACKqF,EAAM,EAAGC,EAAON,EAAQ1O,OAAcgP,EAAND,EAAYA,IAAO,CAC1D,GAAI5F,GAAQuF,EAAQK,EACpBrF,GAAKP,GAAS4D,EAAKkC,SAASL,EAAKG,GAGnCrU,EAAK8T,EAAGC,SAAS/E,GACjB6E,EAASlM,KAAK3H,OAGb,CAAA,KAAIqS,YAAgBnM,SAMvB,KAAM,IAAI3C,OAAM,mBAJhBvD,GAAK8T,EAAGC,SAAS1B,GACjBwB,EAASlM,KAAK3H,GAUhB,MAJI6T,GAASvO,QACX3F,KAAK6T,SAAS,OAAQ5R,MAAOiS,GAAWH,GAGnCG,GASTrT,EAAQsS,UAAU0B,OAAS,SAAUnC,EAAMqB,GACzC,GAAIG,MACAY,KACAC,KACAZ,EAAKnU,KACL8S,EAAUqB,EAAGtB,SAEbmC,EAAc,SAAU3F,GAC1B,GAAIhP,GAAKgP,EAAKyD,EACVqB,GAAGvB,MAAMvS,IAEXA,EAAK8T,EAAGc,YAAY5F,GACpByF,EAAW9M,KAAK3H,GAChB0U,EAAY/M,KAAKqH,KAIjBhP,EAAK8T,EAAGC,SAAS/E,GACjB6E,EAASlM,KAAK3H,IAIlB,IAAI4F,MAAMC,QAAQwM,GAEhB,IAAK,GAAIlN,GAAI,EAAGC,EAAMiN,EAAK/M,OAAYF,EAAJD,EAASA,IAC1CwP,EAAYtC,EAAKlN,QAGhB,IAAI7E,EAAKiE,YAAY8N,GAGxB,IAAK,GADD2B,GAAUrU,KAAKsU,gBAAgB5B,GAC1B6B,EAAM,EAAGC,EAAO9B,EAAK+B,kBAAyBD,EAAND,EAAYA,IAAO,CAElE,IAAK,GADDlF,MACKqF,EAAM,EAAGC,EAAON,EAAQ1O,OAAcgP,EAAND,EAAYA,IAAO,CAC1D,GAAI5F,GAAQuF,EAAQK,EACpBrF,GAAKP,GAAS4D,EAAKkC,SAASL,EAAKG,GAGnCM,EAAY3F,OAGX,CAAA,KAAIqD,YAAgBnM,SAKvB,KAAM,IAAI3C,OAAM,mBAHhBoR,GAAYtC,GAad,MAPIwB,GAASvO,QACX3F,KAAK6T,SAAS,OAAQ5R,MAAOiS,GAAWH,GAEtCe,EAAWnP,QACb3F,KAAK6T,SAAS,UAAW5R,MAAO6S,EAAYpC,KAAMqC,GAAchB,GAG3DG,EAASF,OAAOc,IAsCzBjU,EAAQsS,UAAU+B,IAAM,WACtB,GAGI7U,GAAI8U,EAAK1G,EAASiE,EAHlByB,EAAKnU,KAILoV,EAAYzU,EAAKwG,QAAQzB,UAAU,GACtB,WAAb0P,GAAsC,UAAbA,GAE3B/U,EAAKqF,UAAU,GACf+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,IAEG,SAAb0P,GAEPD,EAAMzP,UAAU,GAChB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,KAIjB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,GAInB,IAAI2P,EACJ,IAAI5G,GAAWA,EAAQ4G,WAAY,CACjC,GAAIC,IAAiB,YAAa,QAAS,SAG3C,IAFAD,EAA0D,IAA7CC,EAAc3O,QAAQ8H,EAAQ4G,YAAoB,QAAU5G,EAAQ4G,WAE7E3C,GAAS2C,GAAc1U,EAAKwG,QAAQuL,GACtC,KAAM,IAAI9O,OAAM,6BAA+BjD,EAAKwG,QAAQuL,GAAQ,sDACVjE,EAAQ3H,KAAO,IAE3E,IAAkB,aAAduO,IAA8B1U,EAAKiE,YAAY8N,GACjD,KAAM,IAAI9O,OAAM,6EAKlByR,GADO3C,GAC6B,aAAtB/R,EAAKwG,QAAQuL,GAAwB,YAGtC,OAIf,IAEgBrD,GAAMkG,EAAQ/P,EAAGC,EAF7BqB,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAChD6M,EAASlF,GAAWA,EAAQkF,OAC5B1R,IAGJ,IAAUuE,QAANnG,EAEFgP,EAAO8E,EAAGqB,SAASnV,EAAIyG,GACnB6M,IAAWA,EAAOtE,KACpBA,EAAO,UAGN,IAAW7I,QAAP2O,EAEP,IAAK3P,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrC6J,EAAO8E,EAAGqB,SAASL,EAAI3P,GAAIsB,KACtB6M,GAAUA,EAAOtE,KACpBpN
,EAAM+F,KAAKqH,OAMf,KAAKkG,IAAUvV,MAAK4S,MACd5S,KAAK4S,MAAM9M,eAAeyP,KAC5BlG,EAAO8E,EAAGqB,SAASD,EAAQzO,KACtB6M,GAAUA,EAAOtE,KACpBpN,EAAM+F,KAAKqH,GAYnB,IALIZ,GAAWA,EAAQgH,OAAejP,QAANnG,GAC9BL,KAAK0V,MAAMzT,EAAOwM,EAAQgH,OAIxBhH,GAAWA,EAAQP,OAAQ,CAC7B,GAAIA,GAASO,EAAQP,MACrB,IAAU1H,QAANnG,EACFgP,EAAOrP,KAAK2V,cAActG,EAAMnB,OAGhC,KAAK1I,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvCvD,EAAMuD,GAAKxF,KAAK2V,cAAc1T,EAAMuD,GAAI0I,GAM9C,GAAkB,aAAdmH,EAA2B,CAC7B,GAAIhB,GAAUrU,KAAKsU,gBAAgB5B,EACnC,IAAUlM,QAANnG,EAEF8T,EAAGyB,WAAWlD,EAAM2B,EAAShF,OAI7B,KAAK7J,EAAI,EAAGA,EAAIvD,EAAM0D,OAAQH,IAC5B2O,EAAGyB,WAAWlD,EAAM2B,EAASpS,EAAMuD,GAGvC,OAAOkN,GAEJ,GAAkB,UAAd2C,EAAwB,CAC/B,GAAI1K,KACJ,KAAKnF,EAAI,EAAGA,EAAIvD,EAAM0D,OAAQH,IAC5BmF,EAAO1I,EAAMuD,GAAGnF,IAAM4B,EAAMuD,EAE9B,OAAOmF,GAIP,GAAUnE,QAANnG,EAEF,MAAOgP,EAIP,IAAIqD,EAAM,CAER,IAAKlN,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvCkN,EAAK1K,KAAK/F,EAAMuD,GAElB,OAAOkN,GAIP,MAAOzQ,IAcfpB,EAAQsS,UAAU0C,OAAS,SAAUpH,GACnC,GAIIjJ,GACAC,EACApF,EACAgP,EACApN,EARAyQ,EAAO1S,KAAK4S,MACZe,EAASlF,GAAWA,EAAQkF,OAC5B8B,EAAQhH,GAAWA,EAAQgH,MAC3B3O,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAMhDqO,IAEJ,IAAIxB,EAEF,GAAI8B,EAAO,CAETxT,IACA,KAAK5B,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,GACrB6M,EAAOtE,IACTpN,EAAM+F,KAAKqH,GAOjB,KAFArP,KAAK0V,MAAMzT,EAAOwT,GAEbjQ,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvC2P,EAAI3P,GAAKvD,EAAMuD,GAAGxF,KAAK6S,cAKzB,KAAKxS,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,GACrB6M,EAAOtE,IACT8F,EAAInN,KAAKqH,EAAKrP,KAAK6S,gBAQ3B,IAAI4C,EAAO,CAETxT,IACA,KAAK5B,IAAMqS,GACLA,EAAK5M,eAAezF,IACtB4B,EAAM+F,KAAK0K,EAAKrS,GAMpB,KAFAL,KAAK0V,MAAMzT,EAAOwT,GAEbjQ,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IACvC2P,EAAI3P,GAAKvD,EAAMuD,GAAGxF,KAAK6S,cAKzB,KAAKxS,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOqD,EAAKrS,GACZ8U,EAAInN,KAAKqH,EAAKrP,KAAK6S,WAM3B,OAAOsC,IAOTtU,EAAQsS,UAAU2C,WAAa,WAC7B,MAAO9V,OAaTa,EAAQsS,UAAU9K,QAAU,SAAUC,EAAUmG,GAC9C,GAGIY,GACAhP,EAJAsT,EAASlF,GAAWA,EAAQkF,OAC5B7M,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAChD4L,EAAO1S,KAAK4S,KAIhB,IAAInE,GAAWA,EAAQgH,MAIrB,IAAK,GAFDxT,GAAQjC,KAAKkV,IAAIzG,GAEZjJ,EAAI,EAAGC,EAAMxD,EAAM0D,OAAYF,EAAJD,EAASA,IAC3C6J,EAAOpN,EAAMuD,GACbnF,EAAKgP,EAAKrP,KAAK6S,UACfvK,EAAS+G,EAAMhP,OAKjB,KAAKA,IAAMqS,GACLA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,KACpB6M,GAAUA,EAAOtE,KACpB/G,EAAS+G,EAAMhP,KAkBzBQ,EAAQsS,UAAU9F,IAAM,SAAU/E,EAAUmG,GAC1C,GAIIY,GAJAsE,EAASlF,GAAWA,EAAQkF,OAC5B7M,EAAO2H,GAAWA,EAAQ3H,MAAQ9G,KAAK2S,SAAS7L,KAChDiP,KACArD,EAAO1S,KAAK4S,KAIhB,KAAK,GAAIvS,KAAMqS,GACTA,EAAK5M,eAAezF,KACtBgP,EAAOrP,KAAKwV,SAASnV,EAAIyG,KACpB6M,GAAUA,EAAOtE,KACpB0G,EAAY/N,KAAKM,EAAS+G,EAAMhP,IAUtC,OAJIoO,IAAWA,EAAQgH,OACrBzV,KAAK0V,MAAMK,EAAatH,EAAQgH,OAG3BM,GAUTlV,EAAQsS,UAAUwC,cAAgB,SAAUtG,EAAMnB,GAChD,GAAI8H,KAEJ,KAAK,GAAIlH,KAASO,GACZA,EAAKvJ,eAAegJ,IAAoC,IAAzBZ,EAAOvH,QAAQmI,KAChDkH,EAAalH,GAASO,EAAKP,GAI/B,OAAOkH,IASTnV,EAAQsS,UAAUuC,MAAQ,SAAUzT,EAAOwT,GACzC,GAAI9U,EAAKwD,SAASsR,GAAQ,CAExB,GAAIQ,GAAOR,CACXxT,GAAMiU,KAAK,SAAU3Q,EAAGa,GACtB,GAAI+P,GAAK5Q,EAAE0Q,GACPG,EAAKhQ,EAAE6P,EACX,OAAQE,GAAKC,EAAM,EAAWA,EAALD,EAAW,GAAK,QAGxC,CAAA,GAAqB,kBAAVV,GAOd,KAAM,IAAIpP,WAAU,uCALpBpE,GAAMiU,KAAKT,KAgBf5U,EAAQsS,UAAUkD,OAAS,SAAUhW,EAAI0T,GACvC,GACIvO,GAAGC,EAAK6Q,EADRC,IAGJ,IAAItQ,MAAMC,QAAQ7F,GAChB,IAAKmF,EAAI,EAAGC,EAAMpF,EAAGsF,OAAYF,EAAJD,EAASA,IACpC8Q,EAAYtW,KAAKwW,QAAQnW,EAAGmF,IACX,MAAb8Q,GACFC,EAAWvO,KAAKsO,OAKpBA,GAAYtW,KAAKwW,QAAQnW,GACR,MAAbiW,GACFC,EAAWvO,KAAKsO,EAQpB,OAJIC,GAAW5Q,QACb3F,KAAK6T,SAAS,UAAW5R,MAAOsU,GAAaxC,GAGxCwC,GAST1V,EAAQsS,UAAUqD,QAAU
,SAAUnW,GACpC,GAAIM,EAAKqD,SAAS3D,IAAOM,EAAKwD,SAAS9D,IACrC,GAAIL,KAAK4S,MAAMvS,GAEb,aADOL,MAAK4S,MAAMvS,GACXA,MAGN,IAAIA,YAAckG,QAAQ,CAC7B,GAAIgP,GAASlV,EAAGL,KAAK6S,SACrB,IAAI0C,GAAUvV,KAAK4S,MAAM2C,GAEvB,aADOvV,MAAK4S,MAAM2C,GACXA,EAGX,MAAO,OAQT1U,EAAQsS,UAAUsD,MAAQ,SAAU1C,GAClC,GAAIoB,GAAM5O,OAAO6G,KAAKpN,KAAK4S,MAM3B,OAJA5S,MAAK4S,SAEL5S,KAAK6T,SAAS,UAAW5R,MAAOkT,GAAMpB,GAE/BoB,GAQTtU,EAAQsS,UAAUzG,IAAM,SAAUoC,GAChC,GAAI4D,GAAO1S,KAAK4S,MACZlG,EAAM,KACNgK,EAAW,IAEf,KAAK,GAAIrW,KAAMqS,GACb,GAAIA,EAAK5M,eAAezF,GAAK,CAC3B,GAAIgP,GAAOqD,EAAKrS,GACZsW,EAAYtH,EAAKP,EACJ,OAAb6H,KAAuBjK,GAAOiK,EAAYD,KAC5ChK,EAAM2C,EACNqH,EAAWC,GAKjB,MAAOjK,IAQT7L,EAAQsS,UAAUrH,IAAM,SAAUgD,GAChC,GAAI4D,GAAO1S,KAAK4S,MACZ9G,EAAM,KACN8K,EAAW,IAEf,KAAK,GAAIvW,KAAMqS,GACb,GAAIA,EAAK5M,eAAezF,GAAK,CAC3B,GAAIgP,GAAOqD,EAAKrS,GACZsW,EAAYtH,EAAKP,EACJ,OAAb6H,KAAuB7K,GAAmB8K,EAAZD,KAChC7K,EAAMuD,EACNuH,EAAWD,GAKjB,MAAO7K,IAUTjL,EAAQsS,UAAU0D,SAAW,SAAU/H,GACrC,GAIItJ,GAJAkN,EAAO1S,KAAK4S,MACZkE,KACAC,EAAY/W,KAAK2S,SAAS7L,MAAQ9G,KAAK2S,SAAS7L,KAAKgI,IAAU,KAC/DkI,EAAQ,CAGZ,KAAK,GAAInR,KAAQ6M,GACf,GAAIA,EAAK5M,eAAeD,GAAO,CAC7B,GAAIwJ,GAAOqD,EAAK7M,GACZwB,EAAQgI,EAAKP,GACbmI,GAAS,CACb,KAAKzR,EAAI,EAAOwR,EAAJxR,EAAWA,IACrB,GAAIsR,EAAOtR,IAAM6B,EAAO,CACtB4P,GAAS,CACT,OAGCA,GAAqBzQ,SAAVa,IACdyP,EAAOE,GAAS3P,EAChB2P,KAKN,GAAID,EACF,IAAKvR,EAAI,EAAGA,EAAIsR,EAAOnR,OAAQH,IAC7BsR,EAAOtR,GAAK7E,EAAKkG,QAAQiQ,EAAOtR,GAAIuR,EAIxC,OAAOD,IASTjW,EAAQsS,UAAUiB,SAAW,SAAU/E,GACrC,GAAIhP,GAAKgP,EAAKrP,KAAK6S,SAEnB,IAAUrM,QAANnG,GAEF,GAAIL,KAAK4S,MAAMvS,GAEb,KAAM,IAAIuD,OAAM,iCAAmCvD,EAAK,uBAK1DA,GAAKM,EAAKqE,aACVqK,EAAKrP,KAAK6S,UAAYxS,CAGxB,IAAIsM,KACJ,KAAK,GAAImC,KAASO,GAChB,GAAIA,EAAKvJ,eAAegJ,GAAQ,CAC9B,GAAIiI,GAAY/W,KAAK+S,MAAMjE,EAC3BnC,GAAEmC,GAASnO,EAAKkG,QAAQwI,EAAKP,GAAQiI,GAKzC,MAFA/W,MAAK4S,MAAMvS,GAAMsM,EAEVtM,GAUTQ,EAAQsS,UAAUqC,SAAW,SAAUnV,EAAI6W,GACzC,GAAIpI,GAAOzH,EAGP8P,EAAMnX,KAAK4S,MAAMvS,EACrB,KAAK8W,EACH,MAAO,KAIT,IAAIC,KACJ,IAAIF,EACF,IAAKpI,IAASqI,GACRA,EAAIrR,eAAegJ,KACrBzH,EAAQ8P,EAAIrI,GACZsI,EAAUtI,GAASnO,EAAKkG,QAAQQ,EAAO6P,EAAMpI,SAMjD,KAAKA,IAASqI,GACRA,EAAIrR,eAAegJ,KACrBzH,EAAQ8P,EAAIrI,GACZsI,EAAUtI,GAASzH,EAIzB,OAAO+P,IAWTvW,EAAQsS,UAAU8B,YAAc,SAAU5F,GACxC,GAAIhP,GAAKgP,EAAKrP,KAAK6S,SACnB,IAAUrM,QAANnG,EACF,KAAM,IAAIuD,OAAM,6CAA+CyT,KAAKC,UAAUjI,GAAQ,IAExF,IAAI1C,GAAI3M,KAAK4S,MAAMvS,EACnB,KAAKsM,EAEH,KAAM,IAAI/I,OAAM,uCAAyCvD,EAAK,SAIhE,KAAK,GAAIyO,KAASO,GAChB,GAAIA,EAAKvJ,eAAegJ,GAAQ,CAC9B,GAAIiI,GAAY/W,KAAK+S,MAAMjE,EAC3BnC,GAAEmC,GAASnO,EAAKkG,QAAQwI,EAAKP,GAAQiI,GAIzC,MAAO1W,IASTQ,EAAQsS,UAAUmB,gBAAkB,SAAUiD,GAE5C,IAAK,GADDlD,MACKK,EAAM,EAAGC,EAAO4C,EAAUC,qBAA4B7C,EAAND,EAAYA,IACnEL,EAAQK,GAAO6C,EAAUE,YAAY/C,IAAQ6C,EAAUG,eAAehD,EAExE,OAAOL,IAUTxT,EAAQsS,UAAUyC,WAAa,SAAU2B,EAAWlD,EAAShF,GAG3D,IAAK,GAFDkF,GAAMgD,EAAUI,SAEXjD,EAAM,EAAGC,EAAON,EAAQ1O,OAAcgP,EAAND,EAAYA,IAAO,CAC1D,GAAI5F,GAAQuF,EAAQK,EACpB6C,GAAUK,SAASrD,EAAKG,EAAKrF,EAAKP,MAItCjP,EAAOD,QAAUiB,GAKb,SAAShB,EAAQD,EAASM,GAe9B,QAASY,GAAU4R,EAAMjE,GACvBzO,KAAK4S,MAAQ,KACb5S,KAAK6X,QACL7X,KAAK2S,SAAWlE,MAChBzO,KAAK6S,SAAW,KAChB7S,KAAKgT,eAEL,IAAImB,GAAKnU,IACTA,MAAK8I,SAAW,WACdqL,EAAG2D,SAASC,MAAM5D,EAAIzO,YAGxB1F,KAAKgY,QAAQtF,GAzBf,GAAI/R,GAAOT,EAAoB,GAC3BW,EAAUX,EAAoB,EAkClCY,GAASqS,UAAU6E,QAAU,SAAUtF,GACrC,GAAIyC,GAAK3P,EAAGC,CAEZ,IAAIzF,KAAK4S,MAAO,CAEV5S,KAAK4S,MAAMgB,aACb5T,KAAK4S,MAAMgB,YAAY,IAAK5T,KAAK8I,UAInCqM,IACA,KAAK,GAAI9U,KAAML,MAAK6X,KACd7X,KAAK6X,KAAK/R,eAAezF,IAC3B8U,EAAInN,KAAK3H,EAGbL,MAAK6X,QACL7X,KAAK6T,SAAS,UAAW5R,MAAOkT,IAKlC,GAFAnV,KAAK4S,MAAQF,EAET1S,KAAK4S,MAAO,CAQd,IANA5S,KAAK6S,SAAW7S,KAAK2S,SAASG,SACzB9S,KAAK4S,OAA
S5S,KAAK4S,MAAMnE,SAAWzO,KAAK4S,MAAMnE,QAAQqE,SACxD,KAGJqC,EAAMnV,KAAK4S,MAAMiD,QAAQlC,OAAQ3T,KAAK2S,UAAY3S,KAAK2S,SAASgB,SAC3DnO,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACTxF,KAAK6X,KAAKxX,IAAM,CAElBL,MAAK6T,SAAS,OAAQ5R,MAAOkT,IAGzBnV,KAAK4S,MAAMW,IACbvT,KAAK4S,MAAMW,GAAG,IAAKvT,KAAK8I,YAuC9BhI,EAASqS,UAAU+B,IAAM,WACvB,GAGIC,GAAK1G,EAASiE,EAHdyB,EAAKnU,KAILoV,EAAYzU,EAAKwG,QAAQzB,UAAU,GACtB,WAAb0P,GAAsC,UAAbA,GAAsC,SAAbA,GAEpDD,EAAMzP,UAAU,GAChB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,KAIjB+I,EAAU/I,UAAU,GACpBgN,EAAOhN,UAAU,GAInB,IAAIuS,GAActX,EAAK2E,UAAWtF,KAAK2S,SAAUlE,EAG7CzO,MAAK2S,SAASgB,QAAUlF,GAAWA,EAAQkF,SAC7CsE,EAAYtE,OAAS,SAAUtE,GAC7B,MAAO8E,GAAGxB,SAASgB,OAAOtE,IAASZ,EAAQkF,OAAOtE,IAKtD,IAAI6I,KAOJ,OANW1R,SAAP2O,GACF+C,EAAalQ,KAAKmN,GAEpB+C,EAAalQ,KAAKiQ,GAClBC,EAAalQ,KAAK0K,GAEX1S,KAAK4S,OAAS5S,KAAK4S,MAAMsC,IAAI6C,MAAM/X,KAAK4S,MAAOsF,IAWxDpX,EAASqS,UAAU0C,OAAS,SAAUpH,GACpC,GAAI0G,EAEJ,IAAInV,KAAK4S,MAAO,CACd,GACIe,GADAwE,EAAgBnY,KAAK2S,SAASgB,MAK9BA,GAFAlF,GAAWA,EAAQkF,OACjBwE,EACO,SAAU9I,GACjB,MAAO8I,GAAc9I,IAASZ,EAAQkF,OAAOtE,IAItCZ,EAAQkF,OAIVwE,EAGXhD,EAAMnV,KAAK4S,MAAMiD,QACflC,OAAQA,EACR8B,MAAOhH,GAAWA,EAAQgH,YAI5BN,KAGF,OAAOA,IAQTrU,EAASqS,UAAU2C,WAAa,WAE9B,IADA,GAAIsC,GAAUpY,KACPoY,YAAmBtX,IACxBsX,EAAUA,EAAQxF,KAEpB,OAAOwF,IAAW,MAYpBtX,EAASqS,UAAU2E,SAAW,SAAUxO,EAAOwK,EAAQC,GACrD,GAAIvO,GAAGC,EAAKpF,EAAIgP,EACZ8F,EAAMrB,GAAUA,EAAO7R,MACvByQ,EAAO1S,KAAK4S,MACZyF,KACAC,KACAC,IAEJ,IAAIpD,GAAOzC,EAAM,CACf,OAAQpJ,GACN,IAAK,MAEH,IAAK9D,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACT6J,EAAOrP,KAAKkV,IAAI7U,GACZgP,IACFrP,KAAK6X,KAAKxX,IAAM,EAChBgY,EAAMrQ,KAAK3H,GAIf,MAEF,KAAK,SAGH,IAAKmF,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACT6J,EAAOrP,KAAKkV,IAAI7U,GAEZgP,EACErP,KAAK6X,KAAKxX,GACZiY,EAAQtQ,KAAK3H,IAGbL,KAAK6X,KAAKxX,IAAM,EAChBgY,EAAMrQ,KAAK3H,IAITL,KAAK6X,KAAKxX,WACLL,MAAK6X,KAAKxX,GACjBkY,EAAQvQ,KAAK3H,GAQnB,MAEF,KAAK,SAEH,IAAKmF,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IACrCnF,EAAK8U,EAAI3P,GACLxF,KAAK6X,KAAKxX,WACLL,MAAK6X,KAAKxX,GACjBkY,EAAQvQ,KAAK3H,IAOjBgY,EAAM1S,QACR3F,KAAK6T,SAAS,OAAQ5R,MAAOoW,GAAQtE,GAEnCuE,EAAQ3S,QACV3F,KAAK6T,SAAS,UAAW5R,MAAOqW,GAAUvE,GAExCwE,EAAQ5S,QACV3F,KAAK6T,SAAS,UAAW5R,MAAOsW,GAAUxE,KAMhDjT,EAASqS,UAAUI,GAAK1S,EAAQsS,UAAUI,GAC1CzS,EAASqS,UAAUO,IAAM7S,EAAQsS,UAAUO,IAC3C5S,EAASqS,UAAUU,SAAWhT,EAAQsS,UAAUU,SAGhD/S,EAASqS,UAAUM,UAAY3S,EAASqS,UAAUI,GAClDzS,EAASqS,UAAUS,YAAc9S,EAASqS,UAAUO,IAEpD7T,EAAOD,QAAUkB,GAIb,SAASjB,GAeb,QAASkB,GAAM0N,GAEbzO,KAAKwY,MAAQ,KACbxY,KAAK0M,IAAM+L,IAGXzY,KAAKqT,UACLrT,KAAK0Y,SAAW,KAChB1Y,KAAK2Y,UAAY,KAEjB3Y,KAAKkT,WAAWzE,GAgBlB1N,EAAMoS,UAAUD,WAAa,SAAUzE,GACjCA,GAAoC,mBAAlBA,GAAQ+J,QAC5BxY,KAAKwY,MAAQ/J,EAAQ+J,OAEnB/J,GAAkC,mBAAhBA,GAAQ/B,MAC5B1M,KAAK0M,IAAM+B,EAAQ/B,KAGrB1M,KAAK4Y,kBAsBP7X,EAAMuE,OAAS,SAAUrB,EAAQwK,GAC/B,GAAI2E,GAAQ,GAAIrS,GAAM0N,EAEtB,IAAqBjI,SAAjBvC,EAAO4U,MACT,KAAM,IAAIjV,OAAM,6CAElBK,GAAO4U,MAAQ,WACbzF,EAAMyF,QAGR,IAAIC,KACF7C,KAAM,QACN8C,SAAUvS,QAGZ,IAAIiI,GAAWA,EAAQjE,QACrB,IAAK,GAAIhF,GAAI,EAAGA,EAAIiJ,EAAQjE,QAAQ7E,OAAQH,IAAK,CAC/C,GAAIyQ,GAAOxH,EAAQjE,QAAQhF,EAC3BsT,GAAQ9Q,MACNiO,KAAMA,EACN8C,SAAU9U,EAAOgS,KAEnB7C,EAAM5I,QAAQvG,EAAQgS,GAS1B,MALA7C,GAAMuF,WACJ1U,OAAQA,EACR6U,QAASA,GAGJ1F,GAOTrS,EAAMoS,UAAUG,QAAU,WAGxB,GAFAtT,KAAK6Y,QAED7Y,KAAK2Y,UAAW,CAGlB,IAAK,GAFD1U,GAASjE,KAAK2Y,UAAU1U,OACxB6U,EAAU9Y,KAAK2Y,UAAUG,QACpBtT,EAAI,EAAGA,EAAIsT,EAAQnT,OAAQH,IAAK,CACvC,GAAIwT,GAASF,EAAQtT,EACjBwT,GAAOD,SACT9U,EAAO+U,EAAO/C,MAAQ+C,EAAOD,eAGtB9U,GAAO+U,EAAO/C,MAGzBjW,KAAK2Y,UAAY,OASrB5X,EAAMoS,UAAU3I,QAAU,SAASvG,EAAQ+U,GAC
zC,GAAI7E,GAAKnU,KACL+Y,EAAW9U,EAAO+U,EACtB,KAAKD,EACH,KAAM,IAAInV,OAAM,UAAYoV,EAAS,aAGvC/U,GAAO+U,GAAU,WAGf,IAAK,GADDC,MACKzT,EAAI,EAAGA,EAAIE,UAAUC,OAAQH,IACpCyT,EAAKzT,GAAKE,UAAUF,EAItB2O,GAAGf,OACD6F,KAAMA,EACNC,GAAIH,EACJI,QAASnZ,SASfe,EAAMoS,UAAUC,MAAQ,SAASgG,GAE7BpZ,KAAKqT,OAAOrL,KADO,kBAAVoR,IACSF,GAAIE,GAGLA,GAGnBpZ,KAAK4Y,kBAOP7X,EAAMoS,UAAUyF,eAAiB,WAQ/B,GANI5Y,KAAKqT,OAAO1N,OAAS3F,KAAK0M,KAC5B1M,KAAK6Y,QAIPQ,aAAarZ,KAAK0Y,UACd1Y,KAAKoT,MAAMzN,OAAS,GAA2B,gBAAf3F,MAAKwY,MAAoB,CAC3D,GAAIrE,GAAKnU,IACTA,MAAK0Y,SAAWY,WAAW,WACzBnF,EAAG0E,SACF7Y,KAAKwY,SAOZzX,EAAMoS,UAAU0F,MAAQ,WACtB,KAAO7Y,KAAKqT,OAAO1N,OAAS,GAAG,CAC7B,GAAIyT,GAAQpZ,KAAKqT,OAAO/B,OACxB8H,GAAMF,GAAGnB,MAAMqB,EAAMD,SAAWC,EAAMF,GAAIE,EAAMH,YAIpDpZ,EAAOD,QAAUmB,GAKb,SAASlB,EAAQD,EAASM,GAwB9B,QAASc,GAAQuY,EAAW7G,EAAMjE,GAChC,KAAMzO,eAAgBgB,IACpB,KAAM,IAAIwY,aAAY,mDAIxBxZ,MAAKyZ,iBAAmBF,EACxBvZ,KAAKuS,MAAQ,QACbvS,KAAKwS,OAAS,QACdxS,KAAK0Z,OAAS,GACd1Z,KAAK2Z,eAAiB,MACtB3Z,KAAK4Z,eAAiB,MAEtB5Z,KAAK6Z,OAAS,IACd7Z,KAAK8Z,OAAS,IACd9Z,KAAK+Z,OAAS,GAEd,IAAIC,GAAc,SAASnO,GAAK,MAAOA,GACvC7L,MAAKia,YAAcD,EACnBha,KAAKka,YAAcF,EACnBha,KAAKma,YAAcH,EAEnBha,KAAKoa,YAAc,OACnBpa,KAAKqa,YAAc,QAEnBra,KAAKiN,MAAQjM,EAAQsZ,MAAMC,IAC3Bva,KAAKwa,iBAAkB,EACvBxa,KAAKya,UAAW,EAChBza,KAAK0a,iBAAkB,EACvB1a,KAAK2a,YAAa,EAClB3a,KAAK4a,gBAAiB,EACtB5a,KAAK6a,aAAc,EACnB7a,KAAK8a,cAAgB,GAErB9a,KAAK+a,kBAAoB,IACzB/a,KAAKgb,kBAAmB,EAExBhb,KAAKib,OAAS,GAAI/Z,GAClBlB,KAAKkb,IAAM,GAAI7Z,GAAQ,EAAG,EAAG,IAE7BrB,KAAKuX,UAAY,KACjBvX,KAAKmb,WAAa,KAGlBnb,KAAKob,KAAO5U,OACZxG,KAAKqb,KAAO7U,OACZxG,KAAKsb,KAAO9U,OACZxG,KAAKub,SAAW/U,OAChBxG,KAAKwb,UAAYhV,OAEjBxG,KAAKyb,KAAO,EACZzb,KAAK0b,MAAQlV,OACbxG,KAAK2b,KAAO,EACZ3b,KAAK4b,KAAO,EACZ5b,KAAK6b,MAAQrV,OACbxG,KAAK8b,KAAO,EACZ9b,KAAK+b,KAAO,EACZ/b,KAAKgc,MAAQxV,OACbxG,KAAKic,KAAO,EACZjc,KAAKkc,SAAW,EAChBlc,KAAKmc,SAAW,EAChBnc,KAAKoc,UAAY,EACjBpc,KAAKqc,UAAY,EAIjBrc,KAAKsc,UAAY,UACjBtc,KAAKuc,UAAY,UACjBvc,KAAKwc,SAAW,UAChBxc,KAAKyc,eAAiB,UAGtBzc,KAAKqO,SAGLrO,KAAKkT,WAAWzE,GAGZiE,GACF1S,KAAKgY,QAAQtF,GAknEjB,QAASgK,GAAWpT,GAClB,MAAI,WAAaA,GAAcA,EAAMqT,QAC9BrT,EAAMsT,cAAc,IAAMtT,EAAMsT,cAAc,GAAGD,SAAW,EAQrE,QAASE,GAAWvT,GAClB,MAAI,WAAaA,GAAcA,EAAMwT,QAC9BxT,EAAMsT,cAAc,IAAMtT,EAAMsT,cAAc,GAAGE,SAAW,EAnuErE,GAAIC,GAAU7c,EAAoB,IAC9BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/BS,EAAOT,EAAoB,GAC3BmB,EAAUnB,EAAoB,IAC9BkB,EAAUlB,EAAoB,GAC9BgB,EAAShB,EAAoB,GAC7BiB,EAASjB,EAAoB,GAC7BoB,EAASpB,EAAoB,IAC7BqB,EAAarB,EAAoB,GAiGrC6c,GAAQ/b,EAAQmS,WAKhBnS,EAAQmS,UAAU6J,UAAY,WAC5Bhd,KAAKid,MAAQ,GAAI5b,GAAQ,GAAKrB,KAAK2b,KAAO3b,KAAKyb,MAC7C,GAAKzb,KAAK8b,KAAO9b,KAAK4b,MACtB,GAAK5b,KAAKic,KAAOjc,KAAK+b,OAGpB/b,KAAK0a,kBACH1a,KAAKid,MAAMlL,EAAI/R,KAAKid,MAAMjL,EAE5BhS,KAAKid,MAAMjL,EAAIhS,KAAKid,MAAMlL,EAI1B/R,KAAKid,MAAMlL,EAAI/R,KAAKid,MAAMjL,GAK9BhS,KAAKid,MAAMC,GAAKld,KAAK8a,cAIrB9a,KAAKid,MAAM5V,MAAQ,GAAKrH,KAAKmc,SAAWnc,KAAKkc,SAG7C,IAAIiB,IAAWnd,KAAK2b,KAAO3b,KAAKyb,MAAQ,EAAIzb,KAAKid,MAAMlL,EACnDqL,GAAWpd,KAAK8b,KAAO9b,KAAK4b,MAAQ,EAAI5b,KAAKid,MAAMjL,EACnDqL,GAAWrd,KAAKic,KAAOjc,KAAK+b,MAAQ,EAAI/b,KAAKid,MAAMC,CACvDld,MAAKib,OAAOqC,eAAeH,EAASC,EAASC,IAU/Crc,EAAQmS,UAAUoK,eAAiB,SAASC,GAC1C,GAAIC,GAAczd,KAAK0d,2BAA2BF,EAClD,OAAOxd,MAAK2d,4BAA4BF,IAW1Czc,EAAQmS,UAAUuK,2BAA6B,SAASF,GACtD,GAAII,GAAKJ,EAAQzL,EAAI/R,KAAKid,MAAMlL,EAC9B8L,EAAKL,EAAQxL,EAAIhS,KAAKid,MAAMjL,EAC5B8L,EAAKN,EAAQN,EAAIld,KAAKid,MAAMC,EAE5Ba,EAAK/d,KAAKib,OAAO+C,oBAAoBjM,EACrCkM,EAAKje,KAAKib,OAAO+C,oBAAoBhM,EACrCkM,EAAKle,KAAKib,OAAO+C,oBAAoBd,EAGrCiB,EAAQjZ,KAAKkZ,IAAIpe,KAAKib,OAAOoD,oBAAoBtM,GACjDuM,EAAQpZ,KAAKqZ,IAAIve,KAAKib,OAAOoD,oBAAoBtM,GACjDyM,EAAQtZ,KAAKkZ,I
AAIpe,KAAKib,OAAOoD,oBAAoBrM,GACjDyM,EAAQvZ,KAAKqZ,IAAIve,KAAKib,OAAOoD,oBAAoBrM,GACjD0M,EAAQxZ,KAAKkZ,IAAIpe,KAAKib,OAAOoD,oBAAoBnB,GACjDyB,EAAQzZ,KAAKqZ,IAAIve,KAAKib,OAAOoD,oBAAoBnB,GAGjD0B,EAAKH,GAASC,GAASb,EAAKI,GAAMU,GAASf,EAAKG,IAAOS,GAASV,EAAKI,GACrEW,EAAKV,GAASM,GAASX,EAAKI,GAAMM,GAASE,GAASb,EAAKI,GAAMU,GAASf,EAAKG,KAAQO,GAASK,GAASd,EAAKI,GAAMS,GAASd,EAAGG,IAC9He,EAAKR,GAASG,GAASX,EAAKI,GAAMM,GAASE,GAASb,EAAKI,GAAMU,GAASf,EAAKG,KAAQI,GAASQ,GAASd,EAAKI,GAAMS,GAASd,EAAGG,GAEhI,OAAO,IAAI1c,GAAQud,EAAIC,EAAIC,IAU7B9d,EAAQmS,UAAUwK,4BAA8B,SAASF,GACvD,GAQIsB,GACAC,EATAC,EAAKjf,KAAKkb,IAAInJ,EAChBmN,EAAKlf,KAAKkb,IAAIlJ,EACdmN,EAAKnf,KAAKkb,IAAIgC,EACd0B,EAAKnB,EAAY1L,EACjB8M,EAAKpB,EAAYzL,EACjB8M,EAAKrB,EAAYP,CAgBnB,OAXIld,MAAKwa,iBACPuE,GAAMH,EAAKK,IAAOE,EAAKL,GACvBE,GAAMH,EAAKK,IAAOC,EAAKL,KAGvBC,EAAKH,IAAOO,EAAKnf,KAAKib,OAAOmE,gBAC7BJ,EAAKH,IAAOM,EAAKnf,KAAKib,OAAOmE,iBAKxB,GAAIhe,GACTpB,KAAKqf,QAAUN,EAAK/e,KAAKsf,MAAMC,OAAOC,YACtCxf,KAAKyf,QAAUT,EAAKhf,KAAKsf,MAAMC,OAAOC,cAO1Cxe,EAAQmS,UAAUuM,oBAAsB,SAASC,GAC/C,GAAIC,GAAO,QACPC,EAAS,OACTC,EAAc,CAElB,IAAgC,gBAAtB,GACRF,EAAOD,EACPE,EAAS,OACTC,EAAc,MAEX,IAAgC,gBAAtB,GACgBtZ,SAAzBmZ,EAAgBC,OAAuBA,EAAOD,EAAgBC,MACnCpZ,SAA3BmZ,EAAgBE,SAAyBA,EAASF,EAAgBE,QAClCrZ,SAAhCmZ,EAAgBG,cAA2BA,EAAcH,EAAgBG,iBAE1E,IAAyBtZ,SAApBmZ,EAIR,KAAM,qCAGR3f,MAAKsf,MAAMrS,MAAM0S,gBAAkBC,EACnC5f,KAAKsf,MAAMrS,MAAM8S,YAAcF,EAC/B7f,KAAKsf,MAAMrS,MAAM+S,YAAcF,EAAc,KAC7C9f,KAAKsf,MAAMrS,MAAMgT,YAAc,SAKjCjf,EAAQsZ,OACN4F,IAAK,EACLC,SAAU,EACVC,QAAS,EACT7F,IAAM,EACN8F,QAAU,EACVC,SAAU,EACVC,QAAS,EACTC,KAAO,EACPC,KAAM,EACNC,QAAU,GASZ1f,EAAQmS,UAAUwN,gBAAkB,SAASC,GAC3C,OAAQA,GACN,IAAK,MAAW,MAAO5f,GAAQsZ,MAAMC,GACrC,KAAK,WAAa,MAAOvZ,GAAQsZ,MAAM+F,OACvC,KAAK,YAAe,MAAOrf,GAAQsZ,MAAMgG,QACzC,KAAK,WAAa,MAAOtf,GAAQsZ,MAAMiG,OACvC,KAAK,OAAW,MAAOvf,GAAQsZ,MAAMmG,IACrC,KAAK,OAAW,MAAOzf,GAAQsZ,MAAMkG,IACrC,KAAK,UAAa,MAAOxf,GAAQsZ,MAAMoG,OACvC,KAAK,MAAW,MAAO1f,GAAQsZ,MAAM4F,GACrC,KAAK,YAAe,MAAOlf,GAAQsZ,MAAM6F,QACzC,KAAK,WAAa,MAAOnf,GAAQsZ,MAAM8F,QAGzC,MAAO,IAQTpf,EAAQmS,UAAU0N,wBAA0B,SAASnO,GACnD,GAAI1S,KAAKiN,QAAUjM,EAAQsZ,MAAMC,KAC/Bva,KAAKiN,QAAUjM,EAAQsZ,MAAM+F,SAC7BrgB,KAAKiN,QAAUjM,EAAQsZ,MAAMmG,MAC7BzgB,KAAKiN,QAAUjM,EAAQsZ,MAAMkG,MAC7BxgB,KAAKiN,QAAUjM,EAAQsZ,MAAMoG,SAC7B1gB,KAAKiN,QAAUjM,EAAQsZ,MAAM4F,IAE7BlgB,KAAKob,KAAO,EACZpb,KAAKqb,KAAO,EACZrb,KAAKsb,KAAO,EACZtb,KAAKub,SAAW/U,OAEZkM,EAAK8E,qBAAuB,IAC9BxX,KAAKwb,UAAY,OAGhB,CAAA,GAAIxb,KAAKiN,QAAUjM,EAAQsZ,MAAMgG,UACpCtgB,KAAKiN,QAAUjM,EAAQsZ,MAAMiG,SAC7BvgB,KAAKiN,QAAUjM,EAAQsZ,MAAM6F,UAC7BngB,KAAKiN,QAAUjM,EAAQsZ,MAAM8F,QAY7B,KAAM,kBAAoBpgB,KAAKiN,MAAQ,GAVvCjN,MAAKob,KAAO,EACZpb,KAAKqb,KAAO,EACZrb,KAAKsb,KAAO,EACZtb,KAAKub,SAAW,EAEZ7I,EAAK8E,qBAAuB,IAC9BxX,KAAKwb,UAAY,KAQvBxa,EAAQmS,UAAUsB,gBAAkB,SAAS/B,GAC3C,MAAOA,GAAK/M,QAId3E,EAAQmS,UAAUqE,mBAAqB,SAAS9E,GAC9C,GAAIoO,GAAU,CACd,KAAK,GAAIC,KAAUrO,GAAK,GAClBA,EAAK,GAAG5M,eAAeib,IACzBD,GAGJ,OAAOA,IAIT9f,EAAQmS,UAAU6N,kBAAoB,SAAStO,EAAMqO,GAEnD,IAAK,GADDE,MACKzb,EAAI,EAAGA,EAAIkN,EAAK/M,OAAQH,IACgB,IAA3Cyb,EAAeta,QAAQ+L,EAAKlN,GAAGub,KACjCE,EAAejZ,KAAK0K,EAAKlN,GAAGub,GAGhC,OAAOE,IAITjgB,EAAQmS,UAAU+N,eAAiB,SAASxO,EAAKqO,GAE/C,IAAK,GADDI,IAAUrV,IAAI4G,EAAK,GAAGqO,GAAQrU,IAAIgG,EAAK,GAAGqO,IACrCvb,EAAI,EAAGA,EAAIkN,EAAK/M,OAAQH,IAC3B2b,EAAOrV,IAAM4G,EAAKlN,GAAGub,KAAWI,EAAOrV,IAAM4G,EAAKlN,GAAGub,IACrDI,EAAOzU,IAAMgG,EAAKlN,GAAGub,KAAWI,EAAOzU,IAAMgG,EAAKlN,GAAGub,GAE3D,OAAOI,IASTngB,EAAQmS,UAAUiO,gBAAkB,SAAUC,GAC5C,GAAIlN,GAAKnU,IAOT,IAJIA,KAAKoY,SACPpY,KAAKoY,QAAQ1E,IAAI,IAAK1T,KAAKshB,WAGb9a,SAAZ6a,EAAJ,CAGIpb,MAAMC,QAAQmb,KAChBA,EAAU,GAAIxgB,GAAQwgB,GAGxB,IAAI3O,EACJ,M
AAI2O,YAAmBxgB,IAAWwgB,YAAmBvgB,IAInD,KAAM,IAAI8C,OAAM,uCAGlB,IANE8O,EAAO2O,EAAQnM,MAME,GAAfxC,EAAK/M,OAAT,CAGA3F,KAAKoY,QAAUiJ,EACfrhB,KAAKuX,UAAY7E,EAGjB1S,KAAKshB,UAAY,WACfnN,EAAG6D,QAAQ7D,EAAGiE,UAEhBpY,KAAKoY,QAAQ7E,GAAG,IAAKvT,KAAKshB,WAS1BthB,KAAKob,KAAO,IACZpb,KAAKqb,KAAO,IACZrb,KAAKsb,KAAO,IACZtb,KAAKub,SAAW,QAChBvb,KAAKwb,UAAY,SAKb9I,EAAK,GAAG5M,eAAe,WACDU,SAApBxG,KAAKuhB,aACPvhB,KAAKuhB,WAAa,GAAIpgB,GAAOkgB,EAASrhB,KAAKwb,UAAWxb,MACtDA,KAAKuhB,WAAWC,kBAAkB,WAAYrN,EAAGsN,WAKrD,IAAIC,GAAW1hB,KAAKiN,OAASjM,EAAQsZ,MAAM4F,KACzClgB,KAAKiN,OAASjM,EAAQsZ,MAAM6F,UAC5BngB,KAAKiN,OAASjM,EAAQsZ,MAAM8F,OAG9B,IAAIsB,EAAU,CACZ,GAA8Blb,SAA1BxG,KAAK2hB,iBACP3hB,KAAKoc,UAAYpc,KAAK2hB,qBAEnB,CACH,GAAIC,GAAQ5hB,KAAKghB,kBAAkBtO,EAAK1S,KAAKob,KAC7Cpb,MAAKoc,UAAawF,EAAM,GAAKA,EAAM,IAAO,EAG5C,GAA8Bpb,SAA1BxG,KAAK6hB,iBACP7hB,KAAKqc,UAAYrc,KAAK6hB,qBAEnB,CACH,GAAIC,GAAQ9hB,KAAKghB,kBAAkBtO,EAAK1S,KAAKqb,KAC7Crb,MAAKqc,UAAayF,EAAM,GAAKA,EAAM,IAAO,GAK9C,GAAIC,GAAS/hB,KAAKkhB,eAAexO,EAAK1S,KAAKob,KACvCsG,KACFK,EAAOjW,KAAO9L,KAAKoc,UAAY,EAC/B2F,EAAOrV,KAAO1M,KAAKoc,UAAY,GAEjCpc,KAAKyb,KAA6BjV,SAArBxG,KAAKgiB,YAA6BhiB,KAAKgiB,YAAcD,EAAOjW,IACzE9L,KAAK2b,KAA6BnV,SAArBxG,KAAKiiB,YAA6BjiB,KAAKiiB,YAAcF,EAAOrV,IACrE1M,KAAK2b,MAAQ3b,KAAKyb,OAAMzb,KAAK2b,KAAO3b,KAAKyb,KAAO,GACpDzb,KAAK0b,MAA+BlV,SAAtBxG,KAAKkiB,aAA8BliB,KAAKkiB,cAAgBliB,KAAK2b,KAAK3b,KAAKyb,MAAM,CAE3F,IAAI0G,GAASniB,KAAKkhB,eAAexO,EAAK1S,KAAKqb,KACvCqG,KACFS,EAAOrW,KAAO9L,KAAKqc,UAAY,EAC/B8F,EAAOzV,KAAO1M,KAAKqc,UAAY,GAEjCrc,KAAK4b,KAA6BpV,SAArBxG,KAAKoiB,YAA6BpiB,KAAKoiB,YAAcD,EAAOrW,IACzE9L,KAAK8b,KAA6BtV,SAArBxG,KAAKqiB,YAA6BriB,KAAKqiB,YAAcF,EAAOzV,IACrE1M,KAAK8b,MAAQ9b,KAAK4b,OAAM5b,KAAK8b,KAAO9b,KAAK4b,KAAO,GACpD5b,KAAK6b,MAA+BrV,SAAtBxG,KAAKsiB,aAA8BtiB,KAAKsiB,cAAgBtiB,KAAK8b,KAAK9b,KAAK4b,MAAM,CAE3F;GAAI2G,GAASviB,KAAKkhB,eAAexO,EAAK1S,KAAKsb,KAM3C,IALAtb,KAAK+b,KAA6BvV,SAArBxG,KAAKwiB,YAA6BxiB,KAAKwiB,YAAcD,EAAOzW,IACzE9L,KAAKic,KAA6BzV,SAArBxG,KAAKyiB,YAA6BziB,KAAKyiB,YAAcF,EAAO7V,IACrE1M,KAAKic,MAAQjc,KAAK+b,OAAM/b,KAAKic,KAAOjc,KAAK+b,KAAO,GACpD/b,KAAKgc,MAA+BxV,SAAtBxG,KAAK0iB,aAA8B1iB,KAAK0iB,cAAgB1iB,KAAKic,KAAKjc,KAAK+b,MAAM,EAErEvV,SAAlBxG,KAAKub,SAAwB,CAC/B,GAAIoH,GAAa3iB,KAAKkhB,eAAexO,EAAK1S,KAAKub,SAC/Cvb,MAAKkc,SAAqC1V,SAAzBxG,KAAK4iB,gBAAiC5iB,KAAK4iB,gBAAkBD,EAAW7W,IACzF9L,KAAKmc,SAAqC3V,SAAzBxG,KAAK6iB,gBAAiC7iB,KAAK6iB,gBAAkBF,EAAWjW,IACrF1M,KAAKmc,UAAYnc,KAAKkc,WAAUlc,KAAKmc,SAAWnc,KAAKkc,SAAW,GAItElc,KAAKgd,eAUPhc,EAAQmS,UAAU2P,eAAiB,SAAUpQ,GAE3C,GAAIX,GAAGC,EAAGxM,EAAG0X,EAAG6F,EAAK7Q,EAEjBiJ,IAEJ,IAAInb,KAAKiN,QAAUjM,EAAQsZ,MAAMkG,MAC/BxgB,KAAKiN,QAAUjM,EAAQsZ,MAAMoG,QAAS,CAKtC,GAAIkB,MACAE,IACJ,KAAKtc,EAAI,EAAGA,EAAIxF,KAAKyU,gBAAgB/B,GAAOlN,IAC1CuM,EAAIW,EAAKlN,GAAGxF,KAAKob,OAAS,EAC1BpJ,EAAIU,EAAKlN,GAAGxF,KAAKqb,OAAS,EAED,KAArBuG,EAAMjb,QAAQoL,IAChB6P,EAAM5Z,KAAK+J,GAEY,KAArB+P,EAAMnb,QAAQqL,IAChB8P,EAAM9Z,KAAKgK,EAIf,IAAIgR,GAAa,SAAUzd,EAAGa,GAC5B,MAAOb,GAAIa,EAEbwb,GAAM1L,KAAK8M,GACXlB,EAAM5L,KAAK8M,EAGX,IAAIC,KACJ,KAAKzd,EAAI,EAAGA,EAAIkN,EAAK/M,OAAQH,IAAK,CAChCuM,EAAIW,EAAKlN,GAAGxF,KAAKob,OAAS,EAC1BpJ,EAAIU,EAAKlN,GAAGxF,KAAKqb,OAAS,EAC1B6B,EAAIxK,EAAKlN,GAAGxF,KAAKsb,OAAS,CAE1B,IAAI4H,GAAStB,EAAMjb,QAAQoL,GACvBoR,EAASrB,EAAMnb,QAAQqL,EAEAxL,UAAvByc,EAAWC,KACbD,EAAWC,MAGb,IAAI1F,GAAU,GAAInc,EAClBmc,GAAQzL,EAAIA,EACZyL,EAAQxL,EAAIA,EACZwL,EAAQN,EAAIA,EAEZ6F,KACAA,EAAI7Q,MAAQsL,EACZuF,EAAIK,MAAQ5c,OACZuc,EAAIM,OAAS7c,OACbuc,EAAIO,OAAS,GAAIjiB,GAAQ0Q,EAAGC,EAAGhS,KAAK+b,MAEpCkH,EAAWC,GAAQC,GAAUJ,EAE7B5H,EAAWnT,KAAK+a,GAIlB,IAAKhR,EAAI,EAAGA,EAAIkR,EAAWtd,OAAQoM,IACjC,IAAKC,EAAI,EAAGA,EAAIiR,EAAWlR,GAAGpM,OAAQqM
,IAChCiR,EAAWlR,GAAGC,KAChBiR,EAAWlR,GAAGC,GAAGuR,WAAcxR,EAAIkR,EAAWtd,OAAO,EAAKsd,EAAWlR,EAAE,GAAGC,GAAKxL,OAC/Eyc,EAAWlR,GAAGC,GAAGwR,SAAcxR,EAAIiR,EAAWlR,GAAGpM,OAAO,EAAKsd,EAAWlR,GAAGC,EAAE,GAAKxL,OAClFyc,EAAWlR,GAAGC,GAAGyR,WACd1R,EAAIkR,EAAWtd,OAAO,GAAKqM,EAAIiR,EAAWlR,GAAGpM,OAAO,EACnDsd,EAAWlR,EAAE,GAAGC,EAAE,GAClBxL,YAOV,KAAKhB,EAAI,EAAGA,EAAIkN,EAAK/M,OAAQH,IAC3B0M,EAAQ,GAAI7Q,GACZ6Q,EAAMH,EAAIW,EAAKlN,GAAGxF,KAAKob,OAAS,EAChClJ,EAAMF,EAAIU,EAAKlN,GAAGxF,KAAKqb,OAAS,EAChCnJ,EAAMgL,EAAIxK,EAAKlN,GAAGxF,KAAKsb,OAAS,EAEV9U,SAAlBxG,KAAKub,WACPrJ,EAAM7K,MAAQqL,EAAKlN,GAAGxF,KAAKub,WAAa,GAG1CwH,KACAA,EAAI7Q,MAAQA,EACZ6Q,EAAIO,OAAS,GAAIjiB,GAAQ6Q,EAAMH,EAAGG,EAAMF,EAAGhS,KAAK+b,MAChDgH,EAAIK,MAAQ5c,OACZuc,EAAIM,OAAS7c,OAEb2U,EAAWnT,KAAK+a,EAIpB,OAAO5H,IASTna,EAAQmS,UAAU9E,OAAS,WAEzB,KAAOrO,KAAKyZ,iBAAiBiK,iBAC3B1jB,KAAKyZ,iBAAiBtI,YAAYnR,KAAKyZ,iBAAiBkK,WAG1D3jB,MAAKsf,MAAQ/N,SAASM,cAAc,OACpC7R,KAAKsf,MAAMrS,MAAM2W,SAAW,WAC5B5jB,KAAKsf,MAAMrS,MAAM4W,SAAW,SAG5B7jB,KAAKsf,MAAMC,OAAShO,SAASM,cAAe,UAC5C7R,KAAKsf,MAAMC,OAAOtS,MAAM2W,SAAW,WACnC5jB,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAMC,OAGhC,IAAIuE,GAAWvS,SAASM,cAAe,MACvCiS,GAAS7W,MAAM9B,MAAQ,MACvB2Y,EAAS7W,MAAM8W,WAAc,OAC7BD,EAAS7W,MAAM+W,QAAW,OAC1BF,EAASG,UAAa,mDACtBjkB,KAAKsf,MAAMC,OAAO9N,YAAYqS,GAGhC9jB,KAAKsf,MAAM3L,OAASpC,SAASM,cAAe,OAC5C7R,KAAKsf,MAAM3L,OAAO1G,MAAM2W,SAAW,WACnC5jB,KAAKsf,MAAM3L,OAAO1G,MAAMqW,OAAS,MACjCtjB,KAAKsf,MAAM3L,OAAO1G,MAAMxF,KAAO,MAC/BzH,KAAKsf,MAAM3L,OAAO1G,MAAMsF,MAAQ,OAChCvS,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAM3L,OAGlC,IAAIQ,GAAKnU,KACLkkB,EAAc,SAAU5a,GAAQ6K,EAAGgQ,aAAa7a,IAChD8a,EAAe,SAAU9a,GAAQ6K,EAAGkQ,cAAc/a,IAClDgb,EAAe,SAAUhb,GAAQ6K,EAAGoQ,SAASjb,IAC7Ckb,EAAY,SAAUlb,GAAQ6K,EAAGsQ,WAAWnb,GAGhD3I,GAAKgI,iBAAiB3I,KAAKsf,MAAMC,OAAQ,UAAWmF,WACpD/jB,EAAKgI,iBAAiB3I,KAAKsf,MAAMC,OAAQ,YAAa2E,GACtDvjB,EAAKgI,iBAAiB3I,KAAKsf,MAAMC,OAAQ,aAAc6E,GACvDzjB,EAAKgI,iBAAiB3I,KAAKsf,MAAMC,OAAQ,aAAc+E,GACvD3jB,EAAKgI,iBAAiB3I,KAAKsf,MAAMC,OAAQ,YAAaiF,GAGtDxkB,KAAKyZ,iBAAiBhI,YAAYzR,KAAKsf,QAWzCte,EAAQmS,UAAUwR,QAAU,SAASpS,EAAOC,GAC1CxS,KAAKsf,MAAMrS,MAAMsF,MAAQA,EACzBvS,KAAKsf,MAAMrS,MAAMuF,OAASA,EAE1BxS,KAAK4kB,iBAMP5jB,EAAQmS,UAAUyR,cAAgB,WAChC5kB,KAAKsf,MAAMC,OAAOtS,MAAMsF,MAAQ,OAChCvS,KAAKsf,MAAMC,OAAOtS,MAAMuF,OAAS,OAEjCxS,KAAKsf,MAAMC,OAAOhN,MAAQvS,KAAKsf,MAAMC,OAAOC,YAC5Cxf,KAAKsf,MAAMC,OAAO/M,OAASxS,KAAKsf,MAAMC,OAAOsF,aAG7C7kB,KAAKsf,MAAM3L,OAAO1G,MAAMsF,MAASvS,KAAKsf,MAAMC,OAAOC,YAAc,GAAU,MAM7Exe,EAAQmS,UAAU2R,eAAiB,WACjC,IAAK9kB,KAAKsf,MAAM3L,SAAW3T,KAAKsf,MAAM3L,OAAOoR,OAC3C,KAAM,wBAER/kB,MAAKsf,MAAM3L,OAAOoR,OAAOC,QAO3BhkB,EAAQmS,UAAU8R,cAAgB,WAC3BjlB,KAAKsf,MAAM3L,QAAW3T,KAAKsf,MAAM3L,OAAOoR,QAE7C/kB,KAAKsf,MAAM3L,OAAOoR,OAAOG,QAU3BlkB,EAAQmS,UAAUgS,cAAgB,WAG9BnlB,KAAKqf,QAD0D,MAA7Drf,KAAK2Z,eAAeyL,OAAOplB,KAAK2Z,eAAehU,OAAO,GAEtD0f,WAAWrlB,KAAK2Z,gBAAkB,IAChC3Z,KAAKsf,MAAMC,OAAOC,YAGP6F,WAAWrlB,KAAK2Z,gBAK/B3Z,KAAKyf,QAD0D,MAA7Dzf,KAAK4Z,eAAewL,OAAOplB,KAAK4Z,eAAejU,OAAO,GAEtD0f,WAAWrlB,KAAK4Z,gBAAkB,KAC/B5Z,KAAKsf,MAAMC,OAAOsF,aAAe7kB,KAAKsf,MAAM3L,OAAOkR,cAGzCQ,WAAWrlB,KAAK4Z,iBAoBnC5Y,EAAQmS,UAAUmS,kBAAoB,SAASC,GACjC/e,SAAR+e,IAImB/e,SAAnB+e,EAAIC,YAA6Chf,SAAjB+e,EAAIE,UACtCzlB,KAAKib,OAAOyK,eAAeH,EAAIC,WAAYD,EAAIE,UAG5Bjf,SAAjB+e,EAAII,UACN3lB,KAAKib,OAAO2K,aAAaL,EAAII,UAG/B3lB,KAAKyhB,WASPzgB,EAAQmS,UAAU0S,kBAAoB,WACpC,GAAIN,GAAMvlB,KAAKib,OAAO6K,gBAEtB,OADAP,GAAII,SAAW3lB,KAAKib,OAAOmE,eACpBmG,GAMTvkB,EAAQmS,UAAU4S,UAAY,SAASrT,GAErC1S,KAAKohB,gBAAgB1O,EAAM1S,KAAKiN,OAK9BjN,KAAKmb,WAFHnb,KAAKuhB,WAEWvhB,KAAKuhB,WAAWuB,iBAIhB9iB,KAAK8iB,eAAe9iB,KAAKuX,WAI7CvX,KAAKgmB,iBAOPhlB,EAAQmS,UAAU6E,QAAU,SAAUtF,GACpC1S,KAA
K+lB,UAAUrT,GACf1S,KAAKyhB,SAGDzhB,KAAKimB,oBAAsBjmB,KAAKuhB,YAClCvhB,KAAK8kB,kBAQT9jB,EAAQmS,UAAUD,WAAa,SAAUzE,GACvC,GAAIyX,GAAiB1f,MAIrB,IAFAxG,KAAKilB,gBAEWze,SAAZiI,EAAuB,CAkBzB,GAhBsBjI,SAAlBiI,EAAQ8D,QAA2BvS,KAAKuS,MAAQ9D,EAAQ8D,OACrC/L,SAAnBiI,EAAQ+D,SAA2BxS,KAAKwS,OAAS/D,EAAQ+D,QAErChM,SAApBiI,EAAQ0O,UAA2Bnd,KAAK2Z,eAAiBlL,EAAQ0O,SAC7C3W,SAApBiI,EAAQ2O,UAA2Bpd,KAAK4Z,eAAiBnL,EAAQ2O,SAEzC5W,SAAxBiI,EAAQ2L,cAA+Bpa,KAAKoa,YAAc3L,EAAQ2L,aAC1C5T,SAAxBiI,EAAQ4L,cAA+Bra,KAAKqa,YAAc5L,EAAQ4L,aAC/C7T,SAAnBiI,EAAQoL,SAA0B7Z,KAAK6Z,OAASpL,EAAQoL,QACrCrT,SAAnBiI,EAAQqL,SAA0B9Z,KAAK8Z,OAASrL,EAAQqL,QACrCtT,SAAnBiI,EAAQsL,SAA0B/Z,KAAK+Z,OAAStL,EAAQsL,QAEhCvT,SAAxBiI,EAAQwL,cAA+Bja,KAAKia,YAAcxL,EAAQwL,aAC1CzT,SAAxBiI,EAAQyL,cAA+Bla,KAAKka,YAAczL,EAAQyL,aAC1C1T,SAAxBiI,EAAQ0L,cAA+Bna,KAAKma,YAAc1L,EAAQ0L,aAEhD3T,SAAlBiI,EAAQxB,MAAqB,CAC/B,GAAIkZ,GAAcnmB,KAAK2gB,gBAAgBlS,EAAQxB,MAC3B,MAAhBkZ,IACFnmB,KAAKiN,MAAQkZ,GAGQ3f,SAArBiI,EAAQgM,WAA6Bza,KAAKya,SAAWhM,EAAQgM,UACjCjU,SAA5BiI,EAAQ+L,kBAAiCxa,KAAKwa,gBAAkB/L,EAAQ+L,iBACjDhU,SAAvBiI,EAAQkM,aAA6B3a,KAAK2a,WAAalM,EAAQkM,YAC3CnU,SAApBiI,EAAQ2X,UAA6BpmB,KAAK6a,YAAcpM,EAAQ2X,SAC9B5f,SAAlCiI,EAAQ4X,wBAAqCrmB,KAAKqmB,sBAAwB5X,EAAQ4X,uBACtD7f,SAA5BiI,EAAQiM,kBAAiC1a,KAAK0a,gBAAkBjM,EAAQiM,iBAC9ClU,SAA1BiI,EAAQqM,gBAA+B9a,KAAK8a,cAAgBrM,EAAQqM,eAEtCtU,SAA9BiI,EAAQsM,oBAAiC/a,KAAK+a,kBAAoBtM,EAAQsM,mBAC7CvU,SAA7BiI,EAAQuM,mBAAiChb,KAAKgb,iBAAmBvM,EAAQuM,kBAC1CxU,SAA/BiI,EAAQwX,qBAAiCjmB,KAAKimB,mBAAqBxX,EAAQwX,oBAErDzf,SAAtBiI,EAAQ2N,YAAyBpc,KAAK2hB,iBAAmBlT,EAAQ2N,WAC3C5V,SAAtBiI,EAAQ4N,YAAyBrc,KAAK6hB,iBAAmBpT,EAAQ4N,WAEhD7V,SAAjBiI,EAAQgN,OAAoBzb,KAAKgiB,YAAcvT,EAAQgN,MACrCjV,SAAlBiI,EAAQiN,QAAqB1b,KAAKkiB,aAAezT,EAAQiN,OACxClV,SAAjBiI,EAAQkN,OAAoB3b,KAAKiiB,YAAcxT,EAAQkN,MACtCnV,SAAjBiI,EAAQmN,OAAoB5b,KAAKoiB,YAAc3T,EAAQmN,MACrCpV,SAAlBiI,EAAQoN,QAAqB7b,KAAKsiB,aAAe7T,EAAQoN,OACxCrV,SAAjBiI,EAAQqN,OAAoB9b,KAAKqiB,YAAc5T,EAAQqN,MACtCtV,SAAjBiI,EAAQsN,OAAoB/b,KAAKwiB,YAAc/T,EAAQsN,MACrCvV,SAAlBiI,EAAQuN,QAAqBhc,KAAK0iB,aAAejU,EAAQuN,OACxCxV,SAAjBiI,EAAQwN,OAAoBjc,KAAKyiB,YAAchU,EAAQwN,MAClCzV,SAArBiI,EAAQyN,WAAwBlc,KAAK4iB,gBAAkBnU,EAAQyN,UAC1C1V,SAArBiI,EAAQ0N,WAAwBnc,KAAK6iB,gBAAkBpU,EAAQ0N,UAEpC3V,SAA3BiI,EAAQyX,iBAA8BA,EAAiBzX,EAAQyX,gBAE5C1f,SAAnB0f,GACFlmB,KAAKib,OAAOyK,eAAeQ,EAAeV,WAAYU,EAAeT,UACrEzlB,KAAKib,OAAO2K,aAAaM,EAAeP,YAGxC3lB,KAAKib,OAAOyK,eAAe,EAAK,IAChC1lB,KAAKib,OAAO2K,aAAa,MAI7B5lB,KAAK0f,oBAAoBjR,GAAWA,EAAQkR,iBAE5C3f,KAAK2kB,QAAQ3kB,KAAKuS,MAAOvS,KAAKwS,QAG1BxS,KAAKuX,WACPvX,KAAKgY,QAAQhY,KAAKuX,WAIhBvX,KAAKimB,oBAAsBjmB,KAAKuhB,YAClCvhB,KAAK8kB,kBAOT9jB,EAAQmS,UAAUsO,OAAS,WACzB,GAAwBjb,SAApBxG,KAAKmb,WACP,KAAM,mCAGRnb,MAAK4kB,gBACL5kB,KAAKmlB,gBACLnlB,KAAKsmB,gBACLtmB,KAAKumB,eACLvmB,KAAKwmB,cAEDxmB,KAAKiN,QAAUjM,EAAQsZ,MAAMkG,MAC/BxgB,KAAKiN,QAAUjM,EAAQsZ,MAAMoG,QAC7B1gB,KAAKymB,kBAEEzmB,KAAKiN,QAAUjM,EAAQsZ,MAAMmG,KACpCzgB,KAAK0mB,kBAEE1mB,KAAKiN,QAAUjM,EAAQsZ,MAAM4F,KACpClgB,KAAKiN,QAAUjM,EAAQsZ,MAAM6F,UAC7BngB,KAAKiN,QAAUjM,EAAQsZ,MAAM8F,QAC7BpgB,KAAK2mB,iBAIL3mB,KAAK4mB,iBAGP5mB,KAAK6mB,cACL7mB,KAAK8mB,iBAMP9lB,EAAQmS,UAAUoT,aAAe,WAC/B,GAAIhH,GAASvf,KAAKsf,MAAMC,OACpBwH,EAAMxH,EAAOyH,WAAW,KAE5BD,GAAIE,UAAU,EAAG,EAAG1H,EAAOhN,MAAOgN,EAAO/M,SAO3CxR,EAAQmS,UAAU2T,cAAgB,WAChC,GAAI9U,EAEJ,IAAIhS,KAAKiN,QAAUjM,EAAQsZ,MAAMgG,UAC/BtgB,KAAKiN,QAAUjM,EAAQsZ,MAAMiG,QAAS,CAEtC,GAEI2G,GAAUC,EAFVC,EAAmC,IAAzBpnB,KAAKsf,MAAME,WAGrBxf,MAAKiN,QAAUjM,EAAQsZ,MAAMiG,SAC/B2G,EAAWE,EAAU,EACrBD,EAAWC,EAAU,EAAc,EAAVA,IAGzBF,EAAW,GACXC,EAAW,GAGb,IAAI3U,GAAStN,KAAKwH,IAA8B,IAA1B1M,KAAKsf,MAAMuF,aAAqB,KAClDld,EAAM3H,KAAK0Z,OACX2N,EAAQrnB,KAAKsf,MAAME,YA
Acxf,KAAK0Z,OACtCjS,EAAO4f,EAAQF,EACf7D,EAAS3b,EAAM6K,EAGrB,GAAI+M,GAASvf,KAAKsf,MAAMC,OACpBwH,EAAMxH,EAAOyH,WAAW,KAI5B,IAHAD,EAAIO,UAAY,EAChBP,EAAIQ,KAAO,aAEPvnB,KAAKiN,QAAUjM,EAAQsZ,MAAMgG,SAAU,CAEzC,GAAIkH,GAAO,EACPC,EAAOjV,CACX,KAAKR,EAAIwV,EAAUC,EAAJzV,EAAUA,IAAK,CAC5B,GAAIpE,IAAKoE,EAAIwV,IAASC,EAAOD,GAGzB5a,EAAU,IAAJgB,EACNzC,EAAQnL,KAAK0nB,SAAS9a,EAAK,EAAG,EAElCma,GAAIY,YAAcxc,EAClB4b,EAAIa,YACJb,EAAIc,OAAOpgB,EAAME,EAAMqK,GACvB+U,EAAIe,OAAOT,EAAO1f,EAAMqK,GACxB+U,EAAIlH,SAGNkH,EAAIY,YAAe3nB,KAAKsc,UACxByK,EAAIgB,WAAWtgB,EAAME,EAAKwf,EAAU3U,GAiBtC,GAdIxS,KAAKiN,QAAUjM,EAAQsZ,MAAMiG,UAE/BwG,EAAIY,YAAe3nB,KAAKsc,UACxByK,EAAIiB,UAAahoB,KAAKwc,SACtBuK,EAAIa,YACJb,EAAIc,OAAOpgB,EAAME,GACjBof,EAAIe,OAAOT,EAAO1f,GAClBof,EAAIe,OAAOT,EAAQF,EAAWD,EAAU5D,GACxCyD,EAAIe,OAAOrgB,EAAM6b,GACjByD,EAAIkB,YACJlB,EAAInH,OACJmH,EAAIlH,UAGF7f,KAAKiN,QAAUjM,EAAQsZ,MAAMgG,UAC/BtgB,KAAKiN,QAAUjM,EAAQsZ,MAAMiG,QAAS,CAEtC,GAAI2H,GAAc,EACdC,EAAO,GAAI5mB,GAAWvB,KAAKkc,SAAUlc,KAAKmc,UAAWnc,KAAKmc,SAASnc,KAAKkc,UAAU,GAAG,EAKzF,KAJAiM,EAAKvY,QACDuY,EAAKC,aAAepoB,KAAKkc,UAC3BiM,EAAKE,QAECF,EAAKtY,OACXmC,EAAIsR,GAAU6E,EAAKC,aAAepoB,KAAKkc,WAAalc,KAAKmc,SAAWnc,KAAKkc,UAAY1J,EAErFuU,EAAIa,YACJb,EAAIc,OAAOpgB,EAAOygB,EAAalW,GAC/B+U,EAAIe,OAAOrgB,EAAMuK,GACjB+U,EAAIlH,SAEJkH,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,SACnBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAASL,EAAKC,aAAc3gB,EAAO,EAAIygB,EAAalW,GAExDmW,EAAKE,MAGPtB,GAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,KACnB,IAAIE,GAAQzoB,KAAKqa,WACjB0M,GAAIyB,SAASC,EAAOpB,EAAO/D,EAAStjB,KAAK0Z,UAO7C1Y,EAAQmS,UAAU6S,cAAgB,WAGhC,GAFAhmB,KAAKsf,MAAM3L,OAAOsQ,UAAY,GAE1BjkB,KAAKuhB,WAAY,CACnB,GAAI9S,IACFia,QAAW1oB,KAAKqmB,uBAEdtB,EAAS,GAAIzjB,GAAOtB,KAAKsf,MAAM3L,OAAQlF,EAC3CzO,MAAKsf,MAAM3L,OAAOoR,OAASA,EAG3B/kB,KAAKsf,MAAM3L,OAAO1G,MAAM+W,QAAU,OAGlCe,EAAO4D,UAAU3oB,KAAKuhB,WAAWzK,QACjCiO,EAAO6D,gBAAgB5oB,KAAK+a,kBAG5B,IAAI5G,GAAKnU,KACL6oB,EAAW,WACb,GAAI1gB,GAAQ4c,EAAO+D,UAEnB3U,GAAGoN,WAAWwH,YAAY5gB,GAC1BgM,EAAGgH,WAAahH,EAAGoN,WAAWuB,iBAE9B3O,EAAGsN,SAELsD,GAAOiE,oBAAoBH,OAG3B7oB,MAAKsf,MAAM3L,OAAOoR,OAASve,QAO/BxF,EAAQmS,UAAUmT,cAAgB,WACE9f,SAA7BxG,KAAKsf,MAAM3L,OAAOoR,QACrB/kB,KAAKsf,MAAM3L,OAAOoR,OAAOtD,UAQ7BzgB,EAAQmS,UAAU0T,YAAc,WAC9B,GAAI7mB,KAAKuhB,WAAY,CACnB,GAAIhC,GAASvf,KAAKsf,MAAMC,OACpBwH,EAAMxH,EAAOyH,WAAW,KAE5BD,GAAIQ,KAAO,aACXR,EAAIkC,UAAY,OAChBlC,EAAIiB,UAAY,OAChBjB,EAAIuB,UAAY,OAChBvB,EAAIwB,aAAe,KAEnB,IAAIxW,GAAI/R,KAAK0Z,OACT1H,EAAIhS,KAAK0Z,MACbqN,GAAIyB,SAASxoB,KAAKuhB,WAAW2H,WAAa,KAAOlpB,KAAKuhB,WAAW4H,mBAAoBpX,EAAGC,KAQ5FhR,EAAQmS,UAAUqT,YAAc,WAC9B,GAEE4C,GAAMC,EAAIlB,EAAMmB,EAChBC,EAAMC,EAAOC,EAAOC,EACpBC,EAAQC,EAASC,EACjBC,EAAQC,EALNxK,EAASvf,KAAKsf,MAAMC,OACtBwH,EAAMxH,EAAOyH,WAAW,KAQ1BD,GAAIQ,KAAO,GAAKvnB,KAAKib,OAAOmE,eAAiB,UAG7C,IAAI4K,GAAW,KAAQhqB,KAAKid,MAAMlL,EAC9BkY,EAAW,KAAQjqB,KAAKid,MAAMjL,EAC9BkY,EAAa,EAAIlqB,KAAKib,OAAOmE,eAC7B+K,EAAWnqB,KAAKib,OAAO6K,iBAAiBN,UAU5C,KAPAuB,EAAIO,UAAY,EAChBgC,EAAoC9iB,SAAtBxG,KAAKkiB,aACnBiG,EAAO,GAAI5mB,GAAWvB,KAAKyb,KAAMzb,KAAK2b,KAAM3b,KAAK0b,MAAO4N,GACxDnB,EAAKvY,QACDuY,EAAKC,aAAepoB,KAAKyb,MAC3B0M,EAAKE,QAECF,EAAKtY,OAAO,CAClB,GAAIkC,GAAIoW,EAAKC,YAETpoB,MAAKya,UACP2O,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG/R,KAAK4b,KAAM5b,KAAK+b,OAC1DsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG/R,KAAK8b,KAAM9b,KAAK+b,OACxDgL,EAAIY,YAAc3nB,KAAKuc,UACvBwK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,WAGJuJ,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG/R,KAAK4b,KAAM5b,KAAK+b,OAC1DsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG/R,KAAK4b,KAAKoO,EAAUhqB,KAAK+b,OACjEgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YA
CJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,SAEJuJ,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG/R,KAAK8b,KAAM9b,KAAK+b,OAC1DsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG/R,KAAK8b,KAAKkO,EAAUhqB,KAAK+b,OACjEgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,UAGN4J,EAASvkB,KAAKqZ,IAAI4L,GAAY,EAAKnqB,KAAK4b,KAAO5b,KAAK8b,KACpDyN,EAAOvpB,KAAKud,eAAe,GAAIlc,GAAQ0Q,EAAG0X,EAAOzpB,KAAK+b,OAClD7W,KAAKqZ,IAAe,EAAX4L,GAAgB,GAC3BpD,EAAIuB,UAAY,SAChBvB,EAAIwB,aAAe,MACnBgB,EAAKvX,GAAKkY,GAEHhlB,KAAKkZ,IAAe,EAAX+L,GAAgB,GAChCpD,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,WAGnBxB,EAAIuB,UAAY,OAChBvB,EAAIwB,aAAe,UAErBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAAS,KAAOxoB,KAAKia,YAAYkO,EAAKC,cAAgB,KAAMmB,EAAKxX,EAAGwX,EAAKvX,GAE7EmW,EAAKE,OAWP,IAPAtB,EAAIO,UAAY,EAChBgC,EAAoC9iB,SAAtBxG,KAAKsiB,aACnB6F,EAAO,GAAI5mB,GAAWvB,KAAK4b,KAAM5b,KAAK8b,KAAM9b,KAAK6b,MAAOyN,GACxDnB,EAAKvY,QACDuY,EAAKC,aAAepoB,KAAK4b,MAC3BuM,EAAKE,QAECF,EAAKtY,OACP7P,KAAKya,UACP2O,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAM0M,EAAKC,aAAcpoB,KAAK+b,OAC1EsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAMwM,EAAKC,aAAcpoB,KAAK+b,OACxEgL,EAAIY,YAAc3nB,KAAKuc,UACvBwK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,WAGJuJ,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAM0M,EAAKC,aAAcpoB,KAAK+b,OAC1EsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAKwO,EAAU9B,EAAKC,aAAcpoB,KAAK+b,OACjFgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,SAEJuJ,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAMwM,EAAKC,aAAcpoB,KAAK+b,OAC1EsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAKsO,EAAU9B,EAAKC,aAAcpoB,KAAK+b,OACjFgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,UAGN2J,EAAStkB,KAAKkZ,IAAI+L,GAAa,EAAKnqB,KAAKyb,KAAOzb,KAAK2b,KACrD4N,EAAOvpB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOrB,EAAKC,aAAcpoB,KAAK+b,OAClE7W,KAAKqZ,IAAe,EAAX4L,GAAgB,GAC3BpD,EAAIuB,UAAY,SAChBvB,EAAIwB,aAAe,MACnBgB,EAAKvX,GAAKkY,GAEHhlB,KAAKkZ,IAAe,EAAX+L,GAAgB,GAChCpD,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,WAGnBxB,EAAIuB,UAAY,OAChBvB,EAAIwB,aAAe,UAErBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAAS,KAAOxoB,KAAKka,YAAYiO,EAAKC,cAAgB,KAAMmB,EAAKxX,EAAGwX,EAAKvX,GAE7EmW,EAAKE,MAaP,KATAtB,EAAIO,UAAY,EAChBgC,EAAoC9iB,SAAtBxG,KAAK0iB,aACnByF,EAAO,GAAI5mB,GAAWvB,KAAK+b,KAAM/b,KAAKic,KAAMjc,KAAKgc,MAAOsN,GACxDnB,EAAKvY,QACDuY,EAAKC,aAAepoB,KAAK+b,MAC3BoM,EAAKE,OAEPmB,EAAStkB,KAAKqZ,IAAI4L,GAAa,EAAKnqB,KAAKyb,KAAOzb,KAAK2b,KACrD8N,EAASvkB,KAAKkZ,IAAI+L,GAAa,EAAKnqB,KAAK4b,KAAO5b,KAAK8b,MAC7CqM,EAAKtY,OAEXuZ,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOC,EAAOtB,EAAKC,eAC1DrB,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOsB,EAAKrX,EAAImY,EAAYd,EAAKpX,GACrC+U,EAAIlH,SAEJkH,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,SACnBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAASxoB,KAAKma,YAAYgO,EAAKC,cAAgB,IAAKgB,EAAKrX,EAAI,EAAGqX,EAAKpX,GAEzEmW,EAAKE,MAEPtB,GAAIO,UAAY,EAChB8B,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOC,EAAOzpB,KAAK+b,OAC1DsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOC,EAAOzpB,KAAKic,OACxD8K,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,SAGJkH,EAAIO,UAAY,EAEhBwC,EAAS9pB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAMzb,KAAK4b,KAAM5b,KAAK+b,OACpEgO,EAAS/pB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAM3b,KAAK4b,
KAAM5b,KAAK+b,OACpEgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOiC,EAAO/X,EAAG+X,EAAO9X,GAC5B+U,EAAIe,OAAOiC,EAAOhY,EAAGgY,EAAO/X,GAC5B+U,EAAIlH,SAEJiK,EAAS9pB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAMzb,KAAK8b,KAAM9b,KAAK+b,OACpEgO,EAAS/pB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAM3b,KAAK8b,KAAM9b,KAAK+b,OACpEgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOiC,EAAO/X,EAAG+X,EAAO9X,GAC5B+U,EAAIe,OAAOiC,EAAOhY,EAAGgY,EAAO/X,GAC5B+U,EAAIlH,SAGJkH,EAAIO,UAAY,EAEhB8B,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAMzb,KAAK4b,KAAM5b,KAAK+b,OAClEsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAKyb,KAAMzb,KAAK8b,KAAM9b,KAAK+b,OAChEgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,SAEJuJ,EAAOppB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAM3b,KAAK4b,KAAM5b,KAAK+b,OAClEsN,EAAKrpB,KAAKud,eAAe,GAAIlc,GAAQrB,KAAK2b,KAAM3b,KAAK8b,KAAM9b,KAAK+b,OAChEgL,EAAIY,YAAc3nB,KAAKsc,UACvByK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,GACxB+U,EAAIe,OAAOuB,EAAGtX,EAAGsX,EAAGrX,GACpB+U,EAAIlH,QAGJ,IAAIhG,GAAS7Z,KAAK6Z,MACdA,GAAOlU,OAAS,IAClBkkB,EAAU,GAAM7pB,KAAKid,MAAMjL,EAC3BwX,GAASxpB,KAAKyb,KAAOzb,KAAK2b,MAAQ,EAClC8N,EAASvkB,KAAKqZ,IAAI4L,GAAY,EAAKnqB,KAAK4b,KAAOiO,EAAS7pB,KAAK8b,KAAO+N,EACpEN,EAAOvpB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOC,EAAOzpB,KAAK+b,OACtD7W,KAAKqZ,IAAe,EAAX4L,GAAgB,GAC3BpD,EAAIuB,UAAY,SAChBvB,EAAIwB,aAAe,OAEZrjB,KAAKkZ,IAAe,EAAX+L,GAAgB,GAChCpD,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,WAGnBxB,EAAIuB,UAAY,OAChBvB,EAAIwB,aAAe,UAErBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAAS3O,EAAQ0P,EAAKxX,EAAGwX,EAAKvX,GAIpC,IAAI8H,GAAS9Z,KAAK8Z,MACdA,GAAOnU,OAAS,IAClBikB,EAAU,GAAM5pB,KAAKid,MAAMlL,EAC3ByX,EAAStkB,KAAKkZ,IAAI+L,GAAa,EAAKnqB,KAAKyb,KAAOmO,EAAU5pB,KAAK2b,KAAOiO,EACtEH,GAASzpB,KAAK4b,KAAO5b,KAAK8b,MAAQ,EAClCyN,EAAOvpB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOC,EAAOzpB,KAAK+b,OACtD7W,KAAKqZ,IAAe,EAAX4L,GAAgB,GAC3BpD,EAAIuB,UAAY,SAChBvB,EAAIwB,aAAe,OAEZrjB,KAAKkZ,IAAe,EAAX+L,GAAgB,GAChCpD,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,WAGnBxB,EAAIuB,UAAY,OAChBvB,EAAIwB,aAAe,UAErBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAAS1O,EAAQyP,EAAKxX,EAAGwX,EAAKvX,GAIpC,IAAI+H,GAAS/Z,KAAK+Z,MACdA,GAAOpU,OAAS,IAClBgkB,EAAS,GACTH,EAAStkB,KAAKqZ,IAAI4L,GAAa,EAAKnqB,KAAKyb,KAAOzb,KAAK2b,KACrD8N,EAASvkB,KAAKkZ,IAAI+L,GAAa,EAAKnqB,KAAK4b,KAAO5b,KAAK8b,KACrD4N,GAAS1pB,KAAK+b,KAAO/b,KAAKic,MAAQ,EAClCsN,EAAOvpB,KAAKud,eAAe,GAAIlc,GAAQmoB,EAAOC,EAAOC,IACrD3C,EAAIuB,UAAY,QAChBvB,EAAIwB,aAAe,SACnBxB,EAAIiB,UAAYhoB,KAAKsc,UACrByK,EAAIyB,SAASzO,EAAQwP,EAAKxX,EAAI4X,EAAQJ,EAAKvX,KAU/ChR,EAAQmS,UAAUuU,SAAW,SAAS0C,EAAGC,EAAGC,GAC1C,GAAIC,GAAGC,EAAGC,EAAGC,EAAGC,EAAIC,CAMpB,QAJAF,EAAIJ,EAAID,EACRM,EAAKzlB,KAAKC,MAAMilB,EAAE,IAClBQ,EAAIF,GAAK,EAAIxlB,KAAK2lB,IAAMT,EAAE,GAAM,EAAK,IAE7BO,GACN,IAAK,GAAGJ,EAAIG,EAAGF,EAAII,EAAGH,EAAI,CAAG,MAC7B,KAAK,GAAGF,EAAIK,EAAGJ,EAAIE,EAAGD,EAAI,CAAG,MAC7B,KAAK,GAAGF,EAAI,EAAGC,EAAIE,EAAGD,EAAIG,CAAG,MAC7B,KAAK,GAAGL,EAAI,EAAGC,EAAII,EAAGH,EAAIC,CAAG,MAC7B,KAAK,GAAGH,EAAIK,EAAGJ,EAAI,EAAGC,EAAIC,CAAG,MAC7B,KAAK,GAAGH,EAAIG,EAAGF,EAAI,EAAGC,EAAIG,CAAG,MAE7B,SAASL,EAAI,EAAGC,EAAI,EAAGC,EAAI,EAG7B,MAAO,OAAS7f,SAAW,IAAF2f,GAAS,IAAM3f,SAAW,IAAF4f,GAAS,IAAM5f,SAAW,IAAF6f,GAAS,KAQpFzpB,EAAQmS,UAAUsT,gBAAkB,WAClC,GAEEvU,GAAOmV,EAAO1f,EAAKmjB,EACnBtlB,EACAulB,EAAgB/C,EAAWL,EAAaL,EACxC3b,EAAGC,EAAGC,EAAGmf,EALPzL,EAASvf,KAAKsf,MAAMC,OACtBwH,EAAMxH,EAAOyH,WAAW,KAO1B,MAAwBxgB,SAApBxG,KAAKmb,YAA4Bnb,KAAKmb,WAAWxV,QAAU,GAA/D,CAIA,IAAKH,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C,GAAI4d,GAAQpjB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG0M,OAC3DmR,EAASrjB,KAAK2d,4BAA4ByF,EAE9CpjB,M
AAKmb,WAAW3V,GAAG4d,MAAQA,EAC3BpjB,KAAKmb,WAAW3V,GAAG6d,OAASA,CAG5B,IAAI4H,GAAcjrB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG8d,OACrEtjB,MAAKmb,WAAW3V,GAAG0lB,KAAOlrB,KAAKwa,gBAAkByQ,EAAYtlB,UAAYslB,EAAY/N,EAIvF,GAAIiO,GAAY,SAAU5lB,EAAGa,GAC3B,MAAOA,GAAE8kB,KAAO3lB,EAAE2lB,KAIpB,IAFAlrB,KAAKmb,WAAWjF,KAAKiV,GAEjBnrB,KAAKiN,QAAUjM,EAAQsZ,MAAMoG,SAC/B,IAAKlb,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAMtC,GALA0M,EAAQlS,KAAKmb,WAAW3V,GACxB6hB,EAAQrnB,KAAKmb,WAAW3V,GAAG+d,WAC3B5b,EAAQ3H,KAAKmb,WAAW3V,GAAGge,SAC3BsH,EAAQ9qB,KAAKmb,WAAW3V,GAAGie,WAEbjd,SAAV0L,GAAiC1L,SAAV6gB,GAA+B7gB,SAARmB,GAA+BnB,SAAVskB,EAAqB,CAE1F,GAAI9qB,KAAK4a,gBAAkB5a,KAAK2a,WAAY,CAK1C,GAAIyQ,GAAQ/pB,EAAQgqB,SAASP,EAAM1H,MAAOlR,EAAMkR,OAC5CkI,EAAQjqB,EAAQgqB,SAAS1jB,EAAIyb,MAAOiE,EAAMjE,OAC1CmI,EAAelqB,EAAQmqB,aAAaJ,EAAOE,GAC3C7lB,EAAM8lB,EAAa5lB,QAGvBolB,GAAkBQ,EAAarO,EAAI,MAGnC6N,IAAiB,CAGfA,IAEFC,GAAQ9Y,EAAMA,MAAMgL,EAAImK,EAAMnV,MAAMgL,EAAIvV,EAAIuK,MAAMgL,EAAI4N,EAAM5Y,MAAMgL,GAAK,EACvEvR,EAAoE,KAA/D,GAAKqf,EAAOhrB,KAAK+b,MAAQ/b,KAAKid,MAAMC,EAAKld,KAAK8a,eACnDlP,EAAI,EAEA5L,KAAK2a,YACP9O,EAAI3G,KAAK4G,IAAI,EAAKyf,EAAaxZ,EAAItM,EAAO,EAAG,GAC7CuiB,EAAYhoB,KAAK0nB,SAAS/b,EAAGC,EAAGC,GAChC8b,EAAcK,IAGdnc,EAAI,EACJmc,EAAYhoB,KAAK0nB,SAAS/b,EAAGC,EAAGC,GAChC8b,EAAc3nB,KAAKsc,aAIrB0L,EAAY,OACZL,EAAc3nB,KAAKsc,WAErBgL,EAAY,GAEZP,EAAIO,UAAYA,EAChBP,EAAIiB,UAAYA,EAChBjB,EAAIY,YAAcA,EAClBZ,EAAIa,YACJb,EAAIc,OAAO3V,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,GACxC+U,EAAIe,OAAOT,EAAMhE,OAAOtR,EAAGsV,EAAMhE,OAAOrR,GACxC+U,EAAIe,OAAOgD,EAAMzH,OAAOtR,EAAG+Y,EAAMzH,OAAOrR,GACxC+U,EAAIe,OAAOngB,EAAI0b,OAAOtR,EAAGpK,EAAI0b,OAAOrR,GACpC+U,EAAIkB,YACJlB,EAAInH,OACJmH,EAAIlH,cAKR,KAAKra,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IACtC0M,EAAQlS,KAAKmb,WAAW3V,GACxB6hB,EAAQrnB,KAAKmb,WAAW3V,GAAG+d,WAC3B5b,EAAQ3H,KAAKmb,WAAW3V,GAAGge,SAEbhd,SAAV0L,IAEAoV,EADEtnB,KAAKwa,gBACK,GAAKtI,EAAMkR,MAAMlG,EAGjB,IAAMld,KAAKkb,IAAIgC,EAAIld,KAAKib,OAAOmE,iBAIjC5Y,SAAV0L,GAAiC1L,SAAV6gB,IAEzB2D,GAAQ9Y,EAAMA,MAAMgL,EAAImK,EAAMnV,MAAMgL,GAAK,EACzCvR,EAAoE,KAA/D,GAAKqf,EAAOhrB,KAAK+b,MAAQ/b,KAAKid,MAAMC,EAAKld,KAAK8a,eAEnDiM,EAAIO,UAAYA,EAChBP,EAAIY,YAAc3nB,KAAK0nB,SAAS/b,EAAG,EAAG,GACtCob,EAAIa,YACJb,EAAIc,OAAO3V,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,GACxC+U,EAAIe,OAAOT,EAAMhE,OAAOtR,EAAGsV,EAAMhE,OAAOrR,GACxC+U,EAAIlH,UAGQrZ,SAAV0L,GAA+B1L,SAARmB,IAEzBqjB,GAAQ9Y,EAAMA,MAAMgL,EAAIvV,EAAIuK,MAAMgL,GAAK,EACvCvR,EAAoE,KAA/D,GAAKqf,EAAOhrB,KAAK+b,MAAQ/b,KAAKid,MAAMC,EAAKld,KAAK8a,eAEnDiM,EAAIO,UAAYA,EAChBP,EAAIY,YAAc3nB,KAAK0nB,SAAS/b,EAAG,EAAG,GACtCob,EAAIa,YACJb,EAAIc,OAAO3V,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,GACxC+U,EAAIe,OAAOngB,EAAI0b,OAAOtR,EAAGpK,EAAI0b,OAAOrR,GACpC+U,EAAIlH,YAWZ7e,EAAQmS,UAAUyT,eAAiB,WACjC,GAEIphB,GAFA+Z,EAASvf,KAAKsf,MAAMC,OACpBwH,EAAMxH,EAAOyH,WAAW,KAG5B,MAAwBxgB,SAApBxG,KAAKmb,YAA4Bnb,KAAKmb,WAAWxV,QAAU,GAA/D,CAIA,IAAKH,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C,GAAI4d,GAAQpjB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG0M,OAC3DmR,EAASrjB,KAAK2d,4BAA4ByF,EAC9CpjB,MAAKmb,WAAW3V,GAAG4d,MAAQA,EAC3BpjB,KAAKmb,WAAW3V,GAAG6d,OAASA,CAG5B,IAAI4H,GAAcjrB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG8d,OACrEtjB,MAAKmb,WAAW3V,GAAG0lB,KAAOlrB,KAAKwa,gBAAkByQ,EAAYtlB,UAAYslB,EAAY/N,EAIvF,GAAIiO,GAAY,SAAU5lB,EAAGa,GAC3B,MAAOA,GAAE8kB,KAAO3lB,EAAE2lB,KAEpBlrB,MAAKmb,WAAWjF,KAAKiV,EAGrB,IAAI/D,GAAmC,IAAzBpnB,KAAKsf,MAAME,WACzB,KAAKha,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C,GAAI0M,GAAQlS,KAAKmb,WAAW3V,EAE5B,IAAIxF,KAAKiN,QAAUjM,EAAQsZ,MAAM+F,QAAS,CAGxC,GAAI+I,GAAOppB,KAAKud,eAAerL,EAAMoR,OACrCyD,GAAIO,UAAY,EAChBP,EAAIY,YAAc3nB,KAAKuc,UACvBwK,EAAIa,YACJb,EAAIc,OAAOuB,EAAKrX,EAAGqX,EAAKpX,
GACxB+U,EAAIe,OAAO5V,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,GACxC+U,EAAIlH,SAIN,GAAIxN,EAEFA,GADErS,KAAKiN,QAAUjM,EAAQsZ,MAAMiG,QACxB6G,EAAQ,EAAI,EAAEA,GAAWlV,EAAMA,MAAM7K,MAAQrH,KAAKkc,WAAalc,KAAKmc,SAAWnc,KAAKkc,UAGpFkL,CAGT,IAAIqE,EAEFA,GADEzrB,KAAKwa,gBACEnI,GAAQH,EAAMkR,MAAMlG,EAGpB7K,IAASrS,KAAKkb,IAAIgC,EAAIld,KAAKib,OAAOmE,gBAEhC,EAATqM,IACFA,EAAS,EAGX,IAAI7e,GAAKzB,EAAO4U,CACZ/f,MAAKiN,QAAUjM,EAAQsZ,MAAMgG,UAE/B1T,EAAqE,KAA9D,GAAKsF,EAAMA,MAAM7K,MAAQrH,KAAKkc,UAAYlc,KAAKid,MAAM5V,OAC5D8D,EAAQnL,KAAK0nB,SAAS9a,EAAK,EAAG,GAC9BmT,EAAc/f,KAAK0nB,SAAS9a,EAAK,EAAG,KAE7B5M,KAAKiN,QAAUjM,EAAQsZ,MAAMiG,SACpCpV,EAAQnL,KAAKwc,SACbuD,EAAc/f,KAAKyc,iBAInB7P,EAA+E,KAAxE,GAAKsF,EAAMA,MAAMgL,EAAIld,KAAK+b,MAAQ/b,KAAKid,MAAMC,EAAKld,KAAK8a,eAC9D3P,EAAQnL,KAAK0nB,SAAS9a,EAAK,EAAG,GAC9BmT,EAAc/f,KAAK0nB,SAAS9a,EAAK,EAAG,KAItCma,EAAIO,UAAY,EAChBP,EAAIY,YAAc5H,EAClBgH,EAAIiB,UAAY7c,EAChB4b,EAAIa,YACJb,EAAI2E,IAAIxZ,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,EAAGyZ,EAAQ,EAAW,EAARvmB,KAAKymB,IAAM,GAC9D5E,EAAInH,OACJmH,EAAIlH,YAQR7e,EAAQmS,UAAUwT,eAAiB,WACjC,GAEInhB,GAAGomB,EAAGC,EAASC,EAFfvM,EAASvf,KAAKsf,MAAMC,OACpBwH,EAAMxH,EAAOyH,WAAW,KAG5B,MAAwBxgB,SAApBxG,KAAKmb,YAA4Bnb,KAAKmb,WAAWxV,QAAU,GAA/D,CAIA,IAAKH,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C,GAAI4d,GAAQpjB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG0M,OAC3DmR,EAASrjB,KAAK2d,4BAA4ByF,EAC9CpjB,MAAKmb,WAAW3V,GAAG4d,MAAQA,EAC3BpjB,KAAKmb,WAAW3V,GAAG6d,OAASA,CAG5B,IAAI4H,GAAcjrB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG8d,OACrEtjB,MAAKmb,WAAW3V,GAAG0lB,KAAOlrB,KAAKwa,gBAAkByQ,EAAYtlB,UAAYslB,EAAY/N,EAIvF,GAAIiO,GAAY,SAAU5lB,EAAGa,GAC3B,MAAOA,GAAE8kB,KAAO3lB,EAAE2lB,KAEpBlrB,MAAKmb,WAAWjF,KAAKiV,EAGrB,IAAIY,GAAS/rB,KAAKoc,UAAY,EAC1B4P,EAAShsB,KAAKqc,UAAY,CAC9B,KAAK7W,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C,GAGIoH,GAAKzB,EAAO4U,EAHZ7N,EAAQlS,KAAKmb,WAAW3V,EAIxBxF,MAAKiN,QAAUjM,EAAQsZ,MAAM6F,UAE/BvT,EAAqE,KAA9D,GAAKsF,EAAMA,MAAM7K,MAAQrH,KAAKkc,UAAYlc,KAAKid,MAAM5V,OAC5D8D,EAAQnL,KAAK0nB,SAAS9a,EAAK,EAAG,GAC9BmT,EAAc/f,KAAK0nB,SAAS9a,EAAK,EAAG,KAE7B5M,KAAKiN,QAAUjM,EAAQsZ,MAAM8F,SACpCjV,EAAQnL,KAAKwc,SACbuD,EAAc/f,KAAKyc,iBAInB7P,EAA+E,KAAxE,GAAKsF,EAAMA,MAAMgL,EAAIld,KAAK+b,MAAQ/b,KAAKid,MAAMC,EAAKld,KAAK8a,eAC9D3P,EAAQnL,KAAK0nB,SAAS9a,EAAK,EAAG,GAC9BmT,EAAc/f,KAAK0nB,SAAS9a,EAAK,EAAG,KAIlC5M,KAAKiN,QAAUjM,EAAQsZ,MAAM8F,UAC/B2L,EAAU/rB,KAAKoc,UAAY,IAAOlK,EAAMA,MAAM7K,MAAQrH,KAAKkc,WAAalc,KAAKmc,SAAWnc,KAAKkc,UAAY,GAAM,IAC/G8P,EAAUhsB,KAAKqc,UAAY,IAAOnK,EAAMA,MAAM7K,MAAQrH,KAAKkc,WAAalc,KAAKmc,SAAWnc,KAAKkc,UAAY,GAAM,IAIjH,IAAI/H,GAAKnU,KACLwd,EAAUtL,EAAMA,MAChBvK,IACDuK,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQxO,EAAQN,KACnEhL,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQxO,EAAQN,KACnEhL,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQxO,EAAQN,KACnEhL,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQxO,EAAQN,KAElEoG,IACDpR,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQhsB,KAAK+b,QAChE7J,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQhsB,KAAK+b,QAChE7J,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQhsB,KAAK+b,QAChE7J,MAAO,GAAI7Q,GAAQmc,EAAQzL,EAAIga,EAAQvO,EAAQxL,EAAIga,EAAQhsB,KAAK+b,OAInEpU,GAAIU,QAAQ,SAAU0a,GACpBA,EAAIM,OAASlP,EAAGoJ,eAAewF,EAAI7Q,SAErCoR,EAAOjb,QAAQ,SAAU0a,GACvBA,EAAIM,OAASlP,EAAGoJ,eAAewF,EAAI7Q,QAIrC,IAAI+Z,KACDH,QAASnkB,EAAKukB,OAAQ7qB,EAAQ8qB,IAAI7I,EAAO,GAAGpR,MAAOoR,EAAO,GAAGpR,SAC7D4Z,SAAUnkB,EAAI,GAAIA,EAAI,GAAI2b,EAAO,GAAIA,EAAO,IAAK4I,OAAQ7qB,EAAQ8qB,IAAI7I,EAAO,GAAGpR,MAAOoR,EAAO,GAAGpR,SAChG4Z,SAAUnkB,EAAI,GAAIA,EAAI,GAAI2b,EAAO,GAAIA,EAAO,IAAK4I,OAAQ
7qB,EAAQ8qB,IAAI7I,EAAO,GAAGpR,MAAOoR,EAAO,GAAGpR,SAChG4Z,SAAUnkB,EAAI,GAAIA,EAAI,GAAI2b,EAAO,GAAIA,EAAO,IAAK4I,OAAQ7qB,EAAQ8qB,IAAI7I,EAAO,GAAGpR,MAAOoR,EAAO,GAAGpR,SAChG4Z,SAAUnkB,EAAI,GAAIA,EAAI,GAAI2b,EAAO,GAAIA,EAAO,IAAK4I,OAAQ7qB,EAAQ8qB,IAAI7I,EAAO,GAAGpR,MAAOoR,EAAO,GAAGpR,QAKnG,KAHAA,EAAM+Z,SAAWA,EAGZL,EAAI,EAAGA,EAAIK,EAAStmB,OAAQimB,IAAK,CACpCC,EAAUI,EAASL,EACnB,IAAIQ,GAAcpsB,KAAK0d,2BAA2BmO,EAAQK,OAC1DL,GAAQX,KAAOlrB,KAAKwa,gBAAkB4R,EAAYzmB,UAAYymB,EAAYlP,EAwB5E,IAjBA+O,EAAS/V,KAAK,SAAU3Q,EAAGa,GACzB,GAAIimB,GAAOjmB,EAAE8kB,KAAO3lB,EAAE2lB,IACtB,OAAImB,GAAaA,EAGb9mB,EAAEumB,UAAYnkB,EAAY,EAC1BvB,EAAE0lB,UAAYnkB,EAAY,GAGvB,IAITof,EAAIO,UAAY,EAChBP,EAAIY,YAAc5H,EAClBgH,EAAIiB,UAAY7c,EAEXygB,EAAI,EAAGA,EAAIK,EAAStmB,OAAQimB,IAC/BC,EAAUI,EAASL,GACnBE,EAAUD,EAAQC,QAClB/E,EAAIa,YACJb,EAAIc,OAAOiE,EAAQ,GAAGzI,OAAOtR,EAAG+Z,EAAQ,GAAGzI,OAAOrR,GAClD+U,EAAIe,OAAOgE,EAAQ,GAAGzI,OAAOtR,EAAG+Z,EAAQ,GAAGzI,OAAOrR,GAClD+U,EAAIe,OAAOgE,EAAQ,GAAGzI,OAAOtR,EAAG+Z,EAAQ,GAAGzI,OAAOrR,GAClD+U,EAAIe,OAAOgE,EAAQ,GAAGzI,OAAOtR,EAAG+Z,EAAQ,GAAGzI,OAAOrR,GAClD+U,EAAIe,OAAOgE,EAAQ,GAAGzI,OAAOtR,EAAG+Z,EAAQ,GAAGzI,OAAOrR,GAClD+U,EAAInH,OACJmH,EAAIlH,YAUV7e,EAAQmS,UAAUuT,gBAAkB,WAClC,GAEExU,GAAO1M,EAFL+Z,EAASvf,KAAKsf,MAAMC,OACtBwH,EAAMxH,EAAOyH,WAAW,KAG1B,MAAwBxgB,SAApBxG,KAAKmb,YAA4Bnb,KAAKmb,WAAWxV,QAAU,GAA/D,CAIA,IAAKH,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C,GAAI4d,GAAQpjB,KAAK0d,2BAA2B1d,KAAKmb,WAAW3V,GAAG0M,OAC3DmR,EAASrjB,KAAK2d,4BAA4ByF,EAE9CpjB,MAAKmb,WAAW3V,GAAG4d,MAAQA,EAC3BpjB,KAAKmb,WAAW3V,GAAG6d,OAASA,EAc9B,IAVIrjB,KAAKmb,WAAWxV,OAAS,IAC3BuM,EAAQlS,KAAKmb,WAAW,GAExB4L,EAAIO,UAAY,EAChBP,EAAIY,YAAc,OAClBZ,EAAIa,YACJb,EAAIc,OAAO3V,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,IAIrCxM,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IACtC0M,EAAQlS,KAAKmb,WAAW3V,GACxBuhB,EAAIe,OAAO5V,EAAMmR,OAAOtR,EAAGG,EAAMmR,OAAOrR,EAItChS,MAAKmb,WAAWxV,OAAS,GAC3BohB,EAAIlH,WASR7e,EAAQmS,UAAUgR,aAAe,SAAS7a,GAWxC,GAVAA,EAAQA,GAASC,OAAOD,MAIpBtJ,KAAKssB,gBACPtsB,KAAKusB,WAAWjjB,GAIlBtJ,KAAKssB,eAAiBhjB,EAAMkjB,MAAyB,IAAhBljB,EAAMkjB,MAAiC,IAAjBljB,EAAMmjB,OAC5DzsB,KAAKssB,gBAAmBtsB,KAAK0sB,UAAlC,CAGA1sB,KAAK2sB,YAAcjQ,EAAUpT,GAC7BtJ,KAAK4sB,YAAc/P,EAAUvT,GAE7BtJ,KAAK6sB,WAAa,GAAIvoB,MAAKtE,KAAK4P,OAChC5P,KAAK8sB,SAAW,GAAIxoB,MAAKtE,KAAK6P,KAC9B7P,KAAK+sB,iBAAmB/sB,KAAKib,OAAO6K,iBAEpC9lB,KAAKsf,MAAMrS,MAAM+f,OAAS,MAK1B,IAAI7Y,GAAKnU,IACTA,MAAKitB,YAAc,SAAU3jB,GAAQ6K,EAAG+Y,aAAa5jB,IACrDtJ,KAAKmtB,UAAc,SAAU7jB,GAAQ6K,EAAGoY,WAAWjjB,IACnD3I,EAAKgI,iBAAiB4I,SAAU,YAAa4C,EAAG8Y,aAChDtsB,EAAKgI,iBAAiB4I,SAAU,UAAW4C,EAAGgZ,WAC9CxsB,EAAK0I,eAAeC,KAStBtI,EAAQmS,UAAU+Z,aAAe,SAAU5jB,GACzCA,EAAQA,GAASC,OAAOD,KAGxB,IAAI8jB,GAAQ/H,WAAW3I,EAAUpT,IAAUtJ,KAAK2sB,YAC5CU,EAAQhI,WAAWxI,EAAUvT,IAAUtJ,KAAK4sB,YAE5CU,EAAgBttB,KAAK+sB,iBAAiBvH,WAAa4H,EAAQ,IAC3DG,EAAcvtB,KAAK+sB,iBAAiBtH,SAAW4H,EAAQ,IAEvDG,EAAY,EACZC,EAAYvoB,KAAKkZ,IAAIoP,EAAY,IAAM,EAAItoB,KAAKymB,GAIhDzmB,MAAK2lB,IAAI3lB,KAAKkZ,IAAIkP,IAAkBG,IACtCH,EAAgBpoB,KAAKwoB,MAAOJ,EAAgBpoB,KAAKymB,IAAOzmB,KAAKymB,GAAK,MAEhEzmB,KAAK2lB,IAAI3lB,KAAKqZ,IAAI+O,IAAkBG,IACtCH,GAAiBpoB,KAAKwoB,MAAOJ,EAAepoB,KAAKymB,GAAK,IAAQ,IAAOzmB,KAAKymB,GAAK,MAI7EzmB,KAAK2lB,IAAI3lB,KAAKkZ,IAAImP,IAAgBE,IACpCF,EAAcroB,KAAKwoB,MAAOH,EAAcroB,KAAKymB,IAAOzmB,KAAKymB,IAEvDzmB,KAAK2lB,IAAI3lB,KAAKqZ,IAAIgP,IAAgBE,IACpCF,GAAeroB,KAAKwoB,MAAOH,EAAaroB,KAAKymB,GAAK,IAAQ,IAAOzmB,KAAKymB,IAGxE3rB,KAAKib,OAAOyK,eAAe4H,EAAeC,GAC1CvtB,KAAKyhB,QAGL,IAAIkM,GAAa3tB,KAAK6lB,mBACtB7lB,MAAK4tB,KAAK,uBAAwBD,GAElChtB,EAAK0I,eAAeC,IAStBtI,EAAQmS,UAAUoZ,WAAa,SAAUjjB,GACvCtJ,KAAKsf,MAAMrS,MAAM+f,OAAS,OAC1BhtB,KAAKssB,gBAAiB,EAGtB3rB,EAAKwI
,oBAAoBoI,SAAU,YAAavR,KAAKitB,aACrDtsB,EAAKwI,oBAAoBoI,SAAU,UAAavR,KAAKmtB,WACrDxsB,EAAK0I,eAAeC,IAOtBtI,EAAQmS,UAAUsR,WAAa,SAAUnb,GACvC,GAAIkP,GAAQ,IACRqV,EAAe7tB,KAAKsf,MAAM9X,wBAC1BsmB,EAASpR,EAAUpT,GAASukB,EAAapmB,KACzCsmB,EAASlR,EAAUvT,GAASukB,EAAalmB,GAE7C,IAAK3H,KAAK6a,YAAV,CASA,GALI7a,KAAKguB,gBACP3U,aAAarZ,KAAKguB,gBAIhBhuB,KAAKssB,eAEP,WADAtsB,MAAKiuB,cAIP,IAAIjuB,KAAKomB,SAAWpmB,KAAKomB,QAAQ8H,UAAW,CAE1C,GAAIA,GAAYluB,KAAKmuB,iBAAiBL,EAAQC,EAC1CG,KAAcluB,KAAKomB,QAAQ8H,YAEzBA,EACFluB,KAAKouB,aAAaF,GAGlBluB,KAAKiuB,oBAIN,CAEH,GAAI9Z,GAAKnU,IACTA,MAAKguB,eAAiB1U,WAAW,WAC/BnF,EAAG6Z,eAAiB,IAGpB,IAAIE,GAAY/Z,EAAGga,iBAAiBL,EAAQC,EACxCG,IACF/Z,EAAGia,aAAaF,IAEjB1V,MAOPxX,EAAQmS,UAAUkR,cAAgB,SAAS/a,GACzCtJ,KAAK0sB,WAAY,CAEjB,IAAIvY,GAAKnU,IACTA,MAAKquB,YAAc,SAAU/kB,GAAQ6K,EAAGma,aAAahlB,IACrDtJ,KAAKuuB,WAAc,SAAUjlB,GAAQ6K,EAAGqa,YAAYllB,IACpD3I,EAAKgI,iBAAiB4I,SAAU,YAAa4C,EAAGka,aAChD1tB,EAAKgI,iBAAiB4I,SAAU,WAAY4C,EAAGoa,YAE/CvuB,KAAKmkB,aAAa7a,IAMpBtI,EAAQmS,UAAUmb,aAAe,SAAShlB,GACxCtJ,KAAKktB,aAAa5jB,IAMpBtI,EAAQmS,UAAUqb,YAAc,SAASllB,GACvCtJ,KAAK0sB,WAAY,EAEjB/rB,EAAKwI,oBAAoBoI,SAAU,YAAavR,KAAKquB,aACrD1tB,EAAKwI,oBAAoBoI,SAAU,WAAcvR,KAAKuuB,YAEtDvuB,KAAKusB,WAAWjjB,IASlBtI,EAAQmS,UAAUoR,SAAW,SAASjb,GAC/BA,IACHA,EAAQC,OAAOD,MAGjB,IAAImlB,GAAQ,CAYZ,IAXInlB,EAAMolB,WACRD,EAAQnlB,EAAMolB,WAAW,IAChBplB,EAAMqlB,SAGfF,GAASnlB,EAAMqlB,OAAO,GAMpBF,EAAO,CACT,GAAIG,GAAY5uB,KAAKib,OAAOmE,eACxByP,EAAYD,GAAa,EAAIH,EAAQ,GAEzCzuB,MAAKib,OAAO2K,aAAaiJ,GACzB7uB,KAAKyhB,SAELzhB,KAAKiuB,eAIP,GAAIN,GAAa3tB,KAAK6lB,mBACtB7lB,MAAK4tB,KAAK,uBAAwBD,GAKlChtB,EAAK0I,eAAeC,IAUtBtI,EAAQmS,UAAU2b,gBAAkB,SAAU5c,EAAO6c,GAKnD,QAASC,GAAMjd,GACb,MAAOA,GAAI,EAAI,EAAQ,EAAJA,EAAQ,GAAK,EALlC,GAAIxM,GAAIwpB,EAAS,GACf3oB,EAAI2oB,EAAS,GACbtuB,EAAIsuB,EAAS,GAMXE,EAAKD,GAAM5oB,EAAE2L,EAAIxM,EAAEwM,IAAMG,EAAMF,EAAIzM,EAAEyM,IAAM5L,EAAE4L,EAAIzM,EAAEyM,IAAME,EAAMH,EAAIxM,EAAEwM,IACrEmd,EAAKF,GAAMvuB,EAAEsR,EAAI3L,EAAE2L,IAAMG,EAAMF,EAAI5L,EAAE4L,IAAMvR,EAAEuR,EAAI5L,EAAE4L,IAAME,EAAMH,EAAI3L,EAAE2L,IACrEod,EAAKH,GAAMzpB,EAAEwM,EAAItR,EAAEsR,IAAMG,EAAMF,EAAIvR,EAAEuR,IAAMzM,EAAEyM,EAAIvR,EAAEuR,IAAME,EAAMH,EAAItR,EAAEsR,GAGzE,SAAc,GAANkd,GAAiB,GAANC,GAAWD,GAAMC,GAC3B,GAANA,GAAiB,GAANC,GAAWD,GAAMC,GACtB,GAANF,GAAiB,GAANE,GAAWF,GAAME,IAUjCnuB,EAAQmS,UAAUgb,iBAAmB,SAAUpc,EAAGC,GAChD,GAAIxM,GACF4pB,EAAU,IACVlB,EAAY,KACZmB,EAAmB,KACnBC,EAAc,KACdpD,EAAS,GAAI9qB,GAAQ2Q,EAAGC,EAE1B,IAAIhS,KAAKiN,QAAUjM,EAAQsZ,MAAM4F,KAC/BlgB,KAAKiN,QAAUjM,EAAQsZ,MAAM6F,UAC7BngB,KAAKiN,QAAUjM,EAAQsZ,MAAM8F,QAE7B,IAAK5a,EAAIxF,KAAKmb,WAAWxV,OAAS,EAAGH,GAAK,EAAGA,IAAK,CAChD0oB,EAAYluB,KAAKmb,WAAW3V,EAC5B,IAAIymB,GAAYiC,EAAUjC,QAC1B,IAAIA,EACF,IAAK,GAAIrgB,GAAIqgB,EAAStmB,OAAS,EAAGiG,GAAK,EAAGA,IAAK,CAE7C,GAAIigB,GAAUI,EAASrgB,GACnBkgB,EAAUD,EAAQC,QAClByD,GAAazD,EAAQ,GAAGzI,OAAQyI,EAAQ,GAAGzI,OAAQyI,EAAQ,GAAGzI,QAC9DmM,GAAa1D,EAAQ,GAAGzI,OAAQyI,EAAQ,GAAGzI,OAAQyI,EAAQ,GAAGzI,OAClE,IAAIrjB,KAAK8uB,gBAAgB5C,EAAQqD,IAC/BvvB,KAAK8uB,gBAAgB5C,EAAQsD,GAE7B,MAAOtB,QAQf,KAAK1oB,EAAI,EAAGA,EAAIxF,KAAKmb,WAAWxV,OAAQH,IAAK,CAC3C0oB,EAAYluB,KAAKmb,WAAW3V,EAC5B,IAAI0M,GAAQgc,EAAU7K,MACtB,IAAInR,EAAO,CACT,GAAIud,GAAQvqB,KAAK2lB,IAAI9Y,EAAIG,EAAMH,GAC3B2d,EAAQxqB,KAAK2lB,IAAI7Y,EAAIE,EAAMF,GAC3BkZ,EAAQhmB,KAAKyqB,KAAKF,EAAQA,EAAQC,EAAQA,IAEzB,OAAhBJ,GAA+BA,EAAPpE,IAA8BkE,EAAPlE,IAClDoE,EAAcpE,EACdmE,EAAmBnB,IAO3B,MAAOmB,IAQTruB,EAAQmS,UAAUib,aAAe,SAAUF,GACzC,GAAI0B,GAASC,EAAMC,CAEd9vB,MAAKomB,SAiCRwJ,EAAU5vB,KAAKomB,QAAQ2J,IAAIH,QAC3BC,EAAQ7vB,KAAKomB,QAAQ2J,IAAIF,KACzBC,EAAQ9vB,KAAKomB,QAAQ2J,IAAID,MAlCzBF,EAAUre,SAASM,cAAc,OACjC+d,EAAQ3iB,MAAM2W,SAAW,WACzBgM,EAAQ3iB,
MAAM+W,QAAU,OACxB4L,EAAQ3iB,MAAMb,OAAS,oBACvBwjB,EAAQ3iB,MAAM9B,MAAQ,UACtBykB,EAAQ3iB,MAAMd,WAAa,wBAC3ByjB,EAAQ3iB,MAAM+iB,aAAe,MAC7BJ,EAAQ3iB,MAAMgjB,UAAY,qCAE1BJ,EAAOte,SAASM,cAAc,OAC9Bge,EAAK5iB,MAAM2W,SAAW,WACtBiM,EAAK5iB,MAAMuF,OAAS,OACpBqd,EAAK5iB,MAAMsF,MAAQ,IACnBsd,EAAK5iB,MAAMijB,WAAa,oBAExBJ,EAAMve,SAASM,cAAc,OAC7Bie,EAAI7iB,MAAM2W,SAAW,WACrBkM,EAAI7iB,MAAMuF,OAAS,IACnBsd,EAAI7iB,MAAMsF,MAAQ,IAClBud,EAAI7iB,MAAMb,OAAS,oBACnB0jB,EAAI7iB,MAAM+iB,aAAe,MAEzBhwB,KAAKomB,SACH8H,UAAW,KACX6B,KACEH,QAASA,EACTC,KAAMA,EACNC,IAAKA,KAUX9vB,KAAKiuB,eAELjuB,KAAKomB,QAAQ8H,UAAYA,EAEvB0B,EAAQ3L,UADsB,kBAArBjkB,MAAK6a,YACM7a,KAAK6a,YAAYqT,EAAUhc,OAG3B,6BACMgc,EAAUhc,MAAMH,EAAI,gCACpBmc,EAAUhc,MAAMF,EAAI,gCACpBkc,EAAUhc,MAAMgL,EAAI,qBAIhD0S,EAAQ3iB,MAAMxF,KAAQ,IACtBmoB,EAAQ3iB,MAAMtF,IAAQ,IACtB3H,KAAKsf,MAAM7N,YAAYme,GACvB5vB,KAAKsf,MAAM7N,YAAYoe,GACvB7vB,KAAKsf,MAAM7N,YAAYqe,EAGvB,IAAIK,GAAgBP,EAAQQ,YACxBC,EAAkBT,EAAQU,aAC1BC,EAAgBV,EAAKS,aACrBE,EAAcV,EAAIM,YAClBK,EAAgBX,EAAIQ,aAEpB7oB,EAAOymB,EAAU7K,OAAOtR,EAAIoe,EAAe,CAC/C1oB,GAAOvC,KAAK4G,IAAI5G,KAAKwH,IAAIjF,EAAM,IAAKzH,KAAKsf,MAAME,YAAc,GAAK2Q,GAElEN,EAAK5iB,MAAMxF,KAASymB,EAAU7K,OAAOtR,EAAI,KACzC8d,EAAK5iB,MAAMtF,IAAUumB,EAAU7K,OAAOrR,EAAIue,EAAc,KACxDX,EAAQ3iB,MAAMxF,KAAQA,EAAO,KAC7BmoB,EAAQ3iB,MAAMtF,IAASumB,EAAU7K,OAAOrR,EAAIue,EAAaF,EAAiB,KAC1EP,EAAI7iB,MAAMxF,KAAWymB,EAAU7K,OAAOtR,EAAIye,EAAW,EAAK,KAC1DV,EAAI7iB,MAAMtF,IAAWumB,EAAU7K,OAAOrR,EAAIye,EAAY,EAAK,MAO7DzvB,EAAQmS,UAAU8a,aAAe,WAC/B,GAAIjuB,KAAKomB,QAAS,CAChBpmB,KAAKomB,QAAQ8H,UAAY,IAEzB,KAAK,GAAIroB,KAAQ7F,MAAKomB,QAAQ2J,IAC5B,GAAI/vB,KAAKomB,QAAQ2J,IAAIjqB,eAAeD,GAAO,CACzC,GAAI0B,GAAOvH,KAAKomB,QAAQ2J,IAAIlqB,EACxB0B,IAAQA,EAAKsC,YACftC,EAAKsC,WAAWsH,YAAY5J,MA8BtC1H,EAAOD,QAAUoB,GAKb,SAASnB,EAAQD,EAASM,GAc9B,QAASgB,KACPlB,KAAK0wB,YAAc,GAAIrvB,GACvBrB,KAAK2wB,eACL3wB,KAAK2wB,YAAYnL,WAAa,EAC9BxlB,KAAK2wB,YAAYlL,SAAW,EAC5BzlB,KAAK4wB,UAAY,IAEjB5wB,KAAK6wB,eAAiB,GAAIxvB,GAC1BrB,KAAK8wB,eAAkB,GAAIzvB,GAAQ,GAAI6D,KAAKymB,GAAI,EAAG,GAEnD3rB,KAAK+wB,6BAtBP,GAAI1vB,GAAUnB,EAAoB,GA+BlCgB,GAAOiS,UAAUmK,eAAiB,SAASvL,EAAGC,EAAGkL,GAC/Cld,KAAK0wB,YAAY3e,EAAIA,EACrB/R,KAAK0wB,YAAY1e,EAAIA,EACrBhS,KAAK0wB,YAAYxT,EAAIA,EAErBld,KAAK+wB,8BAWP7vB,EAAOiS,UAAUuS,eAAiB,SAASF,EAAYC,GAClCjf,SAAfgf,IACFxlB,KAAK2wB,YAAYnL,WAAaA,GAGfhf,SAAbif,IACFzlB,KAAK2wB,YAAYlL,SAAWA,EACxBzlB,KAAK2wB,YAAYlL,SAAW,IAAGzlB,KAAK2wB,YAAYlL,SAAW,GAC3DzlB,KAAK2wB,YAAYlL,SAAW,GAAIvgB,KAAKymB,KAAI3rB,KAAK2wB,YAAYlL,SAAW,GAAIvgB,KAAKymB,MAGjEnlB,SAAfgf,GAAyChf,SAAbif,IAC9BzlB,KAAK+wB,8BAQT7vB,EAAOiS,UAAU2S,eAAiB,WAChC,GAAIkL,KAIJ,OAHAA,GAAIxL,WAAaxlB,KAAK2wB,YAAYnL,WAClCwL,EAAIvL,SAAWzlB,KAAK2wB,YAAYlL,SAEzBuL,GAOT9vB,EAAOiS,UAAUyS,aAAe,SAASjgB,GACxBa,SAAXb,IAGJ3F,KAAK4wB,UAAYjrB,EAKb3F,KAAK4wB,UAAY,MAAM5wB,KAAK4wB,UAAY,KACxC5wB,KAAK4wB,UAAY,IAAK5wB,KAAK4wB,UAAY,GAE3C5wB,KAAK+wB,+BAOP7vB,EAAOiS,UAAUiM,aAAe,WAC9B,MAAOpf,MAAK4wB,WAOd1vB,EAAOiS,UAAU6K,kBAAoB,WACnC,MAAOhe,MAAK6wB,gBAOd3vB,EAAOiS,UAAUkL,kBAAoB,WACnC,MAAOre,MAAK8wB,gBAOd5vB,EAAOiS,UAAU4d,2BAA6B,WAE5C/wB,KAAK6wB,eAAe9e,EAAI/R,KAAK0wB,YAAY3e,EAAI/R,KAAK4wB,UAAY1rB,KAAKkZ,IAAIpe,KAAK2wB,YAAYnL,YAActgB,KAAKqZ,IAAIve,KAAK2wB,YAAYlL,UAChIzlB,KAAK6wB,eAAe7e,EAAIhS,KAAK0wB,YAAY1e,EAAIhS,KAAK4wB,UAAY1rB,KAAKqZ,IAAIve,KAAK2wB,YAAYnL,YAActgB,KAAKqZ,IAAIve,KAAK2wB,YAAYlL,UAChIzlB,KAAK6wB,eAAe3T,EAAIld,KAAK0wB,YAAYxT,EAAIld,KAAK4wB,UAAY1rB,KAAKkZ,IAAIpe,KAAK2wB,YAAYlL,UAGxFzlB,KAAK8wB,eAAe/e,EAAI7M,KAAKymB,GAAG,EAAI3rB,KAAK2wB,YAAYlL,SACrDzlB,KAAK8wB,eAAe9e,EAAI,EACxBhS,KAAK8wB,eAAe5T,GAAKld,KAAK2wB,YAAYnL,YAG5C3lB,EAAOD,QAAUsB,GAIb,SAASrB,EAAQD,EAASM,GAW9B,QAASiB,GAAQuR,EAAMqO,E
AAQkQ,GAC7BjxB,KAAK0S,KAAOA,EACZ1S,KAAK+gB,OAASA,EACd/gB,KAAKixB,MAAQA,EAEbjxB,KAAKmI,MAAQ3B,OACbxG,KAAKqH,MAAQb,OAGbxG,KAAK8W,OAASma,EAAMjQ,kBAAkBtO,EAAKwC,MAAOlV,KAAK+gB,QAGvD/gB,KAAK8W,OAAOZ,KAAK,SAAU3Q,EAAGa,GAC5B,MAAOb,GAAIa,EAAI,EAAQA,EAAJb,EAAQ,GAAK,IAG9BvF,KAAK8W,OAAOnR,OAAS,GACvB3F,KAAK+oB,YAAY,GAInB/oB,KAAKmb,cAELnb,KAAKM,QAAS,EACdN,KAAKkxB,eAAiB1qB,OAElByqB,EAAMjW,kBACRhb,KAAKM,QAAS,EACdN,KAAKmxB,oBAGLnxB,KAAKM,QAAS,EAxClB,GAAIQ,GAAWZ,EAAoB,EAiDnCiB,GAAOgS,UAAUie,SAAW,WAC1B,MAAOpxB,MAAKM,QAQda,EAAOgS,UAAUke,kBAAoB,WAInC,IAHA,GAAI5rB,GAAMzF,KAAK8W,OAAOnR,OAElBH,EAAI,EACDxF,KAAKmb,WAAW3V,IACrBA,GAGF,OAAON,MAAKwoB,MAAMloB,EAAIC,EAAM,MAQ9BtE,EAAOgS,UAAU+V,SAAW,WAC1B,MAAOlpB,MAAKixB,MAAM7W,aAQpBjZ,EAAOgS,UAAUme,UAAY,WAC3B,MAAOtxB,MAAK+gB,QAOd5f,EAAOgS,UAAUgW,iBAAmB,WAClC,MAAmB3iB,UAAfxG,KAAKmI,MACA3B,OAEFxG,KAAK8W,OAAO9W,KAAKmI,QAO1BhH,EAAOgS,UAAUoe,UAAY,WAC3B,MAAOvxB,MAAK8W,QAQd3V,EAAOgS,UAAUyB,SAAW,SAASzM,GACnC,GAAIA,GAASnI,KAAK8W,OAAOnR,OACvB,KAAM,2BAER,OAAO3F,MAAK8W,OAAO3O,IASrBhH,EAAOgS,UAAU2P,eAAiB,SAAS3a,GAIzC,GAHc3B,SAAV2B,IACFA,EAAQnI,KAAKmI,OAED3B,SAAV2B,EACF,QAEF,IAAIgT,EACJ,IAAInb,KAAKmb,WAAWhT,GAClBgT,EAAanb,KAAKmb,WAAWhT,OAE1B,CACH,GAAIyF,KACJA,GAAEmT,OAAS/gB,KAAK+gB,OAChBnT,EAAEvG,MAAQrH,KAAK8W,OAAO3O,EAEtB,IAAIqpB,GAAW,GAAI1wB,GAASd,KAAK0S,MAAMiB,OAAQ,SAAUtE,GAAO,MAAQA,GAAKzB,EAAEmT,SAAWnT,EAAEvG,SAAW6N,KACvGiG,GAAanb,KAAKixB,MAAMnO,eAAe0O,GAEvCxxB,KAAKmb,WAAWhT,GAASgT,EAG3B,MAAOA,IAQTha,EAAOgS,UAAUqO,kBAAoB,SAASlZ,GAC5CtI,KAAKkxB,eAAiB5oB,GASxBnH,EAAOgS,UAAU4V,YAAc,SAAS5gB,GACtC,GAAIA,GAASnI,KAAK8W,OAAOnR,OACvB,KAAM,2BAER3F,MAAKmI,MAAQA,EACbnI,KAAKqH,MAAQrH,KAAK8W,OAAO3O,IAO3BhH,EAAOgS,UAAUge,iBAAmB,SAAShpB,GAC7B3B,SAAV2B,IACFA,EAAQ,EAEV,IAAImX,GAAQtf,KAAKixB,MAAM3R,KAEvB,IAAInX,EAAQnI,KAAK8W,OAAOnR,OAAQ,CAC9B,CAAqB3F,KAAK8iB,eAAe3a,GAIlB3B,SAAnB8Y,EAAMmS,WACRnS,EAAMmS,SAAWlgB,SAASM,cAAc,OACxCyN,EAAMmS,SAASxkB,MAAM2W,SAAW,WAChCtE,EAAMmS,SAASxkB,MAAM9B,MAAQ,OAC7BmU,EAAM7N,YAAY6N,EAAMmS,UAE1B,IAAIA,GAAWzxB,KAAKqxB,mBACpB/R,GAAMmS,SAASxN,UAAY,wBAA0BwN,EAAW,IAEhEnS,EAAMmS,SAASxkB,MAAMqW,OAAS,OAC9BhE,EAAMmS,SAASxkB,MAAMxF,KAAO,MAE5B,IAAI0M,GAAKnU,IACTsZ,YAAW,WAAYnF,EAAGgd,iBAAiBhpB,EAAM,IAAM,IACvDnI,KAAKM,QAAS,MAGdN,MAAKM,QAAS,EAGSkG,SAAnB8Y,EAAMmS,WACRnS,EAAMnO,YAAYmO,EAAMmS,UACxBnS,EAAMmS,SAAWjrB,QAGfxG,KAAKkxB,gBACPlxB,KAAKkxB,kBAIXrxB,EAAOD,QAAUuB,GAKb,SAAStB,GAOb,QAASuB,GAAS2Q,EAAGC,GACnBhS,KAAK+R,EAAUvL,SAANuL,EAAkBA,EAAI,EAC/B/R,KAAKgS,EAAUxL,SAANwL,EAAkBA,EAAI,EAGjCnS,EAAOD,QAAUwB,GAKb,SAASvB,GAQb,QAASwB,GAAQ0Q,EAAGC,EAAGkL,GACrBld,KAAK+R,EAAUvL,SAANuL,EAAkBA,EAAI,EAC/B/R,KAAKgS,EAAUxL,SAANwL,EAAkBA,EAAI,EAC/BhS,KAAKkd,EAAU1W,SAAN0W,EAAkBA,EAAI,EASjC7b,EAAQgqB,SAAW,SAAS9lB,EAAGa,GAC7B,GAAIsrB,GAAM,GAAIrwB,EAId,OAHAqwB,GAAI3f,EAAIxM,EAAEwM,EAAI3L,EAAE2L,EAChB2f,EAAI1f,EAAIzM,EAAEyM,EAAI5L,EAAE4L,EAChB0f,EAAIxU,EAAI3X,EAAE2X,EAAI9W,EAAE8W,EACTwU,GASTrwB,EAAQ4R,IAAM,SAAS1N,EAAGa,GACxB,GAAIurB,GAAM,GAAItwB,EAId;MAHAswB,GAAI5f,EAAIxM,EAAEwM,EAAI3L,EAAE2L,EAChB4f,EAAI3f,EAAIzM,EAAEyM,EAAI5L,EAAE4L,EAChB2f,EAAIzU,EAAI3X,EAAE2X,EAAI9W,EAAE8W,EACTyU,GASTtwB,EAAQ8qB,IAAM,SAAS5mB,EAAGa,GACxB,MAAO,IAAI/E,IACFkE,EAAEwM,EAAI3L,EAAE2L,GAAK,GACbxM,EAAEyM,EAAI5L,EAAE4L,GAAK,GACbzM,EAAE2X,EAAI9W,EAAE8W,GAAK,IAWxB7b,EAAQmqB,aAAe,SAASjmB,EAAGa,GACjC,GAAImlB,GAAe,GAAIlqB,EAMvB,OAJAkqB,GAAaxZ,EAAIxM,EAAEyM,EAAI5L,EAAE8W,EAAI3X,EAAE2X,EAAI9W,EAAE4L,EACrCuZ,EAAavZ,EAAIzM,EAAE2X,EAAI9W,EAAE2L,EAAIxM,EAAEwM,EAAI3L,EAAE8W,EACrCqO,EAAarO,EAAI3X,EAAEwM,EAAI3L,EAAE4L,EAAIzM,EAAEyM,EAAI5L,EAAE2L,EAE9BwZ,GAQTlqB,EAAQ8R,UAAUxN,OAAS,WACzB,MAAOT,MAAKyqB,KACJ3vB,KAAK+R,EAAI/R,KAAK+R,EACd/R,KAAKgS,EAA
IhS,KAAKgS,EACdhS,KAAKkd,EAAIld,KAAKkd,IAIxBrd,EAAOD,QAAUyB,GAKb,SAASxB,EAAQD,EAASM,GAa9B,QAASoB,GAAOiY,EAAW9K,GACzB,GAAkBjI,SAAd+S,EACF,KAAM,qCAKR,IAHAvZ,KAAKuZ,UAAYA,EACjBvZ,KAAK0oB,QAAWja,GAA8BjI,QAAnBiI,EAAQia,QAAwBja,EAAQia,SAAU,EAEzE1oB,KAAK0oB,QAAS,CAChB1oB,KAAKsf,MAAQ/N,SAASM,cAAc,OAEpC7R,KAAKsf,MAAMrS,MAAMsF,MAAQ,OACzBvS,KAAKsf,MAAMrS,MAAM2W,SAAW,WAC5B5jB,KAAKuZ,UAAU9H,YAAYzR,KAAKsf,OAEhCtf,KAAKsf,MAAMsS,KAAOrgB,SAASM,cAAc,SACzC7R,KAAKsf,MAAMsS,KAAK9qB,KAAO,SACvB9G,KAAKsf,MAAMsS,KAAKvqB,MAAQ,OACxBrH,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAMsS,MAElC5xB,KAAKsf,MAAM0F,KAAOzT,SAASM,cAAc,SACzC7R,KAAKsf,MAAM0F,KAAKle,KAAO,SACvB9G,KAAKsf,MAAM0F,KAAK3d,MAAQ,OACxBrH,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAM0F,MAElChlB,KAAKsf,MAAM+I,KAAO9W,SAASM,cAAc,SACzC7R,KAAKsf,MAAM+I,KAAKvhB,KAAO,SACvB9G,KAAKsf,MAAM+I,KAAKhhB,MAAQ,OACxBrH,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAM+I,MAElCroB,KAAKsf,MAAMuS,IAAMtgB,SAASM,cAAc,SACxC7R,KAAKsf,MAAMuS,IAAI/qB,KAAO,SACtB9G,KAAKsf,MAAMuS,IAAI5kB,MAAM2W,SAAW,WAChC5jB,KAAKsf,MAAMuS,IAAI5kB,MAAMb,OAAS,gBAC9BpM,KAAKsf,MAAMuS,IAAI5kB,MAAMsF,MAAQ,QAC7BvS,KAAKsf,MAAMuS,IAAI5kB,MAAMuF,OAAS,MAC9BxS,KAAKsf,MAAMuS,IAAI5kB,MAAM+iB,aAAe,MACpChwB,KAAKsf,MAAMuS,IAAI5kB,MAAM6kB,gBAAkB,MACvC9xB,KAAKsf,MAAMuS,IAAI5kB,MAAMb,OAAS,oBAC9BpM,KAAKsf,MAAMuS,IAAI5kB,MAAM0S,gBAAkB,UACvC3f,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAMuS,KAElC7xB,KAAKsf,MAAMyS,MAAQxgB,SAASM,cAAc,SAC1C7R,KAAKsf,MAAMyS,MAAMjrB,KAAO,SACxB9G,KAAKsf,MAAMyS,MAAM9kB,MAAMyM,OAAS,MAChC1Z,KAAKsf,MAAMyS,MAAM1qB,MAAQ,IACzBrH,KAAKsf,MAAMyS,MAAM9kB,MAAM2W,SAAW,WAClC5jB,KAAKsf,MAAMyS,MAAM9kB,MAAMxF,KAAO,SAC9BzH,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAMyS,MAGlC,IAAI5d,GAAKnU,IACTA,MAAKsf,MAAMyS,MAAM7N,YAAc,SAAU5a,GAAQ6K,EAAGgQ,aAAa7a,IACjEtJ,KAAKsf,MAAMsS,KAAKI,QAAU,SAAU1oB,GAAQ6K,EAAGyd,KAAKtoB,IACpDtJ,KAAKsf,MAAM0F,KAAKgN,QAAU,SAAU1oB,GAAQ6K,EAAG8d,WAAW3oB,IAC1DtJ,KAAKsf,MAAM+I,KAAK2J,QAAU,SAAU1oB,GAAQ6K,EAAGkU,KAAK/e,IAGtDtJ,KAAKkyB,iBAAmB1rB,OAExBxG,KAAK8W,UACL9W,KAAKmI,MAAQ3B,OAEbxG,KAAKmyB,YAAc3rB,OACnBxG,KAAKoyB,aAAe,IACpBpyB,KAAKqyB,UAAW,EA3ElB,GAAI1xB,GAAOT,EAAoB,EAiF/BoB,GAAO6R,UAAUye,KAAO,WACtB,GAAIzpB,GAAQnI,KAAK8oB,UACb3gB,GAAQ,IACVA,IACAnI,KAAKsyB,SAASnqB,KAOlB7G,EAAO6R,UAAUkV,KAAO,WACtB,GAAIlgB,GAAQnI,KAAK8oB,UACb3gB,GAAQnI,KAAK8W,OAAOnR,OAAS,IAC/BwC,IACAnI,KAAKsyB,SAASnqB,KAOlB7G,EAAO6R,UAAUof,SAAW,WAC1B,GAAI3iB,GAAQ,GAAItL,MAEZ6D,EAAQnI,KAAK8oB,UACb3gB,GAAQnI,KAAK8W,OAAOnR,OAAS,GAC/BwC,IACAnI,KAAKsyB,SAASnqB,IAEPnI,KAAKqyB,WAEZlqB,EAAQ,EACRnI,KAAKsyB,SAASnqB,GAGhB,IAAI0H,GAAM,GAAIvL,MACV+nB,EAAQxc,EAAMD,EAId4iB,EAAWttB,KAAKwH,IAAI1M,KAAKoyB,aAAe/F,EAAM,GAG9ClY,EAAKnU,IACTA,MAAKmyB,YAAc7Y,WAAW,WAAYnF,EAAGoe,YAAcC,IAM7DlxB,EAAO6R,UAAU8e,WAAa,WACHzrB,SAArBxG,KAAKmyB,YACPnyB,KAAKglB,OAELhlB,KAAKklB,QAOT5jB,EAAO6R,UAAU6R,KAAO,WAElBhlB,KAAKmyB,cAETnyB,KAAKuyB,WAEDvyB,KAAKsf,QACPtf,KAAKsf,MAAM0F,KAAK3d,MAAQ,UAO5B/F,EAAO6R,UAAU+R,KAAO,WACtBuN,cAAczyB,KAAKmyB,aACnBnyB,KAAKmyB,YAAc3rB,OAEfxG,KAAKsf,QACPtf,KAAKsf,MAAM0F,KAAK3d,MAAQ,SAQ5B/F,EAAO6R,UAAU6V,oBAAsB,SAAS1gB,GAC9CtI,KAAKkyB,iBAAmB5pB,GAO1BhH,EAAO6R,UAAUyV,gBAAkB,SAAS4J,GAC1CxyB,KAAKoyB,aAAeI,GAOtBlxB,EAAO6R,UAAUuf,gBAAkB,WACjC,MAAO1yB,MAAKoyB,cASd9wB,EAAO6R,UAAUwf,YAAc,SAASC,GACtC5yB,KAAKqyB,SAAWO,GAOlBtxB,EAAO6R,UAAU0f,SAAW,WACIrsB,SAA1BxG,KAAKkyB,kBACPlyB,KAAKkyB,oBAOT5wB,EAAO6R,UAAUsO,OAAS,WACxB,GAAIzhB,KAAKsf,MAAO,CAEdtf,KAAKsf,MAAMuS,IAAI5kB,MAAMtF,IAAO3H,KAAKsf,MAAMuF,aAAa,EAChD7kB,KAAKsf,MAAMuS,IAAIvB,aAAa,EAAK,KACrCtwB,KAAKsf,MAAMuS,IAAI5kB,MAAMsF,MAASvS,KAAKsf,MAAME,YACrCxf,KAAKsf,MAAMsS,KAAKpS,YAChBxf,KAAKsf,MAAM0F,KAAKxF,YAChBxf,KAAKsf,MAAM+I,KAAK7I,YAAc,GAAO,IAGzC,IAAI/X,GAAOzH,KAAK8yB,YAAY9yB
,KAAKmI,MACjCnI,MAAKsf,MAAMyS,MAAM9kB,MAAMxF,KAAO,EAAS,OAS3CnG,EAAO6R,UAAUwV,UAAY,SAAS7R,GACpC9W,KAAK8W,OAASA,EAEV9W,KAAK8W,OAAOnR,OAAS,EACvB3F,KAAKsyB,SAAS,GAEdtyB,KAAKmI,MAAQ3B,QAOjBlF,EAAO6R,UAAUmf,SAAW,SAASnqB,GACnC,KAAIA,EAAQnI,KAAK8W,OAAOnR,QAOtB,KAAM,2BANN3F,MAAKmI,MAAQA,EAEbnI,KAAKyhB,SACLzhB,KAAK6yB,YAWTvxB,EAAO6R,UAAU2V,SAAW,WAC1B,MAAO9oB,MAAKmI,OAQd7G,EAAO6R,UAAU+B,IAAM,WACrB,MAAOlV,MAAK8W,OAAO9W,KAAKmI,QAI1B7G,EAAO6R,UAAUgR,aAAe,SAAS7a,GAEvC,GAAIgjB,GAAiBhjB,EAAMkjB,MAAyB,IAAhBljB,EAAMkjB,MAAiC,IAAjBljB,EAAMmjB,MAChE,IAAKH,EAAL,CAEAtsB,KAAK+yB,aAAezpB,EAAMqT,QAC1B3c,KAAKgzB,YAAc3N,WAAWrlB,KAAKsf,MAAMyS,MAAM9kB,MAAMxF,MAErDzH,KAAKsf,MAAMrS,MAAM+f,OAAS,MAK1B,IAAI7Y,GAAKnU,IACTA,MAAKitB,YAAc,SAAU3jB,GAAQ6K,EAAG+Y,aAAa5jB,IACrDtJ,KAAKmtB,UAAc,SAAU7jB,GAAQ6K,EAAGoY,WAAWjjB,IACnD3I,EAAKgI,iBAAiB4I,SAAU,YAAavR,KAAKitB,aAClDtsB,EAAKgI,iBAAiB4I,SAAU,UAAavR,KAAKmtB,WAClDxsB,EAAK0I,eAAeC,KAItBhI,EAAO6R,UAAU8f,YAAc,SAAUxrB,GACvC,GAAI8K,GAAQ8S,WAAWrlB,KAAKsf,MAAMuS,IAAI5kB,MAAMsF,OACxCvS,KAAKsf,MAAMyS,MAAMvS,YAAc,GAC/BzN,EAAItK,EAAO,EAEXU,EAAQjD,KAAKwoB,MAAM3b,EAAIQ,GAASvS,KAAK8W,OAAOnR,OAAO,GAIvD,OAHY,GAARwC,IAAWA,EAAQ,GACnBA,EAAQnI,KAAK8W,OAAOnR,OAAO,IAAGwC,EAAQnI,KAAK8W,OAAOnR,OAAO,GAEtDwC,GAGT7G,EAAO6R,UAAU2f,YAAc,SAAU3qB,GACvC,GAAIoK,GAAQ8S,WAAWrlB,KAAKsf,MAAMuS,IAAI5kB,MAAMsF,OACxCvS,KAAKsf,MAAMyS,MAAMvS,YAAc,GAE/BzN,EAAI5J,GAASnI,KAAK8W,OAAOnR,OAAO,GAAK4M,EACrC9K,EAAOsK,EAAI,CAEf,OAAOtK,IAKTnG,EAAO6R,UAAU+Z,aAAe,SAAU5jB,GACxC,GAAI+iB,GAAO/iB,EAAMqT,QAAU3c,KAAK+yB,aAC5BhhB,EAAI/R,KAAKgzB,YAAc3G,EAEvBlkB,EAAQnI,KAAKizB,YAAYlhB,EAE7B/R,MAAKsyB,SAASnqB,GAEdxH,EAAK0I,kBAIP/H,EAAO6R,UAAUoZ,WAAa,WAC5BvsB,KAAKsf,MAAMrS,MAAM+f,OAAS,OAG1BrsB,EAAKwI,oBAAoBoI,SAAU,YAAavR,KAAKitB,aACrDtsB,EAAKwI,oBAAoBoI,SAAU,UAAWvR,KAAKmtB,WAEnDxsB,EAAK0I,kBAGPxJ,EAAOD,QAAU0B,GAKb,SAASzB,GA2Bb,QAAS0B,GAAWqO,EAAOC,EAAKsY,EAAMmB,GAEpCtpB,KAAKkzB,OAAS,EACdlzB,KAAKmzB,KAAO,EACZnzB,KAAKozB,MAAQ,EACbpzB,KAAKspB,YAAa,EAClBtpB,KAAKqzB,UAAY,EAEjBrzB,KAAKszB,SAAW,EAChBtzB,KAAKuzB,SAAS3jB,EAAOC,EAAKsY,EAAMmB,GAYlC/nB,EAAW4R,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAKsY,EAAMmB,GACzDtpB,KAAKkzB,OAAStjB,EAAQA,EAAQ,EAC9B5P,KAAKmzB,KAAOtjB,EAAMA,EAAM,EAExB7P,KAAKwzB,QAAQrL,EAAMmB,IASrB/nB,EAAW4R,UAAUqgB,QAAU,SAASrL,EAAMmB,GAC/B9iB,SAAT2hB,GAA8B,GAARA,IAGP3hB,SAAf8iB,IACFtpB,KAAKspB,WAAaA,GAGlBtpB,KAAKozB,MADHpzB,KAAKspB,cAAe,EACT/nB,EAAWkyB,oBAAoBtL,GAE/BA,IAUjB5mB,EAAWkyB,oBAAsB,SAAUtL,GACzC,GAAIuL,GAAQ,SAAU3hB,GAAI,MAAO7M,MAAKyuB,IAAI5hB,GAAK7M,KAAK0uB,MAGhDC,EAAQ3uB,KAAK4uB,IAAI,GAAI5uB,KAAKwoB,MAAMgG,EAAMvL,KACtC4L,EAAQ,EAAI7uB,KAAK4uB,IAAI,GAAI5uB,KAAKwoB,MAAMgG,EAAMvL,EAAO,KACjD6L,EAAQ,EAAI9uB,KAAK4uB,IAAI,GAAI5uB,KAAKwoB,MAAMgG,EAAMvL,EAAO,KAGjDmB,EAAauK,CASjB,OARI3uB,MAAK2lB,IAAIkJ,EAAQ5L,IAASjjB,KAAK2lB,IAAIvB,EAAanB,KAAOmB,EAAayK,GACpE7uB,KAAK2lB,IAAImJ,EAAQ7L,IAASjjB,KAAK2lB,IAAIvB,EAAanB,KAAOmB,EAAa0K,GAGtD,GAAd1K,IACFA,EAAa,GAGRA,GAOT/nB,EAAW4R,UAAUiV,WAAa,WAChC,MAAO/C,YAAWrlB,KAAKszB,SAASW,YAAYj0B,KAAKqzB,aAOnD9xB,EAAW4R,UAAU+gB,QAAU,WAC7B,MAAOl0B,MAAKozB,OAOd7xB,EAAW4R,UAAUvD,MAAQ,WAC3B5P,KAAKszB,SAAWtzB,KAAKkzB,OAASlzB,KAAKkzB,OAASlzB,KAAKozB,OAMnD7xB,EAAW4R,UAAUkV,KAAO,WAC1BroB,KAAKszB,UAAYtzB,KAAKozB,OAOxB7xB,EAAW4R,UAAUtD,IAAM,WACzB,MAAQ7P,MAAKszB,SAAWtzB,KAAKmzB,MAG/BtzB,EAAOD,QAAU2B,GAKb,SAAS1B,EAAQD,EAASM,GAuB9B,QAASsB,GAAU+X,EAAWtX,EAAOkyB,EAAQ1lB,GAC3C,KAAMzO,eAAgBwB,IACpB,KAAM,IAAIgY,aAAY,mDAIxB,MAAMvT,MAAMC,QAAQiuB,IAAWA,YAAkBtzB,KAAYszB,YAAkB5tB,QAAQ,CACrF,GAAI6tB,GAAgB3lB,CACpBA,GAAU0lB,EACVA,EAASC,EAGX,GAAIjgB,GAAKnU,IACTA,MAAKq0B,gBACHzkB,MAAO,KACPC,IAAO,KAEPykB,YAAY,EAEZC,YAAa,SACbhiB,MAAO,KACPC,OAAQ,KACRgiB,U
AAW,KACXC,UAAW,MAEbz0B,KAAKyO,QAAU9N,EAAK8F,cAAezG,KAAKq0B,gBAGxCr0B,KAAK00B,QAAQnb,GAGbvZ,KAAKgC,cAELhC,KAAK20B,MACH5E,IAAK/vB,KAAK+vB,IACV6E,SAAU50B,KAAKgG,MACf6uB,SACEthB,GAAIvT,KAAKuT,GAAGuhB,KAAK90B,MACjB0T,IAAK1T,KAAK0T,IAAIohB,KAAK90B,MACnB4tB,KAAM5tB,KAAK4tB,KAAKkH,KAAK90B,OAEvB+0B,eACAp0B,MACEq0B,KAAM,KACNC,SAAU9gB,EAAG+gB,UAAUJ,KAAK3gB,GAC5BghB,eAAgBhhB,EAAGihB,gBAAgBN,KAAK3gB,GACxCkhB,OAAQlhB,EAAGmhB,QAAQR,KAAK3gB,GACxBohB,aAAephB,EAAGqhB,cAAcV,KAAK3gB,KAKzCnU,KAAKy1B,MAAQ,GAAI5zB,GAAM7B,KAAK20B,MAC5B30B,KAAKgC,WAAWgG,KAAKhI,KAAKy1B,OAC1Bz1B,KAAK20B,KAAKc,MAAQz1B,KAAKy1B,MAGvBz1B,KAAK01B,SAAW,GAAIzyB,GAASjD,KAAK20B,MAClC30B,KAAKgC,WAAWgG,KAAKhI,KAAK01B,UAC1B11B,KAAK20B,KAAKh0B,KAAKq0B,KAAOh1B,KAAK01B,SAASV,KAAKF,KAAK90B,KAAK01B,UAGnD11B,KAAK21B,YAAc,GAAInzB,GAAYxC,KAAK20B,MACxC30B,KAAKgC,WAAWgG,KAAKhI,KAAK21B,aAI1B31B,KAAK41B,WAAa,GAAInzB,GAAWzC,KAAK20B,MACtC30B,KAAKgC,WAAWgG,KAAKhI,KAAK41B,YAG1B51B,KAAK61B,QAAU,GAAI/yB,GAAQ9C,KAAK20B,MAChC30B,KAAKgC,WAAWgG,KAAKhI,KAAK61B,SAE1B71B,KAAK81B,UAAY,KACjB91B,KAAK+1B,WAAa,KAGdtnB,GACFzO,KAAKkT,WAAWzE,GAId0lB,GACFn0B,KAAKg2B,UAAU7B,GAIblyB,EACFjC,KAAKi2B,SAASh0B,GAGdjC,KAAKyhB,SAjHT,GAEI9gB,IAFUT,EAAoB,IACrBA,EAAoB,IACtBA,EAAoB,IAC3BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/B2B,EAAQ3B,EAAoB,IAC5Bg2B,EAAOh2B,EAAoB,IAC3B+C,EAAW/C,EAAoB,IAC/BsC,EAActC,EAAoB,IAClCuC,EAAavC,EAAoB,IACjC4C,EAAU5C,EAAoB,GA4GlCsB,GAAS2R,UAAY,GAAI+iB,GAMzB10B,EAAS2R,UAAU8iB,SAAW,SAASh0B,GACrC,GAGIk0B,GAHAC,EAAiC,MAAlBp2B,KAAK81B,SAwBxB,IAhBEK,EAJGl0B,EAGIA,YAAiBpB,IAAWoB,YAAiBnB,GACvCmB,EAIA,GAAIpB,GAAQoB,GACvB6E,MACE8I,MAAO,OACPC,IAAK,UAVI,KAgBf7P,KAAK81B,UAAYK,EACjBn2B,KAAK61B,SAAW71B,KAAK61B,QAAQI,SAASE,GAElCC,EACF,GAA0B5vB,QAAtBxG,KAAKyO,QAAQmB,OAA0CpJ,QAApBxG,KAAKyO,QAAQoB,IAAkB,CACpE,GAA0BrJ,QAAtBxG,KAAKyO,QAAQmB,OAA0CpJ,QAApBxG,KAAKyO,QAAQoB,IAClD,GAAIwmB,GAAYr2B,KAAKs2B,eAGvB,IAAI1mB,GAA8BpJ,QAAtBxG,KAAKyO,QAAQmB,MAAqB5P,KAAKyO,QAAQmB,MAAQymB,EAAUzmB,MACzEC,EAA4BrJ,QAApBxG,KAAKyO,QAAQoB,IAAqB7P,KAAKyO,QAAQoB,IAAQwmB,EAAUxmB,GAE7E7P,MAAKu2B,UAAU3mB,EAAOC,GAAM2mB,SAAS,QAGrCx2B,MAAKy2B,KAAKD,SAAS,KASzBh1B,EAAS2R,UAAU6iB,UAAY,SAAS7B,GAEtC,GAAIgC,EAKFA,GAJGhC,EAGIA,YAAkBtzB,IAAWszB,YAAkBrzB,GACzCqzB,EAIA,GAAItzB,GAAQszB,GAPZ,KAUfn0B,KAAK+1B,WAAaI,EAClBn2B,KAAK61B,QAAQG,UAAUG,IAmBzB30B,EAAS2R,UAAUujB,aAAe,SAASvhB,EAAK1G,GAC9CzO,KAAK61B,SAAW71B,KAAK61B,QAAQa,aAAavhB,GAEtC1G,GAAWA,EAAQkoB,OACrB32B,KAAK22B,MAAMxhB,EAAK1G,IAQpBjN,EAAS2R,UAAUyjB,aAAe,WAChC,MAAO52B,MAAK61B,SAAW71B,KAAK61B,QAAQe,oBAetCp1B,EAAS2R,UAAUwjB,MAAQ,SAASt2B,EAAIoO,GACtC,GAAKzO,KAAK81B,WAAmBtvB,QAANnG,EAAvB,CAEA,GAAI8U,GAAMlP,MAAMC,QAAQ7F,GAAMA,GAAMA,GAGhCy1B,EAAY91B,KAAK81B,UAAUhgB,aAAaZ,IAAIC,GAC9CrO,MACE8I,MAAO,OACPC,IAAK,UAKLD,EAAQ,KACRC,EAAM,IAcV,IAbAimB,EAAUztB,QAAQ,SAAUwuB,GAC1B,GAAIjrB,GAAIirB,EAASjnB,MAAM5I,UACnB8vB,EAAI,OAASD,GAAWA,EAAShnB,IAAI7I,UAAY6vB,EAASjnB,MAAM5I,WAEtD,OAAV4I,GAAsBA,EAAJhE,KACpBgE,EAAQhE,IAGE,OAARiE,GAAgBinB,EAAIjnB,KACtBA,EAAMinB,KAII,OAAVlnB,GAA0B,OAARC,EAAc,CAElC,GAAIT,IAAUQ,EAAQC,GAAO,EACzB2iB,EAAWttB,KAAKwH,IAAK1M,KAAKy1B,MAAM5lB,IAAM7P,KAAKy1B,MAAM7lB,MAAwB,KAAfC,EAAMD,IAEhE4mB,EAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAC7Ex2B,MAAKy1B,MAAMlC,SAASnkB,EAASojB,EAAW,EAAGpjB,EAASojB,EAAW,EAAGgE,MAUtEh1B,EAAS2R,UAAU4jB,aAAe,WAEhC,GAAIC,GAAUh3B,KAAK81B,UAAUhgB,aAC3BhK,EAAM,KACNY,EAAM,IAER,IAAIsqB,EAAS,CAEX,GAAIC,GAAUD,EAAQlrB,IAAI,QAC1BA,GAAMmrB,EAAUt2B,EAAKkG,QAAQowB,EAAQrnB,MAAO,QAAQ5I,UAAY,IAKhE,IAAIkwB,GAAeF,EAAQtqB,IAAI,QAC3BwqB,KACFxqB,EAAM/L,EAAKkG,QAAQqwB,EAAatnB,MAAO,QAAQ5I,UAEjD,IAAImwB,GAAaH,EAAQtqB,IAAI,MACzByqB,KAEAzqB,EADS,MAAPA,EACI/L,EAAKk
G,QAAQswB,EAAWtnB,IAAK,QAAQ7I,UAGrC9B,KAAKwH,IAAIA,EAAK/L,EAAKkG,QAAQswB,EAAWtnB,IAAK,QAAQ7I,YAK/D,OACE8E,IAAa,MAAPA,EAAe,GAAIxH,MAAKwH,GAAO,KACrCY,IAAa,MAAPA,EAAe,GAAIpI,MAAKoI,GAAO,OAKzC7M,EAAOD,QAAU4B,GAKb,SAAS3B,EAAQD,EAASM,GAsB9B,QAASuB,GAAS8X,EAAWtX,EAAOkyB,EAAQ1lB,GAE1C,KAAMxI,MAAMC,QAAQiuB,IAAWA,YAAkBtzB,KAAYszB,YAAkB5tB,QAAQ,CACrF,GAAI6tB,GAAgB3lB,CACpBA,GAAU0lB,EACVA,EAASC,EAGX,GAAIjgB,GAAKnU,IACTA,MAAKq0B,gBACHzkB,MAAO,KACPC,IAAO,KAEPykB,YAAY,EAEZC,YAAa,SACbhiB,MAAO,KACPC,OAAQ,KACRgiB,UAAW,KACXC,UAAW,MAEbz0B,KAAKyO,QAAU9N,EAAK8F,cAAezG,KAAKq0B,gBAGxCr0B,KAAK00B,QAAQnb,GAGbvZ,KAAKgC,cAELhC,KAAK20B,MACH5E,IAAK/vB,KAAK+vB,IACV6E,SAAU50B,KAAKgG,MACf6uB,SACEthB,GAAIvT,KAAKuT,GAAGuhB,KAAK90B,MACjB0T,IAAK1T,KAAK0T,IAAIohB,KAAK90B,MACnB4tB,KAAM5tB,KAAK4tB,KAAKkH,KAAK90B,OAEvB+0B,eACAp0B,MACEq0B,KAAM,KACNC,SAAU9gB,EAAG+gB,UAAUJ,KAAK3gB,GAC5BghB,eAAgBhhB,EAAGihB,gBAAgBN,KAAK3gB,GACxCkhB,OAAQlhB,EAAGmhB,QAAQR,KAAK3gB,GACxBohB,aAAephB,EAAGqhB,cAAcV,KAAK3gB,KAKzCnU,KAAKy1B,MAAQ,GAAI5zB,GAAM7B,KAAK20B,MAC5B30B,KAAKgC,WAAWgG,KAAKhI,KAAKy1B,OAC1Bz1B,KAAK20B,KAAKc,MAAQz1B,KAAKy1B,MAGvBz1B,KAAK01B,SAAW,GAAIzyB,GAASjD,KAAK20B,MAClC30B,KAAKgC,WAAWgG,KAAKhI,KAAK01B,UAC1B11B,KAAK20B,KAAKh0B,KAAKq0B,KAAOh1B,KAAK01B,SAASV,KAAKF,KAAK90B,KAAK01B,UAGnD11B,KAAK21B,YAAc,GAAInzB,GAAYxC,KAAK20B,MACxC30B,KAAKgC,WAAWgG,KAAKhI,KAAK21B,aAI1B31B,KAAK41B,WAAa,GAAInzB,GAAWzC,KAAK20B,MACtC30B,KAAKgC,WAAWgG,KAAKhI,KAAK41B,YAG1B51B,KAAKo3B,UAAY,GAAIp0B,GAAUhD,KAAK20B,MACpC30B,KAAKgC,WAAWgG,KAAKhI,KAAKo3B,WAE1Bp3B,KAAK81B,UAAY,KACjB91B,KAAK+1B,WAAa,KAGdtnB,GACFzO,KAAKkT,WAAWzE,GAId0lB,GACFn0B,KAAKg2B,UAAU7B,GAIblyB,EACFjC,KAAKi2B,SAASh0B,GAGdjC,KAAKyhB,SA5GT,GAEI9gB,IAFUT,EAAoB,IACrBA,EAAoB,IACtBA,EAAoB,IAC3BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/B2B,EAAQ3B,EAAoB,IAC5Bg2B,EAAOh2B,EAAoB,IAC3B+C,EAAW/C,EAAoB,IAC/BsC,EAActC,EAAoB,IAClCuC,EAAavC,EAAoB,IACjC8C,EAAY9C,EAAoB,GAuGpCuB,GAAQ0R,UAAY,GAAI+iB,GAMxBz0B,EAAQ0R,UAAU8iB,SAAW,SAASh0B,GACpC,GAGIk0B,GAHAC,EAAiC,MAAlBp2B,KAAK81B,SAwBxB,IAhBEK,EAJGl0B,EAGIA,YAAiBpB,IAAWoB,YAAiBnB,GACvCmB,EAIA,GAAIpB,GAAQoB,GACvB6E,MACE8I,MAAO,OACPC,IAAK,UAVI,KAgBf7P,KAAK81B,UAAYK,EACjBn2B,KAAKo3B,WAAap3B,KAAKo3B,UAAUnB,SAASE,GAEtCC,EACF,GAA0B5vB,QAAtBxG,KAAKyO,QAAQmB,OAA0CpJ,QAApBxG,KAAKyO,QAAQoB,IAAkB,CACpE,GAAID,GAA8BpJ,QAAtBxG,KAAKyO,QAAQmB,MAAqB5P,KAAKyO,QAAQmB,MAAQ,KAC/DC,EAA4BrJ,QAApBxG,KAAKyO,QAAQoB,IAAqB7P,KAAKyO,QAAQoB,IAAM,IAEjE7P,MAAKu2B,UAAU3mB,EAAOC,GAAM2mB,SAAS,QAGrCx2B,MAAKy2B,KAAKD,SAAS,KASzB/0B,EAAQ0R,UAAU6iB,UAAY,SAAS7B,GAErC,GAAIgC,EAKFA,GAJGhC,EAGIA,YAAkBtzB,IAAWszB,YAAkBrzB,GACzCqzB,EAIA,GAAItzB,GAAQszB,GAPZ,KAUfn0B,KAAK+1B,WAAaI,EAClBn2B,KAAKo3B,UAAUpB,UAAUG,IAS3B10B,EAAQ0R,UAAUkkB,UAAY,SAASC,EAAS/kB,EAAOC,GAGrD,MAFehM,UAAX+L,IAAuBA,EAAS,IACrB/L,SAAXgM,IAAuBA,EAAS,IACGhM,SAAnCxG,KAAKo3B,UAAUjD,OAAOmD,GACjBt3B,KAAKo3B,UAAUjD,OAAOmD,GAASD,UAAU9kB,EAAMC,GAG/C,qBAAwB8kB,GASnC71B,EAAQ0R,UAAUokB,eAAiB,SAASD,GAC1C,MAAuC9wB,UAAnCxG,KAAKo3B,UAAUjD,OAAOmD,GAChBt3B,KAAKo3B,UAAUjD,OAAOmD,GAAS5O,UAAkEliB,SAAtDxG,KAAKo3B,UAAU3oB,QAAQ0lB,OAAOqD,WAAWF,IAA+E,GAArDt3B,KAAKo3B,UAAU3oB,QAAQ0lB,OAAOqD,WAAWF,KAGxJ,GAWX71B,EAAQ0R,UAAU4jB,aAAe,WAC/B,GAAIjrB,GAAM,KACNY,EAAM,IAGV,KAAK,GAAI4qB,KAAWt3B,MAAKo3B,UAAUjD,OACjC,GAAIn0B,KAAKo3B,UAAUjD,OAAOruB,eAAewxB,IACO,GAA1Ct3B,KAAKo3B,UAAUjD,OAAOmD,GAAS5O,QACjC,IAAK,GAAIljB,GAAI,EAAGA,EAAIxF,KAAKo3B,UAAUjD,OAAOmD,GAASxB,UAAUnwB,OAAQH,IAAK,CACxE,GAAI6J,GAAOrP,KAAKo3B,UAAUjD,OAAOmD,GAASxB,UAAUtwB,GAChD6B,EAAQ1G,EAAKkG,QAAQwI,EAAK0C,EAAG,QAAQ/K,SACzC8E,GAAa,MAAPA,EAAczE,EAAQyE,EAAMzE,EAAQA,EAAQyE,EAClDY,EAAa,MAAPA,EAAcrF,EAAcA,EAANqF,EAAcrF,EAAQqF,EAM1D,O
ACEZ,IAAa,MAAPA,EAAe,GAAIxH,MAAKwH,GAAO,KACrCY,IAAa,MAAPA,EAAe,GAAIpI,MAAKoI,GAAO,OAMzC7M,EAAOD,QAAU6B,GAKb,SAAS5B,EAAQD,EAASM,GAK9B,GAAI2D,GAAS3D,EAAoB,GAQjCN,GAAQ63B,qBAAuB,SAAS9C,EAAMI,GAE5C,GADAJ,EAAKI,eACDA,GACgC,GAA9B9uB,MAAMC,QAAQ6uB,GAAsB,CACtC,IAAK,GAAIvvB,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IACtC,GAA8BgB,SAA1BuuB,EAAYvvB,GAAGkyB,OAAsB,CACvC,GAAIC,KACJA,GAAS/nB,MAAQ/L,EAAOkxB,EAAYvvB,GAAGoK,OAAO1I,SAASF,UACvD2wB,EAAS9nB,IAAMhM,EAAOkxB,EAAYvvB,GAAGqK,KAAK3I,SAASF,UACnD2tB,EAAKI,YAAY/sB,KAAK2vB,GAG1BhD,EAAKI,YAAY7e,KAAK,SAAU3Q,EAAGa,GACjC,MAAOb,GAAEqK,MAAQxJ,EAAEwJ,UAY3BhQ,EAAQg4B,kBAAoB,SAAUjD,EAAMI,GAC1C,GAAIA,GAAuDvuB,SAAxCmuB,EAAKC,SAASiD,gBAAgBtlB,MAAqB,CACpE3S,EAAQ63B,qBAAqB9C,EAAMI,EAQnC,KAAK,GANDnlB,GAAQ/L,EAAO8wB,EAAKc,MAAM7lB,OAC1BC,EAAMhM,EAAO8wB,EAAKc,MAAM5lB,KAExBioB,EAAcnD,EAAKc,MAAM5lB,IAAM8kB,EAAKc,MAAM7lB,MAC1CmoB,EAAYD,EAAanD,EAAKC,SAASiD,gBAAgBtlB,MAElD/M,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IACtC,GAA8BgB,SAA1BuuB,EAAYvvB,GAAGkyB,OAAsB,CACvC,GAAIM,GAAYn0B,EAAOkxB,EAAYvvB,GAAGoK,OAClCqoB,EAAUp0B,EAAOkxB,EAAYvvB,GAAGqK,IAEpC,IAAoB,gBAAhBmoB,EAAUE,GACZ,KAAM,IAAIt0B,OAAM,qCAAuCmxB,EAAYvvB,GAAGoK,MAExE,IAAkB,gBAAdqoB,EAAQC,GACV,KAAM,IAAIt0B,OAAM,mCAAqCmxB,EAAYvvB,GAAGqK,IAGtE,IAAIC,GAAWmoB,EAAUD,CACzB,IAAIloB,GAAY,EAAIioB,EAAW,CAE7B,GAAIpO,GAAS,EACTwO,EAAWtoB,EAAIuoB,OACnB,QAAQrD,EAAYvvB,GAAGkyB,QACrB,IAAK,QACCM,EAAUK,OAASJ,EAAQI,QAC7B1O,EAAS,GAEXqO,EAAUM,UAAU1oB,EAAM0oB,aAC1BN,EAAUO,KAAK3oB,EAAM2oB,QACrBP,EAAU3M,SAAS,EAAE,QAErB4M,EAAQK,UAAU1oB,EAAM0oB,aACxBL,EAAQM,KAAK3oB,EAAM2oB,QACnBN,EAAQ5M,SAAS,EAAI1B,EAAO,QAE5BwO,EAASllB,IAAI,EAAG,QAChB,MACF,KAAK,SACH,GAAIulB,GAAYP,EAAQ5L,KAAK2L,EAAU,QACnCK,EAAML,EAAUK,KAGpBL,GAAUS,KAAK7oB,EAAM6oB,QACrBT,EAAUU,MAAM9oB,EAAM8oB,SACtBV,EAAUO,KAAK3oB,EAAM2oB,QACrBN,EAAUD,EAAUI,QAGpBJ,EAAUK,IAAIA,GACdJ,EAAQI,IAAIA,GACZJ,EAAQhlB,IAAIulB,EAAU,QAEtBR,EAAU3M,SAAS,EAAE,SACrB4M,EAAQ5M,SAAS,EAAE,SAEnB8M,EAASllB,IAAI,EAAG,QAChB,MACF,KAAK,UACC+kB,EAAUU,SAAWT,EAAQS,UAC/B/O,EAAS,GAEXqO,EAAUU,MAAM9oB,EAAM8oB,SACtBV,EAAUO,KAAK3oB,EAAM2oB,QACrBP,EAAU3M,SAAS,EAAE,UAErB4M,EAAQS,MAAM9oB,EAAM8oB,SACpBT,EAAQM,KAAK3oB,EAAM2oB,QACnBN,EAAQ5M,SAAS,EAAE,UACnB4M,EAAQhlB,IAAI0W,EAAO,UAEnBwO,EAASllB,IAAI,EAAG,SAChB,MACF,KAAK,SACC+kB,EAAUO,QAAUN,EAAQM,SAC9B5O,EAAS,GAEXqO,EAAUO,KAAK3oB,EAAM2oB,QACrBP,EAAU3M,SAAS,EAAE,SACrB4M,EAAQM,KAAK3oB,EAAM2oB,QACnBN,EAAQ5M,SAAS,EAAE,SACnB4M,EAAQhlB,IAAI0W,EAAO,SAEnBwO,EAASllB,IAAI,EAAG,QAChB,MACF,SAEE,WADA0lB,SAAQhF,IAAI,2EAA4EoB,EAAYvvB,GAAGkyB,QAG3G,KAAmBS,EAAZH,GAEL,OADArD,EAAKI,YAAY/sB,MAAM4H,MAAOooB,EAAUhxB,UAAW6I,IAAKooB,EAAQjxB,YACxD+tB,EAAYvvB,GAAGkyB,QACrB,IAAK,QACHM,EAAU/kB,IAAI,EAAG,QACjBglB,EAAQhlB,IAAI,EAAG,OACf,MACF,KAAK,SACH+kB,EAAU/kB,IAAI,EAAG,SACjBglB,EAAQhlB,IAAI,EAAG,QACf,MACF,KAAK,UACH+kB,EAAU/kB,IAAI,EAAG,UACjBglB,EAAQhlB,IAAI,EAAG,SACf,MACF,KAAK,SACH+kB,EAAU/kB,IAAI,EAAG,KACjBglB,EAAQhlB,IAAI,EAAG,IACf,MACF,SAEE,WADA0lB,SAAQhF,IAAI,2EAA4EoB,EAAYvvB,GAAGkyB,QAI7G/C,EAAKI,YAAY/sB,MAAM4H,MAAOooB,EAAUhxB,UAAW6I,IAAKooB,EAAQjxB,aAKtEpH,EAAQg5B,iBAAiBjE,EAEzB,IAAIkE,GAAcj5B,EAAQk5B,SAASnE,EAAKc,MAAM7lB,MAAO+kB,EAAKI,aACtDgE,EAAYn5B,EAAQk5B,SAASnE,EAAKc,MAAM5lB,IAAI8kB,EAAKI,aACjDiE,EAAarE,EAAKc,MAAM7lB,MACxBqpB,EAAWtE,EAAKc,MAAM5lB,GACA,IAAtBgpB,EAAYK,SAAiBF,EAAwC,GAA3BrE,EAAKc,MAAM0D,aAAuBN,EAAYb,UAAY,EAAIa,EAAYZ,QAAU,GAC1G,GAApBc,EAAUG,SAAmBD,EAAsC,GAAzBtE,EAAKc,MAAM2D,WAAuBL,EAAUf,UAAY,EAAMe,EAAUd,QAAU,IACtG,GAAtBY,EAAYK,QAAsC,GAApBH,EAAUG,SAC1CvE,EAAKc,MAAM4D,YAAYL,EAAYC,KAYzCr5B,EAAQg5B,iBAAmB,SAASjE,GAGlC,IAAK,GAFDI,GAAcJ,EAAKI,YACnBuE,KACK9zB,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IACtC,IAAK,GAAIomB,GAAI,EAAGA,EAAIm
J,EAAYpvB,OAAQimB,IAClCpmB,GAAKomB,GAA8B,GAAzBmJ,EAAYnJ,GAAGvV,QAA2C,GAAzB0e,EAAYvvB,GAAG6Q,SAExD0e,EAAYnJ,GAAGhc,OAASmlB,EAAYvvB,GAAGoK,OAASmlB,EAAYnJ,GAAG/b,KAAOklB,EAAYvvB,GAAGqK,IACvFklB,EAAYnJ,GAAGvV,QAAS,EAGjB0e,EAAYnJ,GAAGhc,OAASmlB,EAAYvvB,GAAGoK,OAASmlB,EAAYnJ,GAAGhc,OAASmlB,EAAYvvB,GAAGqK,KAC9FklB,EAAYvvB,GAAGqK,IAAMklB,EAAYnJ,GAAG/b,IACpCklB,EAAYnJ,GAAGvV,QAAS,GAGjB0e,EAAYnJ,GAAG/b,KAAOklB,EAAYvvB,GAAGoK,OAASmlB,EAAYnJ,GAAG/b,KAAOklB,EAAYvvB,GAAGqK,MAC1FklB,EAAYvvB,GAAGoK,MAAQmlB,EAAYnJ,GAAGhc,MACtCmlB,EAAYnJ,GAAGvV,QAAS,GAMhC,KAAK,GAAI7Q,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAClCuvB,EAAYvvB,GAAG6Q,UAAW,GAC5BijB,EAAUtxB,KAAK+sB,EAAYvvB,GAI/BmvB,GAAKI,YAAcuE,EACnB3E,EAAKI,YAAY7e,KAAK,SAAU3Q,EAAGa,GACjC,MAAOb,GAAEqK,MAAQxJ,EAAEwJ,SAIvBhQ,EAAQ25B,WAAa,SAASC,GAC5B,IAAK,GAAIh0B,GAAG,EAAGA,EAAIg0B,EAAM7zB,OAAQH,IAC/BmzB,QAAQhF,IAAInuB,EAAG,GAAIlB,MAAKk1B,EAAMh0B,GAAGoK,OAAO,GAAItL,MAAKk1B,EAAMh0B,GAAGqK,KAAM2pB,EAAMh0B,GAAGoK,MAAO4pB,EAAMh0B,GAAGqK,IAAK2pB,EAAMh0B,GAAG6Q,SAS3GzW,EAAQ65B,oBAAsB,SAASC,EAAUC,GAG/C,IAAK,GAFDC,IAAe,EACfC,EAAeH,EAASI,QAAQ9yB,UAC3BxB,EAAI,EAAGA,EAAIk0B,EAAS3E,YAAYpvB,OAAQH,IAAK,CACpD,GAAIwyB,GAAY0B,EAAS3E,YAAYvvB,GAAGoK,MACpCqoB,EAAUyB,EAAS3E,YAAYvvB,GAAGqK,GACtC,IAAIgqB,GAAgB7B,GAA4BC,EAAf4B,EAAwB,CACvDD,GAAe,CACf,QAIJ,GAAoB,GAAhBA,GAAwBC,EAAeH,EAASvG,KAAKnsB,WAAa6yB,GAAgBF,EAAc,CAClG,GAAIlqB,GAAY5L,EAAO81B,GACnBI,EAAWl2B,EAAOo0B,EAElBxoB,GAAU8oB,QAAUwB,EAASxB,OAASmB,EAASM,cAAe,EACzDvqB,EAAUipB,SAAWqB,EAASrB,QAAUgB,EAASO,eAAgB,EACjExqB,EAAU6oB,aAAeyB,EAASzB,cAAcoB,EAASQ,aAAc,GAEhFR,EAASI,QAAUC,EAAS7yB,WAmChCtH,EAAQq1B,SAAW,SAASiB,EAAMiE,EAAM5nB,GACtC,GAAoC,GAAhC2jB,EAAKvB,KAAKI,YAAYpvB,OAAa,CACrC,GAAIy0B,GAAalE,EAAKT,MAAM2E,WAAW7nB,EACvC,QAAQ4nB,EAAKnzB,UAAYozB,EAAWzQ,QAAUyQ,EAAWnd,MAGzD,GAAIic,GAASt5B,EAAQk5B,SAASqB,EAAMjE,EAAKvB,KAAKI,YACzB,IAAjBmE,EAAOA,SACTiB,EAAOjB,EAAOlB,UAGhB,IAAIloB,GAAWlQ,EAAQy6B,yBAAyBnE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAM7lB,MAAOsmB,EAAKT,MAAM5lB,IACpGsqB,GAAOv6B,EAAQ06B,qBAAqBpE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAO0E,EAEvE,IAAIC,GAAalE,EAAKT,MAAM2E,WAAW7nB,EAAOzC,EAC9C,QAAQqqB,EAAKnzB,UAAYozB,EAAWzQ,QAAUyQ,EAAWnd,OAa7Drd,EAAQy1B,OAAS,SAASa,EAAMnkB,EAAGQ,GACjC,GAAoC,GAAhC2jB,EAAKvB,KAAKI,YAAYpvB,OAAa,CACrC,GAAIy0B,GAAalE,EAAKT,MAAM2E,WAAW7nB,EACvC,OAAO,IAAIjO,MAAKyN,EAAIqoB,EAAWnd,MAAQmd,EAAWzQ,QAGlD,GAAI4Q,GAAiB36B,EAAQy6B,yBAAyBnE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAM7lB,MAAOsmB,EAAKT,MAAM5lB,KACtG2qB,EAAgBtE,EAAKT,MAAM5lB,IAAMqmB,EAAKT,MAAM7lB,MAAQ2qB,EACpDE,EAAkBD,EAAgBzoB,EAAIQ,EACtCmoB,EAA4B96B,EAAQ+6B,6BAA6BzE,EAAKvB,KAAKI,YAAamB,EAAKT,MAAOgF,GAEpGG,EAAU,GAAIt2B,MAAKo2B,EAA4BD,EAAkBvE,EAAKT,MAAM7lB,MAChF,OAAOgrB,IAYXh7B,EAAQy6B,yBAA2B,SAAStF,EAAanlB,EAAOC,GAE9D,IAAK,GADDC,GAAW,EACNtK,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAEzBmoB,IAAapoB,GAAmBC,EAAVooB,IACxBnoB,GAAYmoB,EAAUD,GAG1B,MAAOloB,IAWTlQ,EAAQ06B,qBAAuB,SAASvF,EAAaU,EAAO0E,GAG1D,MAFAA,GAAOt2B,EAAOs2B,GAAMjzB,SAASF,UAC7BmzB,GAAQv6B,EAAQi7B,wBAAwB9F,EAAYU,EAAM0E,IAI5Dv6B,EAAQi7B,wBAA0B,SAAS9F,EAAaU,EAAO0E,GAC7D,GAAIW,GAAa,CACjBX,GAAOt2B,EAAOs2B,GAAMjzB,SAASF,SAE7B,KAAK,GAAIxB,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAEzBmoB,IAAavC,EAAM7lB,OAASqoB,EAAUxC,EAAM5lB,KAC1CsqB,GAAQlC,IACV6C,GAAe7C,EAAUD,GAI/B,MAAO8C,IAWTl7B,EAAQ+6B,6BAA+B,SAAS5F,EAAaU,EAAOsF,GAKlE,IAAK,GAJDR,GAAiB,EACjBzqB,EAAW,EACXkrB,EAAgBvF,EAAM7lB,MAEjBpK,EAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAE7B,IAAImoB,GAAavC,EAAM7lB,OAASqoB,EA
AUxC,EAAM5lB,IAAK,CAGnD,GAFAC,GAAYkoB,EAAYgD,EACxBA,EAAgB/C,EACZnoB,GAAYirB,EACd,KAGAR,IAAkBtC,EAAUD,GAKlC,MAAOuC,IAaT36B,EAAQq7B,mBAAqB,SAASlG,EAAaoF,EAAMe,EAAWC,GAClE,GAAIrC,GAAWl5B,EAAQk5B,SAASqB,EAAMpF,EACtC,OAAuB,IAAnB+D,EAASI,OACK,EAAZgC,EACuB,GAArBC,EACKrC,EAASd,WAAac,EAASb,QAAUkC,GAAQ,EAGjDrB,EAASd,UAAY,EAIL,GAArBmD,EACKrC,EAASb,SAAWkC,EAAOrB,EAASd,WAAa,EAGjDc,EAASb,QAAU,EAKvBkC,GAaXv6B,EAAQk5B,SAAW,SAASqB,EAAMpF,GAChC,IAAK,GAAIvvB,GAAI,EAAGA,EAAIuvB,EAAYpvB,OAAQH,IAAK,CAC3C,GAAIwyB,GAAYjD,EAAYvvB,GAAGoK,MAC3BqoB,EAAUlD,EAAYvvB,GAAGqK,GAE7B,IAAIsqB,GAAQnC,GAAoBC,EAAPkC,EACvB,OAAQjB,QAAQ,EAAMlB,UAAWA,EAAWC,QAASA,GAIzD,OAAQiB,QAAQ,EAAOlB,UAAWA,EAAWC,QAASA,KAKpD,SAASp4B,GA4Bb,QAAS+B,GAASgO,EAAOC,EAAKurB,EAAaC,EAAiBC,EAAaC,GAEvEv7B,KAAK85B,QAAU,EAEf95B,KAAKw7B,WAAY,EACjBx7B,KAAKy7B,UAAY,EACjBz7B,KAAKmoB,KAAO,EACZnoB,KAAKid,MAAQ,EAEbjd,KAAK07B,YACL17B,KAAK27B,UACL37B,KAAK47B,UAAY,EAEjB57B,KAAK67B,YAAc,EAAO,EAAM,EAAI,IACpC77B,KAAK87B,YAAc,IAAO,GAAM,EAAI,GAEpC97B,KAAKu7B,WAAaA,EAElBv7B,KAAKuzB,SAAS3jB,EAAOC,EAAKurB,EAAaC,EAAiBC,GAe1D15B,EAASuR,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAKurB,EAAaC,EAAiBC,GAC/Et7B,KAAKkzB,OAA6B1sB,SAApB80B,EAAYxvB,IAAoB8D,EAAQ0rB,EAAYxvB,IAClE9L,KAAKmzB,KAA2B3sB,SAApB80B,EAAY5uB,IAAoBmD,EAAMyrB,EAAY5uB,IAE1D1M,KAAKkzB,QAAUlzB,KAAKmzB,OACtBnzB,KAAKkzB,QAAU,IACflzB,KAAKmzB,MAAQ,GAGO,GAAlBnzB,KAAKw7B,WACPx7B,KAAK+7B,eAAeX,EAAaC,GAGnCr7B,KAAKg8B,SAASV,IAOhB15B,EAASuR,UAAU4oB,eAAiB,SAASX,EAAaC,GAExD,GAAIhpB,GAAOrS,KAAKmzB,KAAOnzB,KAAKkzB,OACxB+I,EAAkB,IAAP5pB,EACX6pB,EAAmBd,GAAea,EAAWZ,GAC7Cc,EAAmBj3B,KAAKwoB,MAAMxoB,KAAKyuB,IAAIsI,GAAU/2B,KAAK0uB,MAEtDwI,EAAe,GACfC,EAAkBn3B,KAAK4uB,IAAI,GAAGqI,GAE9BvsB,EAAQ,CACW,GAAnBusB,IACFvsB,EAAQusB,EAIV,KAAK,GADDG,IAAgB,EACX92B,EAAIoK,EAAO1K,KAAK2lB,IAAIrlB,IAAMN,KAAK2lB,IAAIsR,GAAmB32B,IAAK,CAClE62B,EAAkBn3B,KAAK4uB,IAAI,GAAGtuB,EAC9B,KAAK,GAAIomB,GAAI,EAAGA,EAAI5rB,KAAK87B,WAAWn2B,OAAQimB,IAAK,CAC/C,GAAI2Q,GAAWF,EAAkBr8B,KAAK87B,WAAWlQ,EACjD,IAAI2Q,GAAYL,EAAkB,CAChCI,GAAgB,EAChBF,EAAexQ,CACf,QAGJ,GAAqB,GAAjB0Q,EACF,MAGJt8B,KAAKy7B,UAAYW,EACjBp8B,KAAKid,MAAQof,EACbr8B,KAAKmoB,KAAOkU,EAAkBr8B,KAAK87B,WAAWM,IAShDx6B,EAASuR,UAAU6oB,SAAW,SAASV,GACjB90B,SAAhB80B,IACFA,KAGF,IAAIkB,GAAgCh2B,SAApB80B,EAAYxvB,IAAoB9L,KAAKkzB,OAAuB,EAAblzB,KAAKid,MAAYjd,KAAK87B,WAAW97B,KAAKy7B,WAAcH,EAAYxvB,IAC3H2wB,EAA8Bj2B,SAApB80B,EAAY5uB,IAAoB1M,KAAKmzB,KAAQnzB,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAAcH,EAAY5uB,GAEvH1M,MAAK27B,UAAgCn1B,SAApB80B,EAAY5uB,IAAoB1M,KAAK08B,aAAaD,GAAWnB,EAAY5uB,IAC1F1M,KAAK07B,YAAkCl1B,SAApB80B,EAAYxvB,IAAoB9L,KAAK08B,aAAaF,GAAalB,EAAYxvB,IAGvE,GAAnB9L,KAAKu7B,aAAuBv7B,KAAK27B,UAAY37B,KAAK07B,aAAe17B,KAAKmoB,MAAQ,IAChFnoB,KAAK27B,WAAa37B,KAAK27B,UAAY37B,KAAKmoB,MAG1CnoB,KAAK47B,UAAY57B,KAAK08B,aAAaD,GAAWA,EAAUz8B,KAAK08B,aAAaF,GAAaA,EACvFx8B,KAAK28B,YAAc38B,KAAK27B,UAAY37B,KAAK07B,YAGzC17B,KAAK85B,QAAU95B,KAAK27B,WAGtB/5B,EAASuR,UAAUupB,aAAe,SAASr1B,GACzC,GAAIu1B,GAAUv1B,EAASA,GAASrH,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAClE,OAAIp0B,IAASrH,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,YAAc,GAAOz7B,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAC7FmB,EAAW58B,KAAKid,MAAQjd,KAAK87B,WAAW97B,KAAKy7B,WAG7CmB,GASXh7B,EAASuR,UAAU0pB,QAAU,WAC3B,MAAQ78B,MAAK85B,SAAW95B,KAAK07B,aAM/B95B,EAASuR,UAAUkV,KAAO,WACxB,GAAIuJ,GAAO5xB,KAAK85B,OAChB95B,MAAK85B,SAAW95B,KAAKmoB,KAGjBnoB,KAAK85B,SAAWlI,IAClB5xB,KAAK85B,QAAU95B,KAAKmzB,OAOxBvxB,EAASuR,UAAU2pB,SAAW,WAC5B98B,KAAK85B,SAAW95B,KAAKmoB,KACrBnoB,KAAK27B,WAAa37B,KAAKmoB,KACvBnoB,KAAK28B,YAAc38B,KAAK27B,UAAY37B,KAAK07B,aAS3C95B,EAASuR,UAAUiV,WAAa,SAAS2U,GAEvC,GAAIjD,GAAW50B,KAAK2lB,IAAI7qB,KAAK85B,SAAW95B,KAA
KmoB,KAAO,EAAK,EAAInoB,KAAK85B,QAC9D7F,EAAc,GAAK/vB,OAAO41B,GAAS7F,YAAY,EAGnD,IAAgBztB,SAAbu2B,GAA2Br4B,MAAMR,OAAO64B,KAqCzC,GAAgC,IAA5B9I,EAAYttB,QAAQ,MAA0C,IAA5BstB,EAAYttB,QAAQ,KAExD,IAAK,GAAInB,GAAIyuB,EAAYtuB,OAAS,EAAGH,EAAI,EAAGA,IAAK,CAC/C,GAAsB,KAAlByuB,EAAYzuB,GAGX,CAAA,GAAsB,KAAlByuB,EAAYzuB,IAA+B,KAAlByuB,EAAYzuB,GAAW,CACvDyuB,EAAcA,EAAYhpB,MAAM,EAAGzF,EACnC,OAGA,MAPAyuB,EAAcA,EAAYhpB,MAAM,EAAGzF,QAzCY,CAErD,GAAIw3B,GAAM,GACN70B,EAAQ8rB,EAAYttB,QAAQ,IAoBhC,IAnBY,IAATwB,IAED60B,EAAM/I,EAAYhpB,MAAM9C,GAExB8rB,EAAcA,EAAYhpB,MAAM,EAAG9C,IAErCA,EAAQjD,KAAKwH,IAAIunB,EAAYttB,QAAQ,KAAMstB,EAAYttB,QAAQ,MAClD,KAAVwB,GAEe,IAAb40B,IACD9I,GAAe,KAGjB9rB,EAAQ8rB,EAAYtuB,OAASo3B,GAEV,IAAbA,IAEN50B,GAAS40B,EAAW,GAEnB50B,EAAQ8rB,EAAYtuB,OAErB,IAAI,GAAIs3B,GAAM90B,EAAQ8rB,EAAYtuB,OAAQs3B,EAAM,EAAGA,IACjDhJ,GAAe,QAKjBA,GAAcA,EAAYhpB,MAAM,EAAG9C,EAGrC8rB,IAAe+I,EAoBjB,MAAO/I,IAWTryB,EAASuR,UAAU6hB,KAAO,aAS1BpzB,EAASuR,UAAU+pB,QAAU,WAC3B,MAAQl9B,MAAK85B,SAAW95B,KAAKid,MAAQjd,KAAK67B,WAAW77B,KAAKy7B,aAAe,GAG3E57B,EAAOD,QAAUgC,GAKb,SAAS/B,EAAQD,EAASM,GAgB9B,QAAS2B,GAAM8yB,EAAMlmB,GACnB,GAAI0uB,GAAMt5B,IAASu5B,MAAM,GAAGC,QAAQ,GAAGC,QAAQ,GAAGC,aAAa,EAC/Dv9B,MAAK4P,MAAQutB,EAAI/E,QAAQnlB,IAAI,GAAI,QAAQjM,UACzChH,KAAK6P,IAAMstB,EAAI/E,QAAQnlB,IAAI,EAAG,QAAQjM,UAEtChH,KAAK20B,KAAOA,EACZ30B,KAAKw9B,gBAAkB,EACvBx9B,KAAKy9B,YAAc,EACnBz9B,KAAKm5B,cAAe,EACpBn5B,KAAKo5B,YAAa,EAGlBp5B,KAAKq0B,gBACHzkB,MAAO,KACPC,IAAK,KACLqrB,UAAW,aACXwC,UAAU,EACVC,UAAU,EACV7xB,IAAK,KACLY,IAAK,KACLkxB,QAAS,GACTC,QAAS,UAEX79B,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAEpCr0B,KAAKgG,OACH83B,UAEF99B,KAAK+9B,aAAe,KAGpB/9B,KAAK20B,KAAKE,QAAQthB,GAAG,WAAYvT,KAAKg+B,aAAalJ,KAAK90B,OACxDA,KAAK20B,KAAKE,QAAQthB,GAAG,UAAYvT,KAAKi+B,QAAQnJ,KAAK90B,OACnDA,KAAK20B,KAAKE,QAAQthB,GAAG,SAAYvT,KAAKk+B,WAAWpJ,KAAK90B,OAGtDA,KAAK20B,KAAKE,QAAQthB,GAAG,QAASvT,KAAKm+B,QAAQrJ,KAAK90B,OAGhDA,KAAK20B,KAAKE,QAAQthB,GAAG,aAAcvT,KAAKo+B,cAActJ,KAAK90B,OAG3DA,KAAK20B,KAAKE,QAAQthB,GAAG,QAASvT,KAAKq+B,SAASvJ,KAAK90B,OACjDA,KAAK20B,KAAKE,QAAQthB,GAAG,QAASvT,KAAKs+B,SAASxJ,KAAK90B,OAEjDA,KAAKkT,WAAWzE,GAsClB,QAAS8vB,GAAmBrD,GAC1B,GAAiB,cAAbA,GAA0C,YAAbA,EAC/B,KAAM,IAAI70B,WAAU,sBAAwB60B,EAAY,yCAof5D,QAASsD,GAAYV,EAAOl1B,GAC1B,OACEmJ,EAAG+rB,EAAM/rB,EAAIpR,EAAK2G,gBAAgBsB,GAClCoJ,EAAG8rB,EAAM9rB,EAAIrR,EAAK+G,eAAekB,IA1lBrC,GAAIjI,GAAOT,EAAoB,GAE3B2D,GADa3D,EAAoB,IACxBA,EAAoB,KAC7BqC,EAAYrC,EAAoB,IAChCyB,EAAWzB,EAAoB,GA0DnC2B,GAAMsR,UAAY,GAAI5Q,GAkBtBV,EAAMsR,UAAUD,WAAa,SAAUzE,GACrC,GAAIA,EAAS,CAEX,GAAIP,IAAU,YAAa,MAAO,MAAO,UAAW,UAAW,WAAY,WAAY,WAAY,cACnGvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,IAEvC,SAAWA,IAAW,OAASA,KAEjCzO,KAAKuzB,SAAS9kB,EAAQmB,MAAOnB,EAAQoB,OA4B3ChO,EAAMsR,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAK2mB,EAASiI,GACnDA,KAAW,IACbA,GAAS,EAEX,IAAIvL,GAAkB1sB,QAAToJ,EAAqBjP,EAAKkG,QAAQ+I,EAAO,QAAQ5I,UAAY,KACtEmsB,EAAgB3sB,QAAPqJ,EAAqBlP,EAAKkG,QAAQgJ,EAAK,QAAQ7I,UAAc,IAG1E,IAFAhH,KAAK0+B,mBAEDlI,EAAS,CACX,GAAIriB,GAAKnU,KACL2+B,EAAY3+B,KAAK4P,MACjBgvB,EAAU5+B,KAAK6P,IACfC,EAA8B,gBAAZ0mB,GAAuBA,EAAU,IACnDqI,GAAW,GAAIv6B,OAAO0C,UACtB83B,GAAa,EAEbzW,EAAO,WACT,IAAKlU,EAAGnO,MAAM83B,MAAMiB,SAAU,CAC5B,GAAI5B,IAAM,GAAI74B,OAAO0C,UACjBmzB,EAAOgD,EAAM0B,EACbG,EAAO7E,EAAOrqB,EACdlE,EAAKozB,GAAmB,OAAX9L,EAAmBA,EAASvyB,EAAKgP,cAAcwqB,EAAMwE,EAAWzL,EAAQpjB,GACrFgnB,EAAKkI,GAAiB,OAAT7L,EAAmBA,EAASxyB,EAAKgP,cAAcwqB,EAAMyE,EAASzL,EAAMrjB,EAErFmvB,GAAU9qB,EAAGklB,YAAYztB,EAAGkrB,GAC5Bn1B,EAASi2B,kBAAkBzjB,EAAGwgB,KAAMxgB,EAAG1F,QAAQsmB,aAC/C+J,EAAaA,GAAcG,EACvBA,GACF9qB,EAAGwgB,KAAKE,QAAQjH,KAAK,eAAgBhe,MAAO,GAAItL,MAAK6P,EAAGvE,OAAQC,IAAK,GAAIvL,MAAK6P,EAAGtE,KAAM4uB,OAAOA,IAG5FO,EA
CEF,GACF3qB,EAAGwgB,KAAKE,QAAQjH,KAAK,gBAAiBhe,MAAO,GAAItL,MAAK6P,EAAGvE,OAAQC,IAAK,GAAIvL,MAAK6P,EAAGtE,KAAM4uB,OAAOA,IAMjGtqB,EAAG4pB,aAAezkB,WAAW+O,EAAM,KAKzC,OAAOA,KAGP,GAAI4W,GAAUj/B,KAAKq5B,YAAYnG,EAAQC,EAEvC,IADAxxB,EAASi2B,kBAAkB53B,KAAK20B,KAAM30B,KAAKyO,QAAQsmB,aAC/CkK,EAAS,CACX,GAAInrB,IAAUlE,MAAO,GAAItL,MAAKtE,KAAK4P,OAAQC,IAAK,GAAIvL,MAAKtE,KAAK6P,KAAM4uB,OAAOA,EAC3Ez+B,MAAK20B,KAAKE,QAAQjH,KAAK,cAAe9Z,GACtC9T,KAAK20B,KAAKE,QAAQjH,KAAK,eAAgB9Z,KAS7CjS,EAAMsR,UAAUurB,iBAAmB,WAC7B1+B,KAAK+9B,eACP1kB,aAAarZ,KAAK+9B,cAClB/9B,KAAK+9B,aAAe,OAaxBl8B,EAAMsR,UAAUkmB,YAAc,SAASzpB,EAAOC,GAC5C,GAIIwc,GAJA6S,EAAqB,MAATtvB,EAAiBjP,EAAKkG,QAAQ+I,EAAO,QAAQ5I,UAAYhH,KAAK4P,MAC1EuvB,EAAmB,MAAPtvB,EAAiBlP,EAAKkG,QAAQgJ,EAAK,QAAQ7I,UAAchH,KAAK6P,IAC1EnD,EAA2B,MAApB1M,KAAKyO,QAAQ/B,IAAe/L,EAAKkG,QAAQ7G,KAAKyO,QAAQ/B,IAAK,QAAQ1F,UAAY,KACtF8E,EAA2B,MAApB9L,KAAKyO,QAAQ3C,IAAenL,EAAKkG,QAAQ7G,KAAKyO,QAAQ3C,IAAK,QAAQ9E,UAAY,IAI1F,IAAItC,MAAMw6B,IAA0B,OAAbA,EACrB,KAAM,IAAIt7B,OAAM,kBAAoBgM,EAAQ,IAE9C,IAAIlL,MAAMy6B,IAAsB,OAAXA,EACnB,KAAM,IAAIv7B,OAAM,gBAAkBiM,EAAM,IAyC1C,IArCaqvB,EAATC,IACFA,EAASD,GAIC,OAARpzB,GACaA,EAAXozB,IACF7S,EAAQvgB,EAAMozB,EACdA,GAAY7S,EACZ8S,GAAU9S,EAGC,MAAP3f,GACEyyB,EAASzyB,IACXyyB,EAASzyB,IAOL,OAARA,GACEyyB,EAASzyB,IACX2f,EAAQ8S,EAASzyB,EACjBwyB,GAAY7S,EACZ8S,GAAU9S,EAGC,MAAPvgB,GACaA,EAAXozB,IACFA,EAAWpzB,IAOU,OAAzB9L,KAAKyO,QAAQmvB,QAAkB,CACjC,GAAIA,GAAUvY,WAAWrlB,KAAKyO,QAAQmvB,QACxB,GAAVA,IACFA,EAAU,GAEcA,EAArBuB,EAASD,IACPl/B,KAAK6P,IAAM7P,KAAK4P,QAAWguB,GAE9BsB,EAAWl/B,KAAK4P,MAChBuvB,EAASn/B,KAAK6P,MAIdwc,EAAQuR,GAAWuB,EAASD,GAC5BA,GAAY7S,EAAO,EACnB8S,GAAU9S,EAAO,IAMvB,GAA6B,OAAzBrsB,KAAKyO,QAAQovB,QAAkB,CACjC,GAAIA,GAAUxY,WAAWrlB,KAAKyO,QAAQovB,QACxB,GAAVA,IACFA,EAAU,GAEPsB,EAASD,EAAYrB,IACnB79B,KAAK6P,IAAM7P,KAAK4P,QAAWiuB,GAE9BqB,EAAWl/B,KAAK4P,MAChBuvB,EAASn/B,KAAK6P,MAIdwc,EAAS8S,EAASD,EAAYrB,EAC9BqB,GAAY7S,EAAO,EACnB8S,GAAU9S,EAAO,IAKvB,GAAI4S,GAAWj/B,KAAK4P,OAASsvB,GAAYl/B,KAAK6P,KAAOsvB,CAUrD,OAPOD,IAAYl/B,KAAK4P,OAASsvB,GAAcl/B,KAAK6P,KAASsvB,GAAYn/B,KAAK4P,OAASuvB,GAAYn/B,KAAK6P,KACjG7P,KAAK4P,OAASsvB,GAAYl/B,KAAK4P,OAASuvB,GAAcn/B,KAAK6P,KAAOqvB,GAAcl/B,KAAK6P,KAAOsvB,GACjGn/B,KAAK20B,KAAKE,QAAQjH,KAAK,oBAGzB5tB,KAAK4P,MAAQsvB,EACbl/B,KAAK6P,IAAMsvB,EACJF,GAOTp9B,EAAMsR,UAAUisB,SAAW,WACzB,OACExvB,MAAO5P,KAAK4P,MACZC,IAAK7P,KAAK6P,MAUdhO,EAAMsR,UAAUinB,WAAa,SAAU7nB,EAAO8sB,GAC5C,MAAOx9B,GAAMu4B,WAAWp6B,KAAK4P,MAAO5P,KAAK6P,IAAK0C,EAAO8sB,IAWvDx9B,EAAMu4B,WAAa,SAAUxqB,EAAOC,EAAK0C,EAAO8sB,GAI9C,MAHoB74B,UAAhB64B,IACFA,EAAc,GAEH,GAAT9sB,GAAe1C,EAAMD,GAAS,GAE9B+Z,OAAQ/Z,EACRqN,MAAO1K,GAAS1C,EAAMD,EAAQyvB,KAK9B1V,OAAQ,EACR1M,MAAO,IAUbpb,EAAMsR,UAAU6qB,aAAe,SAAS10B,GACtCtJ,KAAKw9B,gBAAkB,EACvBx9B,KAAKs/B,cAAgB,EAEhBt/B,KAAKyO,QAAQivB,UAIb19B,KAAKgG,MAAM83B,MAAMyB,gBAEtBv/B,KAAKgG,MAAM83B,MAAMluB,MAAQ5P,KAAK4P,MAC9B5P,KAAKgG,MAAM83B,MAAMjuB,IAAM7P,KAAK6P,IAC5B7P,KAAKgG,MAAM83B,MAAMiB,UAAW,EAExB/+B,KAAK20B,KAAK5E,IAAIrwB,OAChBM,KAAK20B,KAAK5E,IAAIrwB,KAAKuN,MAAM+f,OAAS,QAGpC1jB,EAAMD,mBAQRxH,EAAMsR,UAAU8qB,QAAU,SAAU30B,GAElC,GAAKtJ,KAAKyO,QAAQivB,UAKb19B,KAAKgG,MAAM83B,MAAMyB,cAAtB,CAEA,GAAIrE,GAAYl7B,KAAKyO,QAAQysB,SAC7BqD,GAAkBrD,EAClB,IAAIzM,GAAsB,cAAbyM,EAA6B5xB,EAAMk2B,OAASl2B,EAAMm2B,MAC/DhR,IAASzuB,KAAKw9B,eACd,IAAIhL,GAAYxyB,KAAKgG,MAAM83B,MAAMjuB,IAAM7P,KAAKgG,MAAM83B,MAAMluB,MAGpDE,EAAWnO,EAAS04B,yBAAyBr6B,KAAK20B,KAAKI,YAAa/0B,KAAK4P,MAAO5P,KAAK6P,IACzF2iB,IAAY1iB,CAEZ,IAAIyC,GAAsB,cAAb2oB,EAA6Bl7B,KAAK20B,KAAKC,SAAS1I,OAAO3Z,MAAQvS,KAAK20B,KAAKC,SAAS1I,OAAO1Z,OAClGktB,GAAajR,EAAQlc,EAAQigB,EAC7B0M,EAAWl/B,KAAKgG,MAAM83B,MAAMluB,MAAQ8vB,EACpCP,EAASn/B,KAAK
gG,MAAM83B,MAAMjuB,IAAM6vB,EAGhCC,EAAYh+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAamK,EAAUl/B,KAAKs/B,cAAc7Q,GAAO,GACnGmR,EAAUj+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAaoK,EAAQn/B,KAAKs/B,cAAc7Q,GAAO,EACnG,IAAIkR,GAAaT,GAAYU,GAAWT,EAKtC,MAJAn/B,MAAKw9B,iBAAmB/O,EACxBzuB,KAAKgG,MAAM83B,MAAMluB,MAAQ+vB,EACzB3/B,KAAKgG,MAAM83B,MAAMjuB,IAAM+vB,MACvB5/B,MAAKi+B,QAAQ30B,EAIftJ,MAAKs/B,cAAgB7Q,EACrBzuB,KAAKq5B,YAAY6F,EAAUC,GAG3Bn/B,KAAK20B,KAAKE,QAAQjH,KAAK,eACrBhe,MAAO,GAAItL,MAAKtE,KAAK4P,OACrBC,IAAO,GAAIvL,MAAKtE,KAAK6P,KACrB4uB,QAAQ,IAGVn1B,EAAMD,mBAQRxH,EAAMsR,UAAU+qB,WAAa,WAEtBl+B,KAAKyO,QAAQivB,UAKb19B,KAAKgG,MAAM83B,MAAMyB,gBAEtBv/B,KAAKgG,MAAM83B,MAAMiB,UAAW,EACxB/+B,KAAK20B,KAAK5E,IAAIrwB,OAChBM,KAAK20B,KAAK5E,IAAIrwB,KAAKuN,MAAM+f,OAAS,QAIpChtB,KAAK20B,KAAKE,QAAQjH,KAAK,gBACrBhe,MAAO,GAAItL,MAAKtE,KAAK4P,OACrBC,IAAO,GAAIvL,MAAKtE,KAAK6P,KACrB4uB,QAAQ,MAUZ58B,EAAMsR,UAAUirB,cAAgB,SAAS90B,GAEvC,GAAMtJ,KAAKyO,QAAQkvB,UAAY39B,KAAKyO,QAAQivB,SAA5C,CAGA,GAAIjP,GAAQ,CAYZ,IAXInlB,EAAMolB,WACRD,EAAQnlB,EAAMolB,WAAa,IAClBplB,EAAMqlB,SAGfF,GAASnlB,EAAMqlB,OAAS,GAMtBF,EAAO,CAKT,GAAIxR,EAEFA,GADU,EAARwR,EACM,EAAKA,EAAQ,EAGb,GAAK,EAAKA,EAAQ,EAI5B,IAAIoR,GAAUrB,GAAYzsB,EAAGzI,EAAMw2B,MAAO9tB,EAAG1I,EAAMy2B,OAAQ//B,KAAK20B,KAAK5E,IAAI7D,QACrE8T,EAAchgC,KAAKigC,eAAeJ,EAEtC7/B,MAAKkgC,KAAKjjB,EAAO+iB,EAAavR,GAKhCnlB,EAAMD,mBAORxH,EAAMsR,UAAUkrB,SAAW,WACzBr+B,KAAKgG,MAAM83B,MAAMluB,MAAQ5P,KAAK4P,MAC9B5P,KAAKgG,MAAM83B,MAAMjuB,IAAM7P,KAAK6P,IAC5B7P,KAAKgG,MAAM83B,MAAMyB,eAAgB,EACjCv/B,KAAKgG,MAAM83B,MAAM5R,OAAS,KAC1BlsB,KAAKy9B,YAAc,EACnBz9B,KAAKw9B,gBAAkB,GAOzB37B,EAAMsR,UAAUgrB,QAAU,WACxBn+B,KAAKgG,MAAM83B,MAAMyB,eAAgB,GAQnC19B,EAAMsR,UAAUmrB,SAAW,SAAUh1B,GAEnC,GAAMtJ,KAAKyO,QAAQkvB,UAAY39B,KAAKyO,QAAQivB,SAA5C,CAEA19B,KAAKgG,MAAM83B,MAAMyB,eAAgB,EAE5Bv/B,KAAKgG,MAAM83B,MAAM5R,SACpBlsB,KAAKgG,MAAM83B,MAAM5R,OAASsS,EAAWl1B,EAAM4iB,OAAQlsB,KAAK20B,KAAK5E,IAAI7D,QAGnE,IAAIjP,GAAQ,GAAK3T,EAAM2T,MAAQjd,KAAKy9B,aAChC0C,EAAangC,KAAKigC,eAAejgC,KAAKgG,MAAM83B,MAAM5R,QAElDqO,EAAiB54B,EAAS04B,yBAAyBr6B,KAAK20B,KAAKI,YAAa/0B,KAAK4P,MAAO5P,KAAK6P,KAC3FuwB,EAAuBz+B,EAASk5B,wBAAwB76B,KAAK20B,KAAKI,YAAa/0B,KAAMmgC,GACrFE,EAAsB9F,EAAiB6F,EAGvClB,EAAYiB,EAAaC,GAAyBpgC,KAAKgG,MAAM83B,MAAMluB,OAASuwB,EAAaC,IAAyBnjB,EAClHkiB,EAAUgB,EAAaE,GAAwBrgC,KAAKgG,MAAM83B,MAAMjuB,KAAOswB,EAAaE,IAAwBpjB,CAGhHjd,MAAKm5B,aAA4B,GAAb,EAAIlc,EACxBjd,KAAKo5B,WAA0B,GAAbnc,EAAQ,CAE1B,IAAI0iB,GAAYh+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAamK,EAAU,EAAIjiB,GAAO,GACpF2iB,EAAUj+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAaoK,EAAQliB,EAAQ,GAAG,IAChF0iB,GAAaT,GAAYU,GAAWT,KACtCn/B,KAAKgG,MAAM83B,MAAMluB,MAAQ+vB,EACzB3/B,KAAKgG,MAAM83B,MAAMjuB,IAAM+vB,EACvB5/B,KAAKy9B,YAAc,EAAIn0B,EAAM2T,MAC7BiiB,EAAWS,EACXR,EAASS,GAGX5/B,KAAKuzB,SAAS2L,EAAUC,GAAQ,GAAO,GAEvCn/B,KAAKm5B,cAAe,EACpBn5B,KAAKo5B,YAAa,EAElB9vB,EAAMD,mBASRxH,EAAMsR,UAAU8sB,eAAiB,SAAUJ,GACzC,GAAIzF,GACAc,EAAYl7B,KAAKyO,QAAQysB,SAI7B,IAFAqD,EAAkBrD,GAED,cAAbA,EACF,MAAOl7B,MAAK20B,KAAKh0B,KAAK00B,OAAOwK,EAAQ9tB,GAAG/K,SAGxC,IAAIwL,GAASxS,KAAK20B,KAAKC,SAAS1I,OAAO1Z,MAEvC,OADA4nB,GAAap6B,KAAKo6B,WAAW5nB,GACtBqtB,EAAQ7tB,EAAIooB,EAAWnd,MAAQmd,EAAWzQ,QA4BrD9nB,EAAMsR,UAAU+sB,KAAO,SAASjjB,EAAOiP,EAAQuC,GAE/B,MAAVvC,IACFA,GAAUlsB,KAAK4P,MAAQ5P,KAAK6P,KAAO,EAGrC,IAAI0qB,GAAiB54B,EAAS04B,yBAAyBr6B,KAAK20B,KAAKI,YAAa/0B,KAAK4P,MAAO5P,KAAK6P,KAC3FuwB,EAAuBz+B,EAASk5B,wBAAwB76B,KAAK20B,KAAKI,YAAa/0B,KAAMksB,GACrFmU,EAAsB9F,EAAiB6F,EAGvClB,EAAYhT,EAAOkU,GAAyBpgC,KAAK4P,OAASsc,EAAOkU,IAAyBnjB,EAC1FkiB,EAAYjT,EAAOmU,GAAwBrgC,KAAK6P,KAAOqc,EAAOmU,IAAwBpjB,CAG1Fjd,MAAKm5B,aAAe1K,EAAQ,GAAI,GAAQ,EACxCzuB,KAAKo5B,YAAc3K,EAAS,GAAI,
GAAQ,CACxC,IAAIkR,GAAYh+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAamK,EAAUzQ,GAAO,GAChFmR,EAAUj+B,EAASs5B,mBAAmBj7B,KAAK20B,KAAKI,YAAaoK,GAAS1Q,GAAO,IAC7EkR,GAAaT,GAAYU,GAAWT,KACtCD,EAAWS,EACXR,EAASS,GAGX5/B,KAAKuzB,SAAS2L,EAAUC,GAAQ,GAAO,GAEvCn/B,KAAKm5B,cAAe,EACpBn5B,KAAKo5B,YAAa,GAWpBv3B,EAAMsR,UAAUmtB,KAAO,SAAS7R,GAE9B,GAAIpC,GAAQrsB,KAAK6P,IAAM7P,KAAK4P,MAGxBsvB,EAAWl/B,KAAK4P,MAAQyc,EAAOoC,EAC/B0Q,EAASn/B,KAAK6P,IAAMwc,EAAOoC,CAI/BzuB,MAAK4P,MAAQsvB,EACbl/B,KAAK6P,IAAMsvB,GAObt9B,EAAMsR,UAAU0U,OAAS,SAASA,GAChC,GAAIqE,IAAUlsB,KAAK4P,MAAQ5P,KAAK6P,KAAO,EAEnCwc,EAAOH,EAASrE,EAGhBqX,EAAWl/B,KAAK4P,MAAQyc,EACxB8S,EAASn/B,KAAK6P,IAAMwc,CAExBrsB,MAAKuzB,SAAS2L,EAAUC,IAG1Bt/B,EAAOD,QAAUiC,GAKb,SAAShC,EAAQD,GAGrB,GAAI2gC,GAAU,IAMd3gC,GAAQ4gC,aAAe,SAASv+B,GAC9BA,EAAMiU,KAAK,SAAU3Q,EAAGa,GACtB,MAAOb,GAAEmN,KAAK9C,MAAQxJ,EAAEsM,KAAK9C,SASjChQ,EAAQ6gC,WAAa,SAASx+B,GAC5BA,EAAMiU,KAAK,SAAU3Q,EAAGa,GACtB,GAAIs6B,GAAS,OAASn7B,GAAEmN,KAAQnN,EAAEmN,KAAK7C,IAAMtK,EAAEmN,KAAK9C,MAChD+wB,EAAS,OAASv6B,GAAEsM,KAAQtM,EAAEsM,KAAK7C,IAAMzJ,EAAEsM,KAAK9C,KAEpD,OAAO8wB,GAAQC,KAenB/gC,EAAQkC,MAAQ,SAASG,EAAOyX,EAAQknB,GACtC,GAAIp7B,GAAGq7B,CAEP,IAAID,EAEF,IAAKp7B,EAAI,EAAGq7B,EAAO5+B,EAAM0D,OAAYk7B,EAAJr7B,EAAUA,IACzCvD,EAAMuD,GAAGmC,IAAM,IAKnB,KAAKnC,EAAI,EAAGq7B,EAAO5+B,EAAM0D,OAAYk7B,EAAJr7B,EAAUA,IAAK,CAC9C,GAAI6J,GAAOpN,EAAMuD,EACjB,IAAI6J,EAAKvN,OAAsB,OAAbuN,EAAK1H,IAAc,CAEnC0H,EAAK1H,IAAM+R,EAAOonB,IAElB,GAAG,CAID,IAAK,GADDC,GAAgB,KACXnV,EAAI,EAAGoV,EAAK/+B,EAAM0D,OAAYq7B,EAAJpV,EAAQA,IAAK,CAC9C,GAAIhmB,GAAQ3D,EAAM2pB,EAClB,IAAkB,OAAdhmB,EAAM+B,KAAgB/B,IAAUyJ,GAAQzJ,EAAM9D,OAASlC,EAAQqhC,UAAU5xB,EAAMzJ,EAAO8T,EAAOrK,MAAO,CACtG0xB,EAAgBn7B,CAChB,QAIiB,MAAjBm7B,IAEF1xB,EAAK1H,IAAMo5B,EAAcp5B,IAAMo5B,EAAcvuB,OAASkH,EAAOrK,KAAKoW,gBAE7Dsb,MAafnhC,EAAQshC,QAAU,SAASj/B,EAAOyX,EAAQynB,GACxC,GAAI37B,GAAGq7B,EAAMO,CAGb,KAAK57B,EAAI,EAAGq7B,EAAO5+B,EAAM0D,OAAYk7B,EAAJr7B,EAAUA,IACzC,GAA+BgB,SAA3BvE,EAAMuD,GAAGkN,KAAK2uB,SAAwB,CACxCD,EAAS1nB,EAAOonB,IAChB,KAAK,GAAIO,KAAYF,GACfA,EAAUr7B,eAAeu7B,IACQ,GAA/BF,EAAUE,GAAU3Y,SAAmByY,EAAUE,GAAUl5B,MAAQg5B,EAAUl/B,EAAMuD,GAAGkN,KAAK2uB,UAAUl5B,QACvGi5B,GAAUD,EAAUE,GAAU7uB,OAASkH,EAAOrK,KAAKoW,SAIzDxjB,GAAMuD,GAAGmC,IAAMy5B,MAGfn/B,GAAMuD,GAAGmC,IAAM+R,EAAOonB,MAe5BlhC,EAAQqhC,UAAY,SAAS17B,EAAGa,EAAGsT,GACjC,MAASnU,GAAEkC,KAAOiS,EAAO8L,WAAa+a,EAAkBn6B,EAAEqB,KAAOrB,EAAEmM,OAC9DhN,EAAEkC,KAAOlC,EAAEgN,MAAQmH,EAAO8L,WAAa+a,EAAWn6B,EAAEqB,MACpDlC,EAAEoC,IAAM+R,EAAO+L,SAAW8a,EAAyBn6B,EAAEuB,IAAMvB,EAAEoM,QAC7DjN,EAAEoC,IAAMpC,EAAEiN,OAASkH,EAAO+L,SAAW8a,EAAan6B,EAAEuB,MAMvD,SAAS9H,EAAQD,EAASM,GAgC9B,QAAS6B,GAAS6N,EAAOC,EAAKurB,EAAarG,GAEzC/0B,KAAK85B,QAAU,GAAIx1B,MACnBtE,KAAKkzB,OAAS,GAAI5uB,MAClBtE,KAAKmzB,KAAO,GAAI7uB,MAEhBtE,KAAKw7B,WAAa,EAClBx7B,KAAKid,MAAQ,MACbjd,KAAKmoB,KAAO,EAGZnoB,KAAKuzB,SAAS3jB,EAAOC,EAAKurB,GAG1Bp7B,KAAKk6B,aAAc,EACnBl6B,KAAKi6B,eAAgB,EACrBj6B,KAAKg6B,cAAe,EACpBh6B,KAAK+0B,YAAcA,EACCvuB,SAAhBuuB,IACF/0B,KAAK+0B,gBAGP/0B,KAAKshC,OAASv/B,EAASw/B,OApDzB,GAAI19B,GAAS3D,EAAoB,IAC7ByB,EAAWzB,EAAoB,IAC/BS,EAAOT,EAAoB,EAsD/B6B,GAASw/B,QACPC,aACEC,YAAY,MACZC,OAAY,IACZC,OAAY,QACZC,KAAY,QACZC,QAAY,QACZxJ,IAAY,IACZK,MAAY,MACZH,KAAY,QAEduJ,aACEL,YAAY,WACZC,OAAY,eACZC,OAAY,aACZC,KAAY,aACZC,QAAY,YACZxJ,IAAY,YACZK,MAAY,OACZH,KAAY,KAUhBx2B,EAASoR,UAAU4uB,UAAY,SAAUT,GACvC,GAAIU,GAAgBrhC,EAAK8F,cAAe1E,EAASw/B,OACjDvhC,MAAKshC,OAAS3gC,EAAK8F,WAAWu7B,EAAeV,IAa/Cv/B,EAASoR,UAAUogB,SAAW,SAAS3jB,EAAOC,EAAKurB,GACjD,KAAMxrB,YAAiBtL,OAAWuL,YAAevL,OAC/C,KAAO,+CAGTtE,MAAKkzB,OAAmB1sB,QAAToJ,EAAsB,GAAItL,MAAKsL,EAAM5I,WAAa,GAAI1C,MACrEtE,KAAKmzB,KAAe3sB,QAAPqJ,EAAoB,GAAIvL,MAAKuL,EAAI7I,W
AAa,GAAI1C,MAE3DtE,KAAKw7B,WACPx7B,KAAK+7B,eAAeX,IAOxBr5B,EAASoR,UAAU8uB,MAAQ,WACzBjiC,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAKkzB,OAAOlsB,WACpChH,KAAK08B,gBAOP36B,EAASoR,UAAUupB,aAAe,WAIhC,OAAQ18B,KAAKid,OACX,IAAK,OACHjd,KAAK85B,QAAQoI,YAAYliC,KAAKmoB,KAAOjjB,KAAKC,MAAMnF,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,OAClFnoB,KAAK85B,QAAQsI,SAAS,EACxB,KAAK,QAAgBpiC,KAAK85B,QAAQuI,QAAQ,EAC1C,KAAK,MACL,IAAK,UAAgBriC,KAAK85B,QAAQwI,SAAS,EAC3C,KAAK,OAAgBtiC,KAAK85B,QAAQyI,WAAW,EAC7C,KAAK,SAAgBviC,KAAK85B,QAAQ0I,WAAW,EAC7C,KAAK,SAAgBxiC,KAAK85B,QAAQ2I,gBAAgB,GAIpD,GAAiB,GAAbziC,KAAKmoB,KAEP,OAAQnoB,KAAKid,OACX,IAAK,cAAgBjd,KAAK85B,QAAQ2I,gBAAgBziC,KAAK85B,QAAQ4I,kBAAoB1iC,KAAK85B,QAAQ4I,kBAAoB1iC,KAAKmoB,KAAQ,MACjI,KAAK,SAAgBnoB,KAAK85B,QAAQ0I,WAAWxiC,KAAK85B,QAAQ6I,aAAe3iC,KAAK85B,QAAQ6I,aAAe3iC,KAAKmoB,KAAO,MACjH,KAAK,SAAgBnoB,KAAK85B,QAAQyI,WAAWviC,KAAK85B,QAAQ8I,aAAe5iC,KAAK85B,QAAQ8I,aAAe5iC,KAAKmoB,KAAO,MACjH,KAAK,OAAgBnoB,KAAK85B,QAAQwI,SAAStiC,KAAK85B,QAAQ+I,WAAa7iC,KAAK85B,QAAQ+I,WAAa7iC,KAAKmoB,KAAO,MAC3G,KAAK,UACL,IAAK,MAAgBnoB,KAAK85B,QAAQuI,QAASriC,KAAK85B,QAAQgJ,UAAU,GAAM9iC,KAAK85B,QAAQgJ,UAAU,GAAK9iC,KAAKmoB,KAAO,EAAI,MACpH,KAAK,QAAgBnoB,KAAK85B,QAAQsI,SAASpiC,KAAK85B,QAAQiJ,WAAa/iC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,KAAQ,MAC5G,KAAK,OAAgBnoB,KAAK85B,QAAQoI,YAAYliC,KAAK85B,QAAQqI,cAAgBniC,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,QAUnHpmB,EAASoR,UAAU0pB,QAAU,WAC3B,MAAQ78B,MAAK85B,QAAQ9yB,WAAahH,KAAKmzB,KAAKnsB,WAM9CjF,EAASoR,UAAUkV,KAAO,WACxB,GAAIuJ,GAAO5xB,KAAK85B,QAAQ9yB,SAIxB,IAAIhH,KAAK85B,QAAQiJ,WAAa,EAC5B,OAAQ/iC,KAAKid,OACX,IAAK,cAEHjd,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAYhH,KAAKmoB,KAAO,MAC/D,KAAK,SAAgBnoB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAwB,IAAZhH,KAAKmoB,KAAc,MACzF,KAAK,SAAgBnoB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAwB,IAAZhH,KAAKmoB,KAAc,GAAK,MAC9F,KAAK,OACHnoB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAwB,IAAZhH,KAAKmoB,KAAc,GAAK,GAEzE,IAAIxc,GAAI3L,KAAK85B,QAAQ+I,UACrB7iC,MAAK85B,QAAQwI,SAAS32B,EAAKA,EAAI3L,KAAKmoB,KACpC,MACF,KAAK,UACL,IAAK,MAAgBnoB,KAAK85B,QAAQuI,QAAQriC,KAAK85B,QAAQgJ,UAAY9iC,KAAKmoB,KAAO,MAC/E,KAAK,QAAgBnoB,KAAK85B,QAAQsI,SAASpiC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,KAAO,MACjF,KAAK,OAAgBnoB,KAAK85B,QAAQoI,YAAYliC,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,UAKlF,QAAQnoB,KAAKid,OACX,IAAK,cAAgBjd,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAK85B,QAAQ9yB,UAAYhH,KAAKmoB,KAAO,MAClF,KAAK,SAAgBnoB,KAAK85B,QAAQ0I,WAAWxiC,KAAK85B,QAAQ6I,aAAe3iC,KAAKmoB,KAAO,MACrF,KAAK,SAAgBnoB,KAAK85B,QAAQyI,WAAWviC,KAAK85B,QAAQ8I,aAAe5iC,KAAKmoB,KAAO;KACrF,KAAK,OAAgBnoB,KAAK85B,QAAQwI,SAAStiC,KAAK85B,QAAQ+I,WAAa7iC,KAAKmoB,KAAO,MACjF,KAAK,UACL,IAAK,MAAgBnoB,KAAK85B,QAAQuI,QAAQriC,KAAK85B,QAAQgJ,UAAY9iC,KAAKmoB,KAAO,MAC/E,KAAK,QAAgBnoB,KAAK85B,QAAQsI,SAASpiC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,KAAO,MACjF,KAAK,OAAgBnoB,KAAK85B,QAAQoI,YAAYliC,KAAK85B,QAAQqI,cAAgBniC,KAAKmoB,MAKpF,GAAiB,GAAbnoB,KAAKmoB,KAEP,OAAQnoB,KAAKid,OACX,IAAK,cAAmBjd,KAAK85B,QAAQ4I,kBAAoB1iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQ2I,gBAAgB,EAAK,MACtG,KAAK,SAAmBziC,KAAK85B,QAAQ6I,aAAe3iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQ0I,WAAW,EAAK,MAC5F,KAAK,SAAmBxiC,KAAK85B,QAAQ8I,aAAe5iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQyI,WAAW,EAAK,MAC5F,KAAK,OAAmBviC,KAAK85B,QAAQ+I,WAAa7iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQwI,SAAS,EAAK,MACxF,KAAK,UACL,IAAK,MAAmBtiC,KAAK85B,QAAQgJ,UAAY9iC,KAAKmoB,KAAK,GAAGnoB,KAAK85B,QAAQuI,QAAQ,EAAI,MACvF,KAAK,QAAmBriC,KAAK85B,QAAQiJ,WAAa/iC,KAAKmoB,MAAMnoB,KAAK85B,QAAQsI,SAAS,EAAK,MACxF,KAAK,QAMLpiC,KAAK85B,QAAQ9yB,WAAa4qB,IAC5B5xB,KAAK85B,QAAU,GAAIx1B,MAAKtE,KAAKmzB,KAAKnsB,YAGpCrF,EAAS83B,oBAAoBz5B,KAAM4xB,IAQrC7vB,EAASoR,UAAUiV,WAAa,WAC9B,MAAOpoB,MAAK85B,
SAcd/3B,EAASoR,UAAU6vB,SAAW,SAASC,EAAUC,GAC/CljC,KAAKid,MAAQgmB,EAETC,EAAU,IACZljC,KAAKmoB,KAAO+a,GAGdljC,KAAKw7B,WAAY,GAOnBz5B,EAASoR,UAAUgwB,aAAe,SAAUC,GAC1CpjC,KAAKw7B,UAAY4H,GAQnBrhC,EAASoR,UAAU4oB,eAAiB,SAASX,GAC3C,GAAmB50B,QAAf40B,EAAJ,CAMA,GAAIiI,GAAiB,QACjBC,EAAiB,OACjBC,EAAiB,MACjBC,EAAiB,KACjBC,EAAiB,IACjBC,EAAiB,IACjBC,EAAiB,CAGR,KAATN,EAAgBjI,IAAqBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,KACpE,IAATkb,EAAejI,IAAsBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,KACpE,IAATkb,EAAejI,IAAsBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,KACpE,GAATkb,EAAcjI,IAAuBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,IACpE,GAATkb,EAAcjI,IAAuBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,IACpE,EAATkb,EAAajI,IAAwBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GAC7Ekb,EAAWjI,IAA0Bp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GACnE,EAAVmb,EAAclI,IAAuBp7B,KAAKid,MAAQ,QAAejd,KAAKmoB,KAAO,GAC7Emb,EAAYlI,IAAyBp7B,KAAKid,MAAQ,QAAejd,KAAKmoB,KAAO,GACrE,EAARob,EAAYnI,IAAyBp7B,KAAKid,MAAQ,MAAejd,KAAKmoB,KAAO,GACrE,EAARob,EAAYnI,IAAyBp7B,KAAKid,MAAQ,MAAejd,KAAKmoB,KAAO,GAC7Eob,EAAUnI,IAA2Bp7B,KAAKid,MAAQ,MAAejd,KAAKmoB,KAAO,GAC7Eob,EAAQ,EAAInI,IAAyBp7B,KAAKid,MAAQ,UAAejd,KAAKmoB,KAAO,GACpE,EAATqb,EAAapI,IAAwBp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GAC7Eqb,EAAWpI,IAA0Bp7B,KAAKid,MAAQ,OAAejd,KAAKmoB,KAAO,GAClE,GAAXsb,EAAgBrI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,GAAXsb,EAAgBrI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,EAAXsb,EAAerI,IAAsBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAC7Esb,EAAarI,IAAwBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAClE,GAAXub,EAAgBtI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,GAAXub,EAAgBtI,IAAqBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,IAClE,EAAXub,EAAetI,IAAsBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAC7Eub,EAAatI,IAAwBp7B,KAAKid,MAAQ,SAAejd,KAAKmoB,KAAO,GAC7D,IAAhBwb,EAAsBvI,IAAep7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,KAC7D,IAAhBwb,EAAsBvI,IAAep7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,KAC7D,GAAhBwb,EAAqBvI,IAAgBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,IAC7D,GAAhBwb,EAAqBvI,IAAgBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,IAC7D,EAAhBwb,EAAoBvI,IAAiBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,GAC7Ewb,EAAkBvI,IAAmBp7B,KAAKid,MAAQ,cAAejd,KAAKmoB,KAAO,KASnFpmB,EAASoR,UAAU6hB,KAAO,SAASyD,GACjC,GAAIL,GAAQ,GAAI9zB,MAAKm0B,EAAKzxB,UAE1B,IAAkB,QAAdhH,KAAKid,MAAiB,CACxB,GAAIsb,GAAOH,EAAM+J,cAAgBj9B,KAAKwoB,MAAM0K,EAAM2K,WAAa,GAC/D3K,GAAM8J,YAAYh9B,KAAKwoB,MAAM6K,EAAOv4B,KAAKmoB,MAAQnoB,KAAKmoB,MACtDiQ,EAAMgK,SAAS,GACfhK,EAAMiK,QAAQ,GACdjK,EAAMkK,SAAS,GACflK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,SAAdziC,KAAKid,MACRmb,EAAM0K,UAAY,IACpB1K,EAAMiK,QAAQ,GACdjK,EAAMgK,SAAShK,EAAM2K,WAAa,IAIlC3K,EAAMiK,QAAQ,GAGhBjK,EAAMkK,SAAS,GACflK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,OAAdziC,KAAKid,MAAgB,CAE5B,OAAQjd,KAAKmoB,MACX,IAAK,GACL,IAAK,GACHiQ,EAAMkK,SAA6C,GAApCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,IAAW,MAC1D,SACEzK,EAAMkK,SAA6C,GAApCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,KAEjDzK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,WAAdziC,KAAKid,MAAoB,CAEhC,OAAQjd,KAAKmoB,MACX,IAAK,GACL,IAAK,GACHiQ,EAAMkK,SAA6C,GAApCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,IAAW,MAC1D,SACEzK,EAAMkK,SAA4C,EAAnCp9B,KAAKwoB,MAAM0K,EAAMyK,WAAa,IAEjDzK,EAAMmK,WAAW,GACjBnK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OAEnB,IAAkB,QAAdziC,KAAKid,MAAiB,CAC7B,OAAQjd,KAAKmoB,MACX,IAAK,GACHiQ,EAAMmK,WAAiD,GAAtCr9B,KAAKwoB,MAAM0K,EAAMwK,aAAe,IAAW,MAC9D,SACExK,EAAMmK,WAAiD,GAAtCr9B,KAAKwoB,MAAM0K,EAAMwK,aAAe,KAErDxK,EAAMoK,WAAW,GACjBpK,EAAMqK,gBAAgB,OACjB,IAAkB,UAAdziC,KAAKid,MAAmB,CAEjC,OAAQjd,KAAKmoB,MACX,IAAK,IACL,IAAK,IACHiQ,EAAMmK,WAAgD,EAArCr9B,KAAKwoB,MAAM0K,EAAMwK,aAAe,IACjDxK,EAAMoK,WAAW,EACjB,MACF,KAA
K,GACHpK,EAAMoK,WAAiD,GAAtCt9B,KAAKwoB,MAAM0K,EAAMuK,aAAe,IAAW,MAC9D,SACEvK,EAAMoK,WAAiD,GAAtCt9B,KAAKwoB,MAAM0K,EAAMuK,aAAe,KAErDvK,EAAMqK,gBAAgB,OAEnB,IAAkB,UAAdziC,KAAKid,MAEZ,OAAQjd,KAAKmoB,MACX,IAAK,IACL,IAAK,IACHiQ,EAAMoK,WAAgD,EAArCt9B,KAAKwoB,MAAM0K,EAAMuK,aAAe,IACjDvK,EAAMqK,gBAAgB,EACtB,MACF,KAAK,GACHrK,EAAMqK,gBAA6D,IAA7Cv9B,KAAKwoB,MAAM0K,EAAMsK,kBAAoB,KAAe,MAC5E,SACEtK,EAAMqK,gBAA4D,IAA5Cv9B,KAAKwoB,MAAM0K,EAAMsK,kBAAoB,UAG5D,IAAkB,eAAd1iC,KAAKid,MAAwB,CACpC,GAAIkL,GAAOnoB,KAAKmoB,KAAO,EAAInoB,KAAKmoB,KAAO,EAAI,CAC3CiQ,GAAMqK,gBAAgBv9B,KAAKwoB,MAAM0K,EAAMsK,kBAAoBva,GAAQA,GAGrE,MAAOiQ,IAQTr2B,EAASoR,UAAU+pB,QAAU,WAC3B,GAAyB,GAArBl9B,KAAKg6B,aAEP,OADAh6B,KAAKg6B,cAAe,EACZh6B,KAAKid,OACX,IAAK,OACL,IAAK,QACL,IAAK,UACL,IAAK,MACL,IAAK,OACL,IAAK,SACL,IAAK,SACL,IAAK,cACH,OAAO,CACT,SACE,OAAO,MAGR,IAA0B,GAAtBjd,KAAKi6B,cAEZ,OADAj6B,KAAKi6B,eAAgB,EACbj6B,KAAKid,OACX,IAAK,UACL,IAAK,MACL,IAAK,OACL,IAAK,SACL,IAAK,SACL,IAAK,cACH,OAAO,CACT,SACE,OAAO,MAGR,IAAwB,GAApBjd,KAAKk6B,YAEZ,OADAl6B,KAAKk6B,aAAc,EACXl6B,KAAKid,OACX,IAAK,cACL,IAAK,SACL,IAAK,SACL,IAAK,OACH,OAAO,CACT,SACE,OAAO,EAIb,OAAQjd,KAAKid,OACX,IAAK,cACH,MAA0C,IAAlCjd,KAAK85B,QAAQ4I,iBACvB,KAAK,SACH,MAAqC,IAA7B1iC,KAAK85B,QAAQ6I,YACvB,KAAK,SACH,MAAmC,IAA3B3iC,KAAK85B,QAAQ+I,YAAkD,GAA7B7iC,KAAK85B,QAAQ8I,YACzD,KAAK,OACH,MAAmC,IAA3B5iC,KAAK85B,QAAQ+I,UACvB,KAAK,UACL,IAAK,MACH,MAAkC,IAA1B7iC,KAAK85B,QAAQgJ,SACvB,KAAK,QACH,MAAmC,IAA3B9iC,KAAK85B,QAAQiJ,UACvB,KAAK,OACH,OAAO,CACT,SACE,OAAO,IAWbhhC,EAASoR,UAAUywB,cAAgB,SAASnL,GAC9BjyB,QAARiyB,IACFA,EAAOz4B,KAAK85B,QAGd,IAAIwH,GAASthC,KAAKshC,OAAOE,YAAYxhC,KAAKid,MAC1C,OAAQqkB,IAAUA,EAAO37B,OAAS,EAAK9B,EAAO40B,GAAM6I,OAAOA,GAAU,IASvEv/B,EAASoR,UAAU0wB,cAAgB,SAASpL,GAC9BjyB,QAARiyB,IACFA,EAAOz4B,KAAK85B,QAGd,IAAIwH,GAASthC,KAAKshC,OAAOQ,YAAY9hC,KAAKid,MAC1C,OAAQqkB,IAAUA,EAAO37B,OAAS,EAAK9B,EAAO40B,GAAM6I,OAAOA,GAAU,IAGvEv/B,EAASoR,UAAU2wB,aAAe,WAKhC,QAASC,GAAK18B,GACZ,MAAQA,GAAQ8gB,EAAO,GAAK,EAAK,QAAU,OAG7C,QAAS6b,GAAMvL,GACb,MAAIA,GAAKwL,OAAO,GAAI3/B,MAAQ,OACnB,SAELm0B,EAAKwL,OAAOpgC,IAASoP,IAAI,EAAG,OAAQ,OAC/B,YAELwlB,EAAKwL,OAAOpgC,IAASoP,IAAI,GAAI,OAAQ,OAChC,aAEF,GAGT,QAASixB,GAAYzL,GACnB,MAAOA,GAAKwL,OAAO,GAAI3/B,MAAQ,QAAU,gBAAkB,GAG7D,QAAS6/B,GAAa1L,GACpB,MAAOA,GAAKwL,OAAO,GAAI3/B,MAAQ,SAAW,iBAAmB,GAG/D,QAAS8/B,GAAY3L,GACnB,MAAOA,GAAKwL,OAAO,GAAI3/B,MAAQ,QAAU,gBAAkB,GA9B7D,GAAI9D,GAAIqD,EAAO7D,KAAK85B,SAChBrB,EAAOj4B,EAAE6jC,OAAS7jC,EAAE6jC,OAAO,MAAQ7jC,EAAE8jC,KAAK,MAC1Cnc,EAAOnoB,KAAKmoB,IA+BhB,QAAQnoB,KAAKid,OACX,IAAK,cACH,MAAO8mB,GAAKtL,EAAK8E,gBAAgBrwB,MAEnC,KAAK,SACH,MAAO62B,GAAKtL,EAAK6E,WAAWpwB,MAE9B,KAAK,SACH,MAAO62B,GAAKtL,EAAK4E,WAAWnwB,MAE9B,KAAK,OACH,GAAIkwB,GAAQ3E,EAAK2E,OAIjB,OAHiB,IAAbp9B,KAAKmoB,OACPiV,EAAQA,EAAQ,KAAOA,EAAQ,IAE1BA,EAAQ,IAAM4G,EAAMvL,GAAQsL,EAAKtL,EAAK2E,QAE/C,KAAK,UACH,MAAO3E,GAAK6I,OAAO,QAAQiD,cACvBP,EAAMvL,GAAQyL,EAAYzL,GAAQsL,EAAKtL,EAAKA,OAElD,KAAK,MACH,GAAIJ,GAAMI,EAAKA,OACXC,EAAQD,EAAK6I,OAAO,QAAQiD,aAChC,OAAO,MAAQlM,EAAM,IAAMK,EAAQyL,EAAa1L,GAAQsL,EAAK1L,EAAM,EAErE,KAAK,QACH,MAAOI,GAAK6I,OAAO,QAAQiD,cACvBJ,EAAa1L,GAAQsL,EAAKtL,EAAKC,QAErC,KAAK,OACH,GAAIH,GAAOE,EAAKF,MAChB,OAAO,OAASA,EAAO6L,EAAY3L,GAAOsL,EAAKxL,EAEjD,SACE,MAAO,KAIb14B,EAAOD,QAAUmC,GAKb,SAASlC,GAOb,QAAS0C,KACPvC,KAAKyO,QAAU,KACfzO,KAAKgG,MAAQ,KAQfzD,EAAU4Q,UAAUD,WAAa,SAASzE,GACpCA,GACF9N,KAAK2E,OAAOtF,KAAKyO,QAASA,IAQ9BlM,EAAU4Q,UAAUsO,OAAS,WAE3B,OAAO,GAMTlf,EAAU4Q,UAAUG,QAAU,aAU9B/Q,EAAU4Q,UAAUqxB,WAAa,WAC/B,GAAIC,GAAWzkC,KAAKgG,MAAM0+B,iBAAmB1kC,KAAKgG,MAAMuM,OACpDvS,KAAKgG,MAAM2+B,kBAAoB3kC,KAAKgG,MAAMwM,MAK9C,OAHAxS,MAAKgG,MAAM0+B,eAAiB1kC,KAAKgG,MAAMuM,MACvCvS,KAAKgG,MAAM2+B,
gBAAkB3kC,KAAKgG,MAAMwM,OAEjCiyB,GAGT5kC,EAAOD,QAAU2C,GAKb,SAAS1C,EAAQD,EAASM,GAe9B,QAASsC,GAAamyB,EAAMlmB,GAC1BzO,KAAK20B,KAAOA,EAGZ30B,KAAKq0B,gBACHuQ,iBAAiB,EAEjBC,QAASA,EACTR,OAAQ,MAEVrkC,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBACpCr0B,KAAK2pB,OAAS,EAEd3pB,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GA5BlB,GAAI9N,GAAOT,EAAoB,GAC3BqC,EAAYrC,EAAoB,IAChC2D,EAAS3D,EAAoB,IAC7B2kC,EAAU3kC,EAAoB,GA4BlCsC,GAAY2Q,UAAY,GAAI5Q,GAM5BC,EAAY2Q,UAAUuhB,QAAU,WAC9B,GAAI7C,GAAMtgB,SAASM,cAAc,MACjCggB,GAAIhqB,UAAY,cAChBgqB,EAAI5kB,MAAM2W,SAAW,WACrBiO,EAAI5kB,MAAMtF,IAAM,MAChBkqB,EAAI5kB,MAAMuF,OAAS,OAEnBxS,KAAK6xB,IAAMA,GAMbrvB,EAAY2Q,UAAUG,QAAU,WAC9BtT,KAAKyO,QAAQm2B,iBAAkB,EAC/B5kC,KAAKyhB,SAELzhB,KAAK20B,KAAO,MAQdnyB,EAAY2Q,UAAUD,WAAa,SAASzE,GACtCA,GAEF9N,EAAKoF,iBAAiB,kBAAmB,SAAU,WAAY/F,KAAKyO,QAASA,IAQjFjM,EAAY2Q,UAAUsO,OAAS,WAC7B,GAAIzhB,KAAKyO,QAAQm2B,gBAAiB,CAChC,GAAIE,GAAS9kC,KAAK20B,KAAK5E,IAAIgV,kBACvB/kC,MAAK6xB,IAAIhoB,YAAci7B,IAErB9kC,KAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,KAEvCiT,EAAOrzB,YAAYzR,KAAK6xB,KAExB7xB,KAAK4P,QAGP,IAAIutB,GAAM,GAAI74B,OAAK,GAAIA,OAAO0C,UAAYhH,KAAK2pB,QAC3C5X,EAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASkI,GAE5BkH,EAASrkC,KAAKyO,QAAQo2B,QAAQ7kC,KAAKyO,QAAQ41B,QAC3CW,EAAQX,EAAOvK,QAAU,IAAMuK,EAAOlK,KAAO,KAAOt2B,EAAOs5B,GAAKmE,OAAO,8BAC3E0D,GAAQA,EAAM5f,OAAO,GAAG6f,cAAgBD,EAAME,UAAU,GAExDllC,KAAK6xB,IAAI5kB,MAAMxF,KAAOsK,EAAI,KAC1B/R,KAAK6xB,IAAImT,MAAQA,MAIbhlC,MAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,KAEvC7xB,KAAKklB,MAGP,QAAO,GAMT1iB,EAAY2Q,UAAUvD,MAAQ,WAG5B,QAASiF,KACPV,EAAG+Q,MAGH,IAAIjI,GAAQ9I,EAAGwgB,KAAKc,MAAM2E,WAAWjmB,EAAGwgB,KAAKC,SAAS1I,OAAO3Z,OAAO0K,MAChEuV,EAAW,EAAIvV,EAAQ,EACZ,IAAXuV,IAAiBA,EAAW,IAC5BA,EAAW,MAAMA,EAAW,KAEhCre,EAAGsN,SAGHtN,EAAGgxB,iBAAmB7rB,WAAWzE,EAAQ2d,GAd3C,GAAIre,GAAKnU,IAiBT6U,MAMFrS,EAAY2Q,UAAU+R,KAAO,WACG1e,SAA1BxG,KAAKmlC,mBACP9rB,aAAarZ,KAAKmlC,wBACXnlC,MAAKmlC,mBAUhB3iC,EAAY2Q,UAAUiyB,eAAiB,SAASjL,GAC9C,GAAIrsB,GAAInN,EAAKkG,QAAQszB,EAAM,QAAQnzB,UAC/Bm2B,GAAM,GAAI74B,OAAO0C,SACrBhH,MAAK2pB,OAAS7b,EAAIqvB,EAClBn9B,KAAKyhB,UAOPjf,EAAY2Q,UAAUkyB,eAAiB,WACrC,MAAO,IAAI/gC,OAAK,GAAIA,OAAO0C,UAAYhH,KAAK2pB,SAG9C9pB,EAAOD,QAAU4C,GAKb,SAAS3C,EAAQD,EAASM,GAiB9B,QAASuC,GAAYkyB,EAAMlmB,GACzBzO,KAAK20B,KAAOA,EAGZ30B,KAAKq0B,gBACHiR,gBAAgB,EAChBT,QAASA,EACTR,OAAQ,MAEVrkC,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAEpCr0B,KAAK41B,WAAa,GAAItxB,MACtBtE,KAAKulC,eAGLvlC,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GAhClB,GAAI1K,GAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,GAC3BqC,EAAYrC,EAAoB,IAChC2D,EAAS3D,EAAoB,IAC7B2kC,EAAU3kC,EAAoB,GA+BlCuC,GAAW0Q,UAAY,GAAI5Q,GAO3BE,EAAW0Q,UAAUD,WAAa,SAASzE,GACrCA,GAEF9N,EAAKoF,iBAAiB,iBAAkB,SAAU,WAAY/F,KAAKyO,QAASA,IAQhFhM,EAAW0Q,UAAUuhB,QAAU,WAC7B,GAAI7C,GAAMtgB,SAASM,cAAc,MACjCggB,GAAIhqB,UAAY,aAChBgqB,EAAI5kB,MAAM2W,SAAW,WACrBiO,EAAI5kB,MAAMtF,IAAM,MAChBkqB,EAAI5kB,MAAMuF,OAAS,OACnBxS,KAAK6xB,IAAMA,CAEX,IAAI2T,GAAOj0B,SAASM,cAAc,MAClC2zB,GAAKv4B,MAAM2W,SAAW,WACtB4hB,EAAKv4B,MAAMtF,IAAM,MACjB69B,EAAKv4B,MAAMxF,KAAO,QAClB+9B,EAAKv4B,MAAMuF,OAAS,OACpBgzB,EAAKv4B,MAAMsF,MAAQ,OACnBsf,EAAIpgB,YAAY+zB,GAGhBxlC,KAAK8D,OAAS,GAAIC,GAAOyhC,GACzBxlC,KAAK8D,OAAOyP,GAAG,WAAYvT,KAAKg+B,aAAalJ,KAAK90B,OAClDA,KAAK8D,OAAOyP,GAAG,UAAYvT,KAAKi+B,QAAQnJ,KAAK90B,OAC7CA,KAAK8D,OAAOyP,GAAG,SAAYvT,KAAKk+B,WAAWpJ,KAAK90B,OAChDA,KAAK8D,OAAOyP,GAAG,MAAS,SAAUjK,GAChCA,EAAMD,oBAOV5G,EAAW0Q,UAAUG,QAAU,WAC7BtT,KAAKyO,QAAQ62B,gBAAiB,EAC9BtlC,KAAKyhB,SAELzhB,KAAK8D,OAAOs/B,QAAO,GACnBpjC,KAAK8D,OAAS,KAEd9D,KAAK20B,KAAO,MAOdlyB,EAAW0Q,UAAUsO,OAAS,WAC5B,GAAIzhB,KAAKyO,QAAQ62B,eAAgB,CAC/B,GAAIR,GAAS9kC,KAAK20B,KAAK5E,IAAIgV,kBACvB/kC,MAAK6xB,IAAIhoB,YAAci7B,
IAErB9kC,KAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,KAEvCiT,EAAOrzB,YAAYzR,KAAK6xB,KAG1B,IAAI9f,GAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASj1B,KAAK41B,YAEjCyO,EAASrkC,KAAKyO,QAAQo2B,QAAQ7kC,KAAKyO,QAAQ41B,QAC3CW,EAAQX,EAAOlK,KAAO,KAAOt2B,EAAO7D,KAAK41B,YAAY0L,OAAO,8BAChE0D,GAAQA,EAAM5f,OAAO,GAAG6f,cAAgBD,EAAME,UAAU,GAExDllC,KAAK6xB,IAAI5kB,MAAMxF,KAAOsK,EAAI,KAC1B/R,KAAK6xB,IAAImT,MAAQA,MAIbhlC,MAAK6xB,IAAIhoB,YACX7J,KAAK6xB,IAAIhoB,WAAWsH,YAAYnR,KAAK6xB,IAIzC,QAAO,GAOTpvB,EAAW0Q,UAAUsyB,cAAgB,SAAStL,GAC5Cn6B,KAAK41B,WAAaj1B,EAAKkG,QAAQszB,EAAM,QACrCn6B,KAAKyhB,UAOPhf,EAAW0Q,UAAUuyB,cAAgB,WACnC,MAAO,IAAIphC,MAAKtE,KAAK41B,WAAW5uB,YAQlCvE,EAAW0Q,UAAU6qB,aAAe,SAAS10B,GAC3CtJ,KAAKulC,YAAYxG,UAAW,EAC5B/+B,KAAKulC,YAAY3P,WAAa51B,KAAK41B,WAEnCtsB,EAAMq8B,kBACNr8B,EAAMD,kBAQR5G,EAAW0Q,UAAU8qB,QAAU,SAAU30B,GACvC,GAAKtJ,KAAKulC,YAAYxG,SAAtB,CAEA,GAAIhtB,GAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASj1B,KAAKulC,YAAY3P,YAActsB,EAAMk2B,OACjErF,EAAOn6B,KAAK20B,KAAKh0B,KAAK00B,OAAOtjB,EAEjC/R,MAAKylC,cAActL,GAGnBn6B,KAAK20B,KAAKE,QAAQjH,KAAK,cACrBuM,KAAM,GAAI71B,MAAKtE,KAAK41B,WAAW5uB,aAGjCsC,EAAMq8B,kBACNr8B,EAAMD,mBAQR5G,EAAW0Q,UAAU+qB,WAAa,SAAU50B,GACrCtJ,KAAKulC,YAAYxG,WAGtB/+B,KAAK20B,KAAKE,QAAQjH,KAAK,eACrBuM,KAAM,GAAI71B,MAAKtE,KAAK41B,WAAW5uB,aAGjCsC,EAAMq8B,kBACNr8B,EAAMD,mBAGRxJ,EAAOD,QAAU6C,GAKb,SAAS5C,EAAQD,EAASM,GAe9B,QAASwC,GAAUiyB,EAAMlmB,EAASm3B,EAAKC,GACrC7lC,KAAKK,GAAKM,EAAKqE,aACfhF,KAAK20B,KAAOA,EAEZ30B,KAAKq0B,gBACHE,YAAa,OACbuR,iBAAiB,EACjBC,iBAAiB,EACjBC,OAAO,EACPC,iBAAkB,EAClBC,iBAAkB,EAClBC,aAAc,GACdC,aAAc,EACdC,UAAW,GACX9zB,MAAO,OACPmW,SAAS,EACT6S,YAAY,EACZD,aACE7zB,MAAOqE,IAAItF,OAAWkG,IAAIlG,QAC1B6gB,OAAQvb,IAAItF,OAAWkG,IAAIlG,SAE7Bw+B,OACEv9B,MAAO8hB,KAAK/iB,QACZ6gB,OAAQkC,KAAK/iB,SAEf86B,QACE75B,MAAOs1B,SAAUv2B,QACjB6gB,OAAQ0V,SAAUv2B,UAItBxG,KAAK6lC,iBAAmBA,EACxB7lC,KAAKsmC,aAAeV,EACpB5lC,KAAKgG,SACLhG,KAAKumC,aACHC,SACAC,UACAzB,UAGFhlC,KAAK+vB,OAEL/vB,KAAKy1B,OAAS7lB,MAAM,EAAGC,IAAI,GAE3B7P,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBACpCr0B,KAAK0mC,iBAAmB,EAExB1mC,KAAKkT,WAAWzE,GAChBzO,KAAKuS,MAAQrO,QAAQ,GAAKlE,KAAKyO,QAAQ8D,OAAO/H,QAAQ,KAAK,KAC3DxK,KAAK2mC,SAAW3mC,KAAKuS,MACrBvS,KAAKwS,OAASxS,KAAKsmC,aAAahW,aAChCtwB,KAAKk5B,QAAS,EAEdl5B,KAAK4mC,WAAa,GAClB5mC,KAAK6mC,iBAAmB,GACxB7mC,KAAK8mC,aAAe,GAEpB9mC,KAAK+mC,WAAa,EAClB/mC,KAAKgnC,QAAS,EACdhnC,KAAKinC,eACLjnC,KAAKknC,cAAe,EAGpBlnC,KAAKm0B,UACLn0B,KAAKmnC,eAAiB,EAGtBnnC,KAAK00B,SAEL,IAAIvgB,GAAKnU,IACTA,MAAK20B,KAAKE,QAAQthB,GAAG,eAAgB,WACnCY,EAAG4b,IAAIqX,cAAcn6B,MAAMtF,IAAMwM,EAAGwgB,KAAKC,SAASyS,UAAY,OApFlE,GAAI1mC,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9BqC,EAAYrC,EAAoB,IAChC0B,EAAW1B,EAAoB,GAqFnCwC,GAASyQ,UAAY,GAAI5Q,GAGzBG,EAASyQ,UAAUm0B,SAAW,SAAS7e,EAAO8e,GACvCvnC,KAAKm0B,OAAOruB,eAAe2iB,KAC9BzoB,KAAKm0B,OAAO1L,GAAS8e,GAEvBvnC,KAAKmnC,gBAAkB,GAGzBzkC,EAASyQ,UAAUq0B,YAAc,SAAS/e,EAAO8e,GAC/CvnC,KAAKm0B,OAAO1L,GAAS8e,GAGvB7kC,EAASyQ,UAAUs0B,YAAc,SAAShf,GACpCzoB,KAAKm0B,OAAOruB,eAAe2iB,WACtBzoB,MAAKm0B,OAAO1L,GACnBzoB,KAAKmnC,gBAAkB,IAK3BzkC,EAASyQ,UAAUD,WAAa,SAAUzE,GACxC,GAAIA,EAAS,CACX,GAAIgT,IAAS,CACTzhB,MAAKyO,QAAQ8lB,aAAe9lB,EAAQ8lB,aAAuC/tB,SAAxBiI,EAAQ8lB,cAC7D9S,GAAS,EAEX,IAAIvT,IACF,cACA,kBACA,kBACA,QACA,mBACA,mBACA,eACA,eACA,YACA,QACA,UACA,cACA,QACA,SACA,aAEFvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,GAE3CzO,KAAK2mC,SAAWziC,QAAQ,GAAKlE,KAAKyO,QAAQ8D,OAAO/H,QAAQ,KAAK,KAEhD,GAAViX,GAAkBzhB,KAAK+vB,IAAIzQ,QAC7Btf,KAAK0nC,OACL1nC,KAAK2nC,UASXjlC,EAASyQ,UAAUuhB,QAAU,WAC3B10B,KAAK+vB,IAAIzQ,MAAQ/N,SAASM,cAAc,OACxC7R,KAAK+vB,IAAIzQ,MAAMrS,MAAMsF,MAAQvS,KAAKyO,QAAQ8D,MAC1CvS,KAAK+vB,IAAIzQ,MAAMrS,MAAMuF,OAASxS,KAAKwS,OAEnCxS,KAAK
+vB,IAAIqX,cAAgB71B,SAASM,cAAc,OAChD7R,KAAK+vB,IAAIqX,cAAcn6B,MAAMsF,MAAQ,OACrCvS,KAAK+vB,IAAIqX,cAAcn6B,MAAMuF,OAASxS,KAAKwS,OAC3CxS,KAAK+vB,IAAIqX,cAAcn6B,MAAM2W,SAAW,WAGxC5jB,KAAK4lC,IAAMr0B,SAASC,gBAAgB,6BAA6B,OACjExR,KAAK4lC,IAAI34B,MAAM2W,SAAW,WAC1B5jB,KAAK4lC,IAAI34B,MAAMtF,IAAM,MACrB3H,KAAK4lC,IAAI34B,MAAMuF,OAAS,OACxBxS,KAAK4lC,IAAI34B,MAAMsF,MAAQ,OACvBvS,KAAK4lC,IAAI34B,MAAM26B,QAAU,QACzB5nC,KAAK+vB,IAAIzQ,MAAM7N,YAAYzR,KAAK4lC,MAGlCljC,EAASyQ,UAAU00B,kBAAoB,WACrCjnC,EAAQiQ,gBAAgB7Q,KAAKinC,YAE7B,IAAIl1B,GACAs0B,EAAYrmC,KAAKyO,QAAQ43B,UACzByB,EAAa,GACbC,EAAa,EACb/1B,EAAI+1B,EAAa,GAAMD,CAGzB/1B,GAD8B,QAA5B/R,KAAKyO,QAAQ8lB,YACXwT,EAGA/nC,KAAKuS,MAAQ8zB,EAAY0B,CAG/B,KAAK,GAAIzQ,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,KACvIt3B,KAAKm0B,OAAOmD,GAAS0Q,SAASj2B,EAAGC,EAAGhS,KAAKinC,YAAajnC,KAAK4lC,IAAKS,EAAWyB,GAC3E91B,GAAK81B,EAAaC,GAKxBnnC,GAAQsQ,gBAAgBlR,KAAKinC,aAC7BjnC,KAAKknC,cAAe,GAGtBxkC,EAASyQ,UAAU80B,cAAgB,WACR,GAArBjoC,KAAKknC,eACPtmC,EAAQiQ,gBAAgB7Q,KAAKinC,aAC7BrmC,EAAQsQ,gBAAgBlR,KAAKinC,aAC7BjnC,KAAKknC,cAAe,IAOxBxkC,EAASyQ,UAAUw0B,KAAO,WACxB3nC,KAAKk5B,QAAS,EACTl5B,KAAK+vB,IAAIzQ,MAAMzV,aACc,QAA5B7J,KAAKyO,QAAQ8lB,YACfv0B,KAAK20B,KAAK5E,IAAItoB,KAAKgK,YAAYzR,KAAK+vB,IAAIzQ,OAGxCtf,KAAK20B,KAAK5E,IAAI1I,MAAM5V,YAAYzR,KAAK+vB,IAAIzQ,QAIxCtf,KAAK+vB,IAAIqX,cAAcv9B,YAC1B7J,KAAK20B,KAAK5E,IAAImY,qBAAqBz2B,YAAYzR,KAAK+vB,IAAIqX,gBAO5D1kC,EAASyQ,UAAUu0B,KAAO,WACxB1nC,KAAKk5B,QAAS,EACVl5B,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,OAG7Ctf,KAAK+vB,IAAIqX,cAAcv9B,YACzB7J,KAAK+vB,IAAIqX,cAAcv9B,WAAWsH,YAAYnR,KAAK+vB,IAAIqX,gBAU3D1kC,EAASyQ,UAAUogB,SAAW,SAAU3jB,EAAOC,GAC1B,GAAf7P,KAAKgnC,QAA8C,GAA3BhnC,KAAKyO,QAAQ8sB,YAA2C,IAArBv7B,KAAK8mC,cAC9Dl3B,EAAQ,IACVA,EAAQ,GAGZ5P,KAAKy1B,MAAM7lB,MAAQA,EACnB5P,KAAKy1B,MAAM5lB,IAAMA,GAOnBnN,EAASyQ,UAAUsO,OAAS,WAC1B,GAAIgjB,IAAU,EACV0D,EAAe,CAGnBnoC,MAAK+vB,IAAIqX,cAAcn6B,MAAMtF,IAAM3H,KAAK20B,KAAKC,SAASyS,UAAY,IAElE,KAAK,GAAI/P,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,IACvI6Q,IAIN,IAA2B,GAAvBnoC,KAAKmnC,gBAAuC,GAAhBgB,EAC9BnoC,KAAK0nC,WAEF,CACH1nC,KAAK2nC,OACL3nC,KAAKwS,OAAStO,OAAOlE,KAAKsmC,aAAar5B,MAAMuF,OAAOhI,QAAQ,KAAK,KAGjExK,KAAK+vB,IAAIqX,cAAcn6B,MAAMuF,OAASxS,KAAKwS,OAAS,KACpDxS,KAAKuS,MAAgC,GAAxBvS,KAAKyO,QAAQia,QAAkBxkB,QAAQ,GAAKlE,KAAKyO,QAAQ8D,OAAO/H,QAAQ,KAAK,KAAO,CAEjG,IAAIxE,GAAQhG,KAAKgG,MACbsZ,EAAQtf,KAAK+vB,IAAIzQ,KAGrBA,GAAMzX,UAAY,WAGlB7H,KAAKooC,oBAEL,IAAI7T,GAAcv0B,KAAKyO,QAAQ8lB,YAC3BuR,EAAkB9lC,KAAKyO,QAAQq3B,gBAC/BC,EAAkB/lC,KAAKyO,QAAQs3B,eAGnC//B,GAAMqiC,iBAAmBvC,EAAkB9/B,EAAMsiC,gBAAkB,EACnEtiC,EAAMuiC,iBAAmBxC,EAAkB//B,EAAMwiC,gBAAkB,EAEnExiC,EAAMyiC,eAAiBzoC,KAAK20B,KAAK5E,IAAImY,qBAAqB9X,YAAcpwB,KAAK+mC,WAAa/mC,KAAKuS,MAAQ,EAAIvS,KAAKyO,QAAQy3B,iBACxHlgC,EAAM0iC,gBAAkB,EACxB1iC,EAAM2iC,eAAiB3oC,KAAK20B,KAAK5E,IAAImY,qBAAqB9X,YAAcpwB,KAAK+mC,WAAa/mC,KAAKuS,MAAQ,EAAIvS,KAAKyO,QAAQw3B,iBACxHjgC,EAAM4iC,gBAAkB,EAGL,QAAfrU,GACFjV,EAAMrS,MAAMtF,IAAM,IAClB2X,EAAMrS,MAAMxF,KAAO,IACnB6X,EAAMrS,MAAMqW,OAAS,GACrBhE,EAAMrS,MAAMsF,MAAQvS,KAAKuS,MAAQ,KACjC+M,EAAMrS,MAAMuF,OAASxS,KAAKwS,OAAS,KACnCxS,KAAKgG,MAAMuM,MAAQvS,KAAK20B,KAAKC,SAASntB,KAAK8K,MAC3CvS,KAAKgG,MAAMwM,OAASxS,KAAK20B,KAAKC,SAASntB,KAAK+K,SAG5C8M,EAAMrS,MAAMtF,IAAM,GAClB2X,EAAMrS,MAAMqW,OAAS,IACrBhE,EAAMrS,MAAMxF,KAAO,IACnB6X,EAAMrS,MAAMsF,MAAQvS,KAAKuS,MAAQ,KACjC+M,EAAMrS,MAAMuF,OAASx
S,KAAKwS,OAAS,KACnCxS,KAAKgG,MAAMuM,MAAQvS,KAAK20B,KAAKC,SAASvN,MAAM9U,MAC5CvS,KAAKgG,MAAMwM,OAASxS,KAAK20B,KAAKC,SAASvN,MAAM7U,QAG/CiyB,EAAUzkC,KAAK6oC,gBACfpE,EAAUzkC,KAAKwkC,cAAgBC,EAEL,GAAtBzkC,KAAKyO,QAAQu3B,MACfhmC,KAAK6nC,oBAGL7nC,KAAKioC,gBAGPjoC,KAAK8oC,aAAavU,GAEpB,MAAOkQ,IAOT/hC,EAASyQ,UAAU01B,cAAgB,WACjC,GAAIpE,IAAU,CACd7jC,GAAQiQ,gBAAgB7Q,KAAKumC,YAAYC,OACzC5lC,EAAQiQ,gBAAgB7Q,KAAKumC,YAAYE,OAEzC,IAAIlS,GAAcv0B,KAAKyO,QAAqB,YAGxC2sB,EAAcp7B,KAAKgnC,OAAShnC,KAAKgG,MAAMwiC,iBAAmB,GAAKxoC,KAAK6mC,iBAEpE1e,EAAO,GAAIvmB,GACb5B,KAAKy1B,MAAM7lB,MACX5P,KAAKy1B,MAAM5lB,IACXurB,EACAp7B,KAAK+vB,IAAIzQ,MAAMgR,aACftwB,KAAKyO,QAAQ6sB,YAAYt7B,KAAKyO,QAAQ8lB,aACvB,GAAfv0B,KAAKgnC,QAAmBhnC,KAAKyO,QAAQ8sB,WAGvCv7B,MAAKmoB,KAAOA,CAGZ,IAAIye,IAAc5mC,KAAK+vB,IAAIzQ,MAAMgR,aAAgBnI,EAAKyT,WAAa57B,KAAK+vB,IAAIzQ,MAAMgR,aAAenI,EAAKwU,gBAAoBxU,EAAKwU,YAAcxU,EAAKyT,WAAazT,EAAKA,KAEpKnoB,MAAK4mC,WAAaA,CAElB,IAAImC,GAAgB/oC,KAAKwS,OAASo0B,EAC9BoC,EAAiB,CAGrB,IAAmB,GAAfhpC,KAAKgnC,OAAiB,CACxBJ,EAAa5mC,KAAK6mC,iBAClBmC,EAAiB9jC,KAAKwoB,MAAO1tB,KAAK+vB,IAAIzQ,MAAMgR,aAAesW,EAAcmC,EACzE,KAAK,GAAIvjC,GAAI,EAAO,GAAMwjC,EAAVxjC,EAA0BA,IACxC2iB,EAAK2U,UAIP,IAFAiM,EAAgB/oC,KAAKwS,OAASo0B,EAEL,IAArB5mC,KAAK8mC,cAAiD,GAA3B9mC,KAAKyO,QAAQ8sB,WAAoB,CAC9D,GAAI0N,GAAsB9gB,EAAKwT,UAAYxT,EAAKA,KAAQnoB,KAAK8mC,YAC7D,IAAImC,EAAqB,EACvB,IAAK,GAAIzjC,GAAI,EAAOyjC,EAAJzjC,EAAwBA,IAAM2iB,EAAKE,WAEhD,IAAyB,EAArB4gB,EACP,IAAK,GAAIzjC,GAAI,GAAQyjC,EAALzjC,EAAyBA,IAAM2iB,EAAK2U,gBAKxDiM,IAAiB,GAInB/oC,MAAKkpC,YAAc/gB,EAAKwT,SACxB,IAMIoB,GANAoM,EAAiB,EAGjBz8B,EAAM,CAI8BlG,UAArCxG,KAAKyO,QAAQ6yB,OAAO/M,KACrBwI,EAAW/8B,KAAKyO,QAAQ6yB,OAAO/M,GAAawI,UAG9C/8B,KAAKopC,aAAe,CAEpB,KADA,GAAIp3B,GAAI,EACDtF,EAAMxH,KAAKwoB,MAAMqb,IAAgB,CACtC5gB,EAAKE,OACLrW,EAAI9M,KAAKwoB,MAAMhhB,EAAMk6B,GACrBuC,EAAiBz8B,EAAMk6B,CACvB,IAAI1J,GAAU/U,EAAK+U,WAEfl9B,KAAKyO,QAAyB,iBAAgB,GAAXyuB,GAAmC,GAAfl9B,KAAKgnC,QAAsD,GAAnChnC,KAAKyO,QAAyB,kBAC/GzO,KAAKqpC,aAAar3B,EAAI,EAAGmW,EAAKC,WAAW2U,GAAWxI,EAAa,cAAev0B,KAAKgG,MAAMsiC,iBAGzFpL,GAAWl9B,KAAKyO,QAAyB,iBAAoB,GAAfzO,KAAKgnC,QAChB,GAAnChnC,KAAKyO,QAAyB,iBAA6B,GAAfzO,KAAKgnC,QAA8B,GAAX9J,GAClElrB,GAAK,GACPhS,KAAKqpC,aAAar3B,EAAI,EAAGmW,EAAKC,WAAW2U,GAAWxI,EAAa,cAAev0B,KAAKgG,MAAMwiC,iBAE7FxoC,KAAKspC,YAAYt3B,EAAGuiB,EAAa,wBAAyBv0B,KAAKyO,QAAQw3B,iBAAkBjmC,KAAKgG,MAAM2iC,iBAGpG3oC,KAAKspC,YAAYt3B,EAAGuiB,EAAa,wBAAyBv0B,KAAKyO,QAAQy3B,iBAAkBlmC,KAAKgG,MAAMyiC,gBAGnF,GAAfzoC,KAAKgnC,QAAkC,GAAhB7e,EAAK2R,UAC9B95B,KAAK8mC,aAAep6B,GAGtBA,IAIA1M,KAAK0mC,iBADY,GAAf1mC,KAAKgnC,OACiBh1B,GAAKhS,KAAKkpC,YAAc/gB,EAAK2R,SAG7B95B,KAAK+vB,IAAIzQ,MAAMgR,aAAenI,EAAKwU,WAI7D,IAAI4M,GAAa,CACuB/iC,UAApCxG,KAAKyO,QAAQu2B,MAAMzQ,IAAuE/tB,SAAzCxG,KAAKyO,QAAQu2B,MAAMzQ,GAAahL,OACnFggB,EAAavpC,KAAKgG,MAAMwjC,gBAE1B,IAAI7f,GAA+B,GAAtB3pB,KAAKyO,QAAQu3B,MAAgB9gC,KAAKwH,IAAI1M,KAAKyO,QAAQ43B,UAAWkD,GAAcvpC,KAAKyO,QAAQ03B,aAAe,GAAKoD,EAAavpC,KAAKyO,QAAQ03B,aAAe,EA0BnK,OAvBInmC,MAAKopC,aAAgBppC,KAAKuS,MAAQoX,GAAmC,GAAxB3pB,KAAKyO,QAAQia,SAC5D1oB,KAAKuS,MAAQvS,KAAKopC,aAAezf,EACjC3pB,KAAKyO,QAAQ8D,MAAQvS,KAAKuS,MAAQ,KAClC3R,EAAQsQ,gBAAgBlR,KAAKumC,YAAYC,OACzC5lC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYE,QACzCzmC,KAAKyhB,SACLgjB,GAAU,GAGHzkC,KAAKopC,aAAgBppC,KAAKuS,MAAQoX,GAAmC,GAAxB3pB,KAAKyO,QAAQia,SAAmB1oB,KAAKuS,MAAQvS,KAAK2mC,UACtG3mC,KAAKuS,MAAQrN,KAAKwH,IAAI1M,KAAK2mC,SAAS3mC,KAAKopC,aAAezf,GACxD3pB,KAAKyO,QAAQ8D,MAAQvS,KAAKuS,MAAQ,KAClC3R,EAAQsQ,gBAAgBlR,KAAKumC,YAAYC,OACzC5lC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYE,QACzCzmC,KAAKyhB,SACLgjB,GAAU,IAGV7jC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYC,OACzC5lC,EAAQsQ,gBAAgBlR,KAAKumC,YAAYE,QACzChC,GAAU,GAGLA,GAGT/hC,EAASyQ,UAAU
s2B,aAAe,SAAUpiC,GAC1C,GAAIqiC,GAAgB1pC,KAAKkpC,YAAc7hC,EACnCsiC,EAAiBD,EAAgB1pC,KAAK0mC,gBAC1C,OAAOiD,IAYTjnC,EAASyQ,UAAUk2B,aAAe,SAAUr3B,EAAGuX,EAAMgL,EAAa1sB,EAAW+hC,GAE3E,GAAInhB,GAAQ7nB,EAAQ8Q,cAAc,MAAM1R,KAAKumC,YAAYE,OAAQzmC,KAAK+vB,IAAIzQ,MAC1EmJ,GAAM5gB,UAAYA,EAClB4gB,EAAMxE,UAAYsF,EACC,QAAfgL,GACF9L,EAAMxb,MAAMxF,KAAO,IAAMzH,KAAKyO,QAAQ03B,aAAe,KACrD1d,EAAMxb,MAAMqb,UAAY,UAGxBG,EAAMxb,MAAMoa,MAAQ,IAAMrnB,KAAKyO,QAAQ03B,aAAe,KACtD1d,EAAMxb,MAAMqb,UAAY,QAG1BG,EAAMxb,MAAMtF,IAAMqK,EAAI,GAAM43B,EAAkB5pC,KAAKyO,QAAQ23B,aAAe,KAE1E7c,GAAQ,EAER,IAAIsgB,GAAe3kC,KAAKwH,IAAI1M,KAAKgG,MAAM8jC,eAAe9pC,KAAKgG,MAAM+jC,eAC7D/pC,MAAKopC,aAAe7f,EAAK5jB,OAASkkC,IACpC7pC,KAAKopC,aAAe7f,EAAK5jB,OAASkkC,IAYtCnnC,EAASyQ,UAAUm2B,YAAc,SAAUt3B,EAAGuiB,EAAa1sB,EAAW8hB,EAAQpX,GAC5E,GAAmB,GAAfvS,KAAKgnC,OAAgB,CACvB,GAAInX,GAAOjvB,EAAQ8Q,cAAc,MAAM1R,KAAKumC,YAAYC,MAAOxmC,KAAK+vB,IAAIqX,cACxEvX,GAAKhoB,UAAYA,EACjBgoB,EAAK5L,UAAY,GAEE,QAAfsQ,EACF1E,EAAK5iB,MAAMxF,KAAQzH,KAAKuS,MAAQoX,EAAU,KAG1CkG,EAAK5iB,MAAMoa,MAASrnB,KAAKuS,MAAQoX,EAAU,KAG7CkG,EAAK5iB,MAAMsF,MAAQA,EAAQ,KAC3Bsd,EAAK5iB,MAAMtF,IAAMqK,EAAI,OASzBtP,EAASyQ,UAAU21B,aAAe,SAAUvU,GAI1C,GAHA3zB,EAAQiQ,gBAAgB7Q,KAAKumC,YAAYvB,OAGDx+B,SAApCxG,KAAKyO,QAAQu2B,MAAMzQ,IAAuE/tB,SAAzCxG,KAAKyO,QAAQu2B,MAAMzQ,GAAahL,KAAoB,CACvG,GAAIyb,GAAQpkC,EAAQ8Q,cAAc,MAAO1R,KAAKumC,YAAYvB,MAAOhlC,KAAK+vB,IAAIzQ,MAC1E0lB,GAAMn9B,UAAY,eAAiB0sB,EACnCyQ,EAAM/gB,UAAYjkB,KAAKyO,QAAQu2B,MAAMzQ,GAAahL,KAGJ/iB,SAA1CxG,KAAKyO,QAAQu2B,MAAMzQ,GAAatnB,OAClCtM,EAAK2M,WAAW03B,EAAOhlC,KAAKyO,QAAQu2B,MAAMzQ,GAAatnB,OAGtC,QAAfsnB,EACFyQ,EAAM/3B,MAAMxF,KAAOzH,KAAKgG,MAAMwjC,gBAAkB,KAGhDxE,EAAM/3B,MAAMoa,MAAQrnB,KAAKgG,MAAMwjC,gBAAkB,KAGnDxE,EAAM/3B,MAAMsF,MAAQvS,KAAKwS,OAAS,KAIpC5R,EAAQsQ,gBAAgBlR,KAAKumC,YAAYvB,QAW3CtiC,EAASyQ,UAAUi1B,mBAAqB,WAEtC,KAAM,mBAAqBpoC,MAAKgG,OAAQ,CACtC,GAAIgkC,GAAYz4B,SAAS04B,eAAe,KACpCC,EAAmB34B,SAASM,cAAc,MAC9Cq4B,GAAiBriC,UAAY,sBAC7BqiC,EAAiBz4B,YAAYu4B,GAC7BhqC,KAAK+vB,IAAIzQ,MAAM7N,YAAYy4B,GAE3BlqC,KAAKgG,MAAMsiC,gBAAkB4B,EAAiBrlB,aAC9C7kB,KAAKgG,MAAM+jC,eAAiBG,EAAiB1qB,YAE7Cxf,KAAK+vB,IAAIzQ,MAAMnO,YAAY+4B,GAG7B,KAAM,mBAAqBlqC,MAAKgG,OAAQ,CACtC,GAAImkC,GAAY54B,SAAS04B,eAAe,KACpCG,EAAmB74B,SAASM,cAAc,MAC9Cu4B,GAAiBviC,UAAY,sBAC7BuiC,EAAiB34B,YAAY04B,GAC7BnqC,KAAK+vB,IAAIzQ,MAAM7N,YAAY24B,GAE3BpqC,KAAKgG,MAAMwiC,gBAAkB4B,EAAiBvlB,aAC9C7kB,KAAKgG,MAAM8jC,eAAiBM,EAAiB5qB,YAE7Cxf,KAAK+vB,IAAIzQ,MAAMnO,YAAYi5B,GAG7B,KAAM,mBAAqBpqC,MAAKgG,OAAQ,CACtC,GAAIqkC,GAAY94B,SAAS04B,eAAe,KACpCK,EAAmB/4B,SAASM,cAAc,MAC9Cy4B,GAAiBziC,UAAY,sBAC7ByiC,EAAiB74B,YAAY44B,GAC7BrqC,KAAK+vB,IAAIzQ,MAAM7N,YAAY64B,GAE3BtqC,KAAKgG,MAAMwjC,gBAAkBc,EAAiBzlB,aAC9C7kB,KAAKgG,MAAMukC,eAAiBD,EAAiB9qB,YAE7Cxf,KAAK+vB,IAAIzQ,MAAMnO,YAAYm5B,KAU/B5nC,EAASyQ,UAAU6hB,KAAO,SAASyD,GACjC,MAAOz4B,MAAKmoB,KAAK6M,KAAKyD,IAGxB54B,EAAOD,QAAU8C,GAKb,SAAS7C,EAAQD,EAASM,GAkB9B,QAASyC,GAAYsP,EAAOqlB,EAAS7oB,EAAS+7B,GAC5CxqC,KAAKK,GAAKi3B,CACV,IAAIppB,IAAU,WAAW,QAAQ,OAAO,mBAAmB,WAAW,aAAa,SAAS,aAC5FlO,MAAKyO,QAAU9N,EAAKsN,sBAAsBC,EAAOO,GACjDzO,KAAKyqC,kBAAwCjkC,SAApByL,EAAMpK,UAC/B7H,KAAKwqC,yBAA2BA,EAChCxqC,KAAK0qC,aAAe,EACpB1qC,KAAK6U,OAAO5C,GACkB,GAA1BjS,KAAKyqC,oBACPzqC,KAAKwqC,yBAAyB,IAAM,GAEtCxqC,KAAK81B,aACL91B,KAAK0oB,QAA4BliB,SAAlByL,EAAMyW,SAAwB,EAAOzW,EAAMyW,QA5B5D,GAAI/nB,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9ByqC,EAAOzqC,EAAoB,IAC3B0qC,EAAM1qC,EAAoB,IAC1B2qC,EAAS3qC,EAAoB,GAgCjCyC,GAAWwQ,UAAU8iB,SAAW,SAASh0B,GAC1B,MAATA,GACFjC,KAAK81B,UAAY7zB,EACQ,GAArBjC,KAAKyO,QAAQyH,MACflW,KAAK81B,UAAU5f,KAAK,SAAU3Q,EAAEa,GAAI,MAAOb,GAAEwM,EAAI3L,EAAE2L,KAIrD/R,KAAK81B,cASTnzB,EAAWwQ,UAAU23B,gBAAkB,SAASvlB,GAC9CvlB,KAA
K0qC,aAAenlB,GAQtB5iB,EAAWwQ,UAAUD,WAAa,SAASzE,GACzC,GAAgBjI,SAAZiI,EAAuB,CACzB,GAAIP,IAAU,WAAW,QAAQ,OAAO,mBAAmB,WAC3DvN,GAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASA,GAE/C9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,UAEpCA,EAAQs8B,YACuB,gBAAtBt8B,GAAQs8B,YACbt8B,EAAQs8B,WAAWC,kBACqB,WAAtCv8B,EAAQs8B,WAAWC,gBACrBhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,EAEa,WAAtCx8B,EAAQs8B,WAAWC,gBAC1BhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,GAGhCjrC,KAAKyO,QAAQs8B,WAAWC,gBAAkB,cAC1ChrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,KAOhB,QAAtBjrC,KAAKyO,QAAQxB,MACfjN,KAAK8G,KAAO,GAAI6jC,GAAK3qC,KAAKK,GAAIL,KAAKyO,SAEN,OAAtBzO,KAAKyO,QAAQxB,MACpBjN,KAAK8G,KAAO,GAAI8jC,GAAI5qC,KAAKK,GAAIL,KAAKyO,SAEL,UAAtBzO,KAAKyO,QAAQxB,QACpBjN,KAAK8G,KAAO,GAAI+jC,GAAO7qC,KAAKK,GAAIL,KAAKyO,WASzC9L,EAAWwQ,UAAU0B,OAAS,SAAS5C,GACrCjS,KAAKiS,MAAQA,EACbjS,KAAK4vB,QAAU3d,EAAM2d,SAAW,QAChC5vB,KAAK6H,UAAYoK,EAAMpK,WAAa7H,KAAK6H,WAAa,aAAe7H,KAAKwqC,yBAAyB,GAAK,GACxGxqC,KAAK0oB,QAA4BliB,SAAlByL,EAAMyW,SAAwB,EAAOzW,EAAMyW,QAC1D1oB,KAAKiN,MAAQgF,EAAMhF,MACnBjN,KAAKkT,WAAWjB,EAAMxD,UAcxB9L,EAAWwQ,UAAU60B,SAAW,SAASj2B,EAAGC,EAAGlB,EAAeo6B,EAAc7E,EAAWyB,GACrF,GACIqD,GAAMC,EADNC,EAA0B,GAAbvD,EAGbwD,EAAU1qC,EAAQwQ,cAAc,OAAQN,EAAeo6B,EAO3D,IANAI,EAAQl5B,eAAe,KAAM,IAAKL,GAClCu5B,EAAQl5B,eAAe,KAAM,IAAKJ,EAAIq5B,GACtCC,EAAQl5B,eAAe,KAAM,QAASi0B,GACtCiF,EAAQl5B,eAAe,KAAM,SAAU,EAAEi5B,GACzCC,EAAQl5B,eAAe,KAAM,QAAS,WAEZ,QAAtBpS,KAAKyO,QAAQxB,MACfk+B,EAAOvqC,EAAQwQ,cAAc,OAAQN,EAAeo6B,GACpDC,EAAK/4B,eAAe,KAAM,QAASpS,KAAK6H,WACtBrB,SAAfxG,KAAKiN,OACNk+B,EAAK/4B,eAAe,KAAM,QAASpS,KAAKiN,OAG1Ck+B,EAAK/4B,eAAe,KAAM,IAAK,IAAML,EAAI,IAAIC,EAAE,MAAQD,EAAIs0B,GAAa,IAAIr0B,GACzC,GAA/BhS,KAAKyO,QAAQ88B,OAAO78B,UACtB08B,EAAWxqC,EAAQwQ,cAAc,OAAQN,EAAeo6B,GACjB,OAAnClrC,KAAKyO,QAAQ88B,OAAOhX,YACtB6W,EAASh5B,eAAe,KAAM,IAAK,IAAIL,EAAE,MAAQC,EAAIq5B,GACnD,IAAIt5B,EAAE,IAAIC,EAAE,MAAOD,EAAIs0B,GAAa,IAAIr0B,EAAE,MAAOD,EAAIs0B,GAAa,KAAOr0B,EAAIq5B,IAG/ED,EAASh5B,eAAe,KAAM,IAAK,IAAIL,EAAE,IAAIC,EAAE,KACzCD,EAAE,KAAOC,EAAIq5B,GAAc,MACzBt5B,EAAIs0B,GAAa,KAAOr0B,EAAIq5B,GAClC,KAAMt5B,EAAIs0B,GAAa,IAAIr0B,GAE/Bo5B,EAASh5B,eAAe,KAAM,QAASpS,KAAK6H,UAAY,cAGnB,GAAnC7H,KAAKyO,QAAQ0D,WAAWzD,SAC1B9N,EAAQkR,UAAUC,EAAI,GAAMs0B,EAAUr0B,EAAGhS,KAAM8Q,EAAeo6B,OAG7D,CACH,GAAIM,GAAWtmC,KAAKwoB,MAAM,GAAM2Y,GAC5BoF,EAAavmC,KAAKwoB,MAAM,GAAMoa,GAC9B4D,EAAaxmC,KAAKwoB,MAAM,IAAOoa,GAE/Bne,EAASzkB,KAAKwoB,OAAO2Y,EAAa,EAAImF,GAAW,EAErD5qC,GAAQ0R,QAAQP,EAAI,GAAIy5B,EAAW7hB,EAAY3X,EAAIq5B,EAAaI,EAAa,EAAGD,EAAUC,EAAYzrC,KAAK6H,UAAY,OAAQiJ,EAAeo6B,GAC9ItqC,EAAQ0R,QAAQP,EAAI,IAAIy5B,EAAW7hB,EAAS,EAAG3X,EAAIq5B,EAAaK,EAAa,EAAGF,EAAUE,EAAY1rC,KAAK6H,UAAY,OAAQiJ,EAAeo6B,KAYlJvoC,EAAWwQ,UAAUkkB,UAAY,SAASgP,EAAWyB,GACnD,GAAIlC,GAAMr0B,SAASC,gBAAgB,6BAA6B,MAEhE,OADAxR,MAAKgoC,SAAS,EAAE,GAAIF,KAAclC,EAAIS,EAAUyB,IACxC6D,KAAM/F,EAAKnd,MAAOzoB,KAAK4vB,QAAS2E,YAAYv0B,KAAKyO,QAAQm9B,mBAGnEjpC,EAAWwQ,UAAU04B,UAAY,SAASC,GACxC,MAAO9rC,MAAK8G,KAAK+kC,UAAUC,IAG7BnpC,EAAWwQ,UAAU44B,KAAO,SAAS/U,EAAS/kB,EAAO+5B,GACnDhsC,KAAK8G,KAAKilC,KAAK/U,EAAS/kB,EAAO+5B,IAIjCnsC,EAAOD,QAAU+C,GAKb,SAAS9C,EAAQD,EAASM,GAY9B,QAAS0C,GAAO00B,EAAS5kB,EAAMmjB,GAC7B71B,KAAKs3B,QAAUA,EACft3B,KAAKmhC,aACLnhC,KAAKisC,cAAgB,EACrBjsC,KAAKksC,gBAAkBx5B,GAAQA,EAAKy5B,cACpCnsC,KAAK61B,QAAUA,EAEf71B,KAAK+vB,OACL/vB,KAAKgG,OACHyiB,OACElW,MAAO,EACPC,OAAQ,IAGZxS,KAAK6H,UAAY,KAEjB7H,KAAKiC,SACLjC,KAAKosC,gBACLpsC,KAAK4O,cACHy9B,WACAC,UAEFtsC,KAAKusC,kBAAmB,CACxB,IAAIp4B,GAAKnU,IACTA,MAAK61B,QAAQlB,KAAKE,QAAQthB,GAAG,mBAAoB,WAC/CY,EAAGo4B,kBAAmB,IAGxBvsC,KAAK00B,UAEL10B,KAAKgY,QAAQtF,GAxCf,CAAA,GAAI/R,GAAOT,EAAoB,GAC3B4B,EAAQ5B,EAAoB
,GAChBA,GAAoB,IA6CpC0C,EAAMuQ,UAAUuhB,QAAU,WACxB,GAAIjM,GAAQlX,SAASM,cAAc,MACnC4W,GAAM5gB,UAAY,SAClB7H,KAAK+vB,IAAItH,MAAQA,CAEjB,IAAI+jB,GAAQj7B,SAASM,cAAc,MACnC26B,GAAM3kC,UAAY,QAClB4gB,EAAMhX,YAAY+6B,GAClBxsC,KAAK+vB,IAAIyc,MAAQA,CAEjB,IAAIC,GAAal7B,SAASM,cAAc,MACxC46B,GAAW5kC,UAAY,QACvB4kC,EAAW,kBAAoBzsC,KAC/BA,KAAK+vB,IAAI0c,WAAaA,EAEtBzsC,KAAK+vB,IAAI5jB,WAAaoF,SAASM,cAAc,OAC7C7R,KAAK+vB,IAAI5jB,WAAWtE,UAAY,QAEhC7H,KAAK+vB,IAAI+Q,KAAOvvB,SAASM,cAAc,OACvC7R,KAAK+vB,IAAI+Q,KAAKj5B,UAAY,QAK1B7H,KAAK+vB,IAAI2c,OAASn7B,SAASM,cAAc,OACzC7R,KAAK+vB,IAAI2c,OAAOz/B,MAAMuqB,WAAa,SACnCx3B,KAAK+vB,IAAI2c,OAAOzoB,UAAY,IAC5BjkB,KAAK+vB,IAAI5jB,WAAWsF,YAAYzR,KAAK+vB,IAAI2c,SAO3C9pC,EAAMuQ,UAAU6E,QAAU,SAAStF,GAEjC,GAAIkd,GAAUld,GAAQA,EAAKkd,OACvBA,aAAmB+c,SACrB3sC,KAAK+vB,IAAIyc,MAAM/6B,YAAYme,GAG3B5vB,KAAK+vB,IAAIyc,MAAMvoB,UADIzd,SAAZopB,GAAqC,OAAZA,EACLA,EAGA5vB,KAAKs3B,SAAW,GAI7Ct3B,KAAK+vB,IAAItH,MAAMuc,MAAQtyB,GAAQA,EAAKsyB,OAAS,GAExChlC,KAAK+vB,IAAIyc,MAAM7oB,WAIlBhjB,EAAKuH,gBAAgBlI,KAAK+vB,IAAIyc,MAAO,UAHrC7rC,EAAKiH,aAAa5H,KAAK+vB,IAAIyc,MAAO,SAOpC,IAAI3kC,GAAY6K,GAAQA,EAAK7K,WAAa,IACtCA,IAAa7H,KAAK6H,YAChB7H,KAAK6H,YACPlH,EAAKuH,gBAAgBlI,KAAK+vB,IAAItH,MAAOzoB,KAAK6H,WAC1ClH,EAAKuH,gBAAgBlI,KAAK+vB,IAAI0c,WAAYzsC,KAAK6H,WAC/ClH,EAAKuH,gBAAgBlI,KAAK+vB,IAAI5jB,WAAYnM,KAAK6H,WAC/ClH,EAAKuH,gBAAgBlI,KAAK+vB,IAAI+Q,KAAM9gC,KAAK6H,YAE3ClH,EAAKiH,aAAa5H,KAAK+vB,IAAItH,MAAO5gB,GAClClH,EAAKiH,aAAa5H,KAAK+vB,IAAI0c,WAAY5kC,GACvClH,EAAKiH,aAAa5H,KAAK+vB,IAAI5jB,WAAYtE,GACvClH,EAAKiH,aAAa5H,KAAK+vB,IAAI+Q,KAAMj5B,GACjC7H,KAAK6H,UAAYA,GAIf7H,KAAKiN,QACPtM,EAAK8M,cAAczN,KAAK+vB,IAAItH,MAAOzoB,KAAKiN,OACxCjN,KAAKiN,MAAQ,MAEXyF,GAAQA,EAAKzF,QACftM,EAAK2M,WAAWtN,KAAK+vB,IAAItH,MAAO/V,EAAKzF,OACrCjN,KAAKiN,MAAQyF,EAAKzF,QAQtBrK,EAAMuQ,UAAUy5B,cAAgB,WAC9B,MAAO5sC,MAAKgG,MAAMyiB,MAAMlW,OAW1B3P,EAAMuQ,UAAUsO,OAAS,SAASgU,EAAO/b,EAAQmzB,GAC/C,GAAIpI,IAAU,CAEdzkC,MAAKosC,aAAepsC,KAAK8sC,oBAAoB9sC,KAAK4O,aAAc5O,KAAKosC,aAAc3W,EAInF,IAAIsX,GAAe/sC,KAAK+vB,IAAI2c,OAAO7nB,YAC/BkoB,IAAgB/sC,KAAKgtC,mBACvBhtC,KAAKgtC,iBAAmBD,EAExBpsC,EAAK0H,QAAQrI,KAAKiC,MAAO,SAAUoN,GACjCA,EAAK49B,OAAQ,EACT59B,EAAK69B,WAAW79B,EAAKoS,WAG3BorB,GAAU,GAIR7sC,KAAK61B,QAAQpnB,QAAQ3M,MACvBA,EAAMA,MAAM9B,KAAKosC,aAAc1yB,EAAQmzB,GAGvC/qC,EAAMo/B,QAAQlhC,KAAKosC,aAAc1yB,EAAQ1Z,KAAKmhC,UAIhD,IAAI3uB,GAASxS,KAAKmtC,iBAAiBzzB,GAG/B+yB,EAAazsC,KAAK+vB,IAAI0c,UAC1BzsC,MAAK2H,IAAM8kC,EAAWW,UACtBptC,KAAKyH,KAAOglC,EAAWY,WACvBrtC,KAAKuS,MAAQk6B,EAAWrc,YACxBqU,EAAU9jC,EAAK8H,eAAezI,KAAM,SAAUwS,IAAWiyB,EAGzDA,EAAU9jC,EAAK8H,eAAezI,KAAKgG,MAAMyiB,MAAO,QAASzoB,KAAK+vB,IAAIyc,MAAMhtB,cAAgBilB,EACxFA,EAAU9jC,EAAK8H,eAAezI,KAAKgG,MAAMyiB,MAAO,SAAUzoB,KAAK+vB,IAAIyc,MAAM3nB,eAAiB4f,EAG1FzkC,KAAK+vB,IAAI5jB,WAAWc,MAAMuF,OAAUA,EAAS,KAC7CxS,KAAK+vB,IAAI0c,WAAWx/B,MAAMuF,OAAUA,EAAS,KAC7CxS,KAAK+vB,IAAItH,MAAMxb,MAAMuF,OAASA,EAAS,IAGvC,KAAK,GAAIhN,GAAI,EAAG8nC,EAAKttC,KAAKosC,aAAazmC,OAAY2nC,EAAJ9nC,EAAQA,IAAK,CAC1D,GAAI6J,GAAOrP,KAAKosC,aAAa5mC,EAC7B6J,GAAKk+B,YAAY7zB,GAGnB,MAAO+qB,IAST7hC,EAAMuQ,UAAUg6B,iBAAmB,SAAUzzB,GAE3C,GAAIlH,GACA45B,EAAepsC,KAAKosC,YAGxBpsC,MAAKwtC,gBACL,IAAIr5B,GAAKnU,IACT,IAAIosC,EAAazmC,OAAQ,CACvB,GAAImG,GAAMsgC,EAAa,GAAGzkC,IACtB+E,EAAM0/B,EAAa,GAAGzkC,IAAMykC,EAAa,GAAG55B,MAahD,IAZA7R,EAAK0H,QAAQ+jC,EAAc,SAAU/8B,GACnCvD,EAAM5G,KAAK4G,IAAIA,EAAKuD,EAAK1H,KACzB+E,EAAMxH,KAAKwH,IAAIA,EAAM2C,EAAK1H,IAAM0H,EAAKmD,QACVhM,SAAvB6I,EAAKqD,KAAK2uB,WACZltB,EAAGgtB,UAAU9xB,EAAKqD,KAAK2uB,UAAU7uB,OAAStN,KAAKwH,IAAIyH,EAAGgtB,UAAU9xB,EAAKqD,KAAK2uB,UAAU7uB,OAAOnD,EAAKmD,QAChG2B,EAAGgtB,UAAU9xB,EAAKqD,KAAK2uB,UAAU3Y,SAAU,KAO3C5c,EAAM4N,EAAOonB,KAAM,CA
ErB,GAAInX,GAAS7d,EAAM4N,EAAOonB,IAC1Bp0B,IAAOid,EACPhpB,EAAK0H,QAAQ+jC,EAAc,SAAU/8B,GACnCA,EAAK1H,KAAOgiB,IAGhBnX,EAAS9F,EAAMgN,EAAOrK,KAAKoW,SAAW,MAGtCjT,GAASkH,EAAOonB,KAAOpnB,EAAOrK,KAAKoW,QAIrC,OAFAjT,GAAStN,KAAKwH,IAAI8F,EAAQxS,KAAKgG,MAAMyiB,MAAMjW,SAQ7C5P,EAAMuQ,UAAUw0B,KAAO,WAChB3nC,KAAK+vB,IAAItH,MAAM5e,YAClB7J,KAAK61B,QAAQ9F,IAAI0d,SAASh8B,YAAYzR,KAAK+vB,IAAItH,OAG5CzoB,KAAK+vB,IAAI0c,WAAW5iC,YACvB7J,KAAK61B,QAAQ9F,IAAI0c,WAAWh7B,YAAYzR,KAAK+vB,IAAI0c,YAG9CzsC,KAAK+vB,IAAI5jB,WAAWtC,YACvB7J,KAAK61B,QAAQ9F,IAAI5jB,WAAWsF,YAAYzR,KAAK+vB,IAAI5jB,YAG9CnM,KAAK+vB,IAAI+Q,KAAKj3B,YACjB7J,KAAK61B,QAAQ9F,IAAI+Q,KAAKrvB,YAAYzR,KAAK+vB,IAAI+Q,OAO/Cl+B,EAAMuQ,UAAUu0B,KAAO,WACrB,GAAIjf,GAAQzoB,KAAK+vB,IAAItH,KACjBA,GAAM5e,YACR4e,EAAM5e,WAAWsH,YAAYsX,EAG/B,IAAIgkB,GAAazsC,KAAK+vB,IAAI0c,UACtBA,GAAW5iC,YACb4iC,EAAW5iC,WAAWsH,YAAYs7B,EAGpC,IAAItgC,GAAanM,KAAK+vB,IAAI5jB,UACtBA,GAAWtC,YACbsC,EAAWtC,WAAWsH,YAAYhF,EAGpC,IAAI20B,GAAO9gC,KAAK+vB,IAAI+Q,IAChBA,GAAKj3B,YACPi3B,EAAKj3B,WAAWsH,YAAY2vB,IAQhCl+B,EAAMuQ,UAAUF,IAAM,SAAS5D,GAc7B,GAbArP,KAAKiC,MAAMoN,EAAKhP,IAAMgP,EACtBA,EAAKq+B,UAAU1tC,MAGYwG,SAAvB6I,EAAKqD,KAAK2uB,WAC+B76B,SAAvCxG,KAAKmhC,UAAU9xB,EAAKqD,KAAK2uB,YAC3BrhC,KAAKmhC,UAAU9xB,EAAKqD,KAAK2uB,WAAa7uB,OAAO,EAAGkW,SAAS,EAAOvgB,MAAMnI,KAAKisC,cAAehqC,UAC1FjC,KAAKisC,iBAEPjsC,KAAKmhC,UAAU9xB,EAAKqD,KAAK2uB,UAAUp/B,MAAM+F,KAAKqH,IAEhDrP,KAAK2tC,iBAEkC,IAAnC3tC,KAAKosC,aAAazlC,QAAQ0I,GAAa,CACzC,GAAIomB,GAAQz1B,KAAK61B,QAAQlB,KAAKc,KAC9Bz1B,MAAK4tC,gBAAgBv+B,EAAMrP,KAAKosC,aAAc3W,KAIlD7yB,EAAMuQ,UAAUw6B,eAAiB,WAC/B,GAA6BnnC,SAAzBxG,KAAKksC,gBAA+B,CACtC,GAAI2B,KACJ,IAAmC,gBAAxB7tC,MAAKksC,gBAA6B,CAC3C,IAAK,GAAI7K,KAAYrhC,MAAKmhC,UACxB0M,EAAU7lC,MAAMq5B,SAAUA,EAAUyM,UAAW9tC,KAAKmhC,UAAUE,GAAUp/B,MAAM,GAAGyQ,KAAK1S,KAAKksC,kBAE7F2B,GAAU33B,KAAK,SAAU3Q,EAAGa,GAC1B,MAAOb,GAAEuoC,UAAY1nC,EAAE0nC,gBAGtB,IAAmC,kBAAxB9tC,MAAKksC,gBAA+B,CAClD,IAAK,GAAI7K,KAAYrhC,MAAKmhC,UACxB0M,EAAU7lC,KAAKhI,KAAKmhC,UAAUE,GAAUp/B,MAAM,GAAGyQ,KAEnDm7B,GAAU33B,KAAKlW,KAAKksC,iBAGtB,GAAI2B,EAAUloC,OAAS,EACrB,IAAK,GAAIH,GAAI,EAAGA,EAAIqoC,EAAUloC,OAAQH,IACpCxF,KAAKmhC,UAAU0M,EAAUroC,GAAG67B,UAAUl5B,MAAQ3C,IAMtD5C,EAAMuQ,UAAUq6B,eAAiB,WAC/B,IAAK,GAAInM,KAAYrhC,MAAKmhC,UACpBnhC,KAAKmhC,UAAUr7B,eAAeu7B,KAChCrhC,KAAKmhC,UAAUE,GAAU3Y,SAAU,IASzC9lB,EAAMuQ,UAAUkD,OAAS,SAAShH,SACzBrP,MAAKiC,MAAMoN,EAAKhP,IACvBgP,EAAKq+B,UAAU,KAGf,IAAIvlC,GAAQnI,KAAKosC,aAAazlC,QAAQ0I,EACzB,KAATlH,GAAanI,KAAKosC,aAAahkC,OAAOD,EAAO,IAUnDvF,EAAMuQ,UAAU46B,kBAAoB,SAAS1+B,GAC3CrP,KAAK61B,QAAQmY,WAAW3+B,EAAKhP,KAO/BuC,EAAMuQ,UAAUsC,MAAQ,WAKtB,IAAK,GAJDjN,GAAQ7H,EAAK4H,QAAQvI,KAAKiC,OAC1BgsC,KACAC,KAEK1oC,EAAI,EAAGA,EAAIgD,EAAM7C,OAAQH,IACNgB,SAAtBgC,EAAMhD,GAAGkN,KAAK7C,KAChBq+B,EAASlmC,KAAKQ,EAAMhD,IAEtByoC,EAAWjmC,KAAKQ,EAAMhD,GAExBxF,MAAK4O,cACHy9B,QAAS4B,EACT3B,MAAO4B,GAGTpsC,EAAM0+B,aAAaxgC,KAAK4O,aAAay9B,SACrCvqC,EAAM2+B,WAAWzgC,KAAK4O,aAAa09B,QAYrC1pC,EAAMuQ,UAAU25B,oBAAsB,SAASl+B,EAAcu/B,EAAiB1Y,GAC5E,GAKIpmB,GAAM7J,EALN4mC,KACAgC,KACA5b,GAAYiD,EAAM5lB,IAAM4lB,EAAM7lB,OAAS,EACvCy+B,EAAa5Y,EAAM7lB,MAAQ4iB,EAC3B8b,EAAa7Y,EAAM5lB,IAAM2iB,EAIzB3jB,EAAiB,SAAUxH,GAC7B,MAAiBgnC,GAARhnC,EAA6B,GACpBinC,GAATjnC,EAA8B,EACA,EAMzC,IAAI8mC,EAAgBxoC,OAAS,EAC3B,IAAKH,EAAI,EAAGA,EAAI2oC,EAAgBxoC,OAAQH,IACtCxF,KAAKuuC,6BAA6BJ,EAAgB3oC,GAAI4mC,EAAcgC,EAAoB3Y,EAK5F,IAAI+Y,GAAoB7tC,EAAKgO,mBAAmBC,EAAay9B,QAASx9B,EAAgB,OAAO,QAS7F,IANA7O,KAAKyuC,cAAcD,EAAmB5/B,EAAay9B,QAASD,EAAcgC,EAAoB,SAAU/+B,GACtG,MAAQA,GAAKqD,KAAK9C,MAAQy+B,GAAch/B,EAAKqD,KAAK9C,MAAQ0+B,IAK/B,GAAzBtuC,KAAKusC,iBAEP,IADAvsC,KAAKusC,kBAAmB,EACnB/mC,EAAI,EAAGA,EAAIoJ,EAAa09B,MAAM3mC,OAAQH,IACzCxF,
KAAKuuC,6BAA6B3/B,EAAa09B,MAAM9mC,GAAI4mC,EAAcgC,EAAoB3Y,OAG1F,CAEH,GAAIiZ,GAAkB/tC,EAAKgO,mBAAmBC,EAAa09B,MAAOz9B,EAAgB,OAAO,MAGzF7O,MAAKyuC,cAAcC,EAAiB9/B,EAAa09B,MAAOF,EAAcgC,EAAoB,SAAU/+B,GAClG,MAAQA,GAAKqD,KAAK7C,IAAMw+B,GAAch/B,EAAKqD,KAAK7C,IAAMy+B,IAM1D,IAAK9oC,EAAI,EAAGA,EAAI4mC,EAAazmC,OAAQH,IACnC6J,EAAO+8B,EAAa5mC,GACf6J,EAAK69B,WAAW79B,EAAKs4B,OAE1Bt4B,EAAKs/B,aAgBP,OAAOvC,IAGTxpC,EAAMuQ,UAAUs7B,cAAgB,SAAUG,EAAY3sC,EAAOmqC,EAAcgC,EAAoBS,GAC7F,GAAIx/B,GACA7J,CAEJ,IAAkB,IAAdopC,EAAkB,CACpB,IAAKppC,EAAIopC,EAAYppC,GAAK,IACxB6J,EAAOpN,EAAMuD,IACTqpC,EAAex/B,IAFQ7J,IAMWgB,SAAhC4nC,EAAmB/+B,EAAKhP,MAC1B+tC,EAAmB/+B,EAAKhP,KAAM,EAC9B+rC,EAAapkC,KAAKqH,GAKxB,KAAK7J,EAAIopC,EAAa,EAAGppC,EAAIvD,EAAM0D,SACjC0J,EAAOpN,EAAMuD,IACTqpC,EAAex/B,IAFsB7J,IAMHgB,SAAhC4nC,EAAmB/+B,EAAKhP,MAC1B+tC,EAAmB/+B,EAAKhP,KAAM,EAC9B+rC,EAAapkC,KAAKqH,MAmB5BzM,EAAMuQ,UAAUy6B,gBAAkB,SAASv+B,EAAM+8B,EAAc3W,GACvDpmB,EAAKy/B,UAAUrZ,IACZpmB,EAAK69B,WAAW79B,EAAKs4B,OAE1Bt4B,EAAKs/B,cACLvC,EAAapkC,KAAKqH,IAGdA,EAAK69B,WAAW79B,EAAKq4B,QAgB/B9kC,EAAMuQ,UAAUo7B,6BAA+B,SAASl/B,EAAM+8B,EAAcgC,EAAoB3Y,GAC1FpmB,EAAKy/B,UAAUrZ,GACmBjvB,SAAhC4nC,EAAmB/+B,EAAKhP,MAC1B+tC,EAAmB/+B,EAAKhP,KAAM,EAC9B+rC,EAAapkC,KAAKqH,IAIhBA,EAAK69B,WAAW79B,EAAKq4B;EAM7B7nC,EAAOD,QAAUgD,GAKb,SAAS/C,EAAQD,EAASM,GAW9B,QAAS2C,GAAiBy0B,EAAS5kB,EAAMmjB,GACvCjzB,EAAMrC,KAAKP,KAAMs3B,EAAS5kB,EAAMmjB,GAEhC71B,KAAKuS,MAAQ,EACbvS,KAAKwS,OAAS,EACdxS,KAAK2H,IAAM,EACX3H,KAAKyH,KAAO,EAfd,GACI7E,IADO1C,EAAoB,GACnBA,EAAoB,IAiBhC2C,GAAgBsQ,UAAY5M,OAAO8H,OAAOzL,EAAMuQ,WAShDtQ,EAAgBsQ,UAAUsO,OAAS,SAASgU,EAAO/b,GACjD,GAAI+qB,IAAU,CAEdzkC,MAAKosC,aAAepsC,KAAK8sC,oBAAoB9sC,KAAK4O,aAAc5O,KAAKosC,aAAc3W,GAGnFz1B,KAAKuS,MAAQvS,KAAK+vB,IAAI5jB,WAAWikB,YAGjCpwB,KAAK+vB,IAAI5jB,WAAWc,MAAMuF,OAAU,GAGpC,KAAK,GAAIhN,GAAI,EAAG8nC,EAAKttC,KAAKosC,aAAazmC,OAAY2nC,EAAJ9nC,EAAQA,IAAK,CAC1D,GAAI6J,GAAOrP,KAAKosC,aAAa5mC,EAC7B6J,GAAKk+B,YAAY7zB,GAGnB,MAAO+qB,IAMT5hC,EAAgBsQ,UAAUw0B,KAAO,WAC1B3nC,KAAK+vB,IAAI5jB,WAAWtC,YACvB7J,KAAK61B,QAAQ9F,IAAI5jB,WAAWsF,YAAYzR,KAAK+vB,IAAI5jB,aAIrDtM,EAAOD,QAAUiD,GAKb,SAAShD,EAAQD,EAASM,GA2B9B,QAAS4C,GAAQ6xB,EAAMlmB,GACrBzO,KAAK20B,KAAOA,EAEZ30B,KAAKq0B,gBACHvtB,KAAM,KACNytB,YAAa,SACbwa,MAAO,OACPjtC,OAAO,EACPktC,WAAY,KAEZC,YAAY,EACZC,UACEC,YAAY,EACZ3H,aAAa,EACbv0B,KAAK,EACLoD,QAAQ,GAGV+4B,MAAO,SAAU//B,EAAM/G,GACrBA,EAAS+G,IAEXggC,SAAU,SAAUhgC,EAAM/G,GACxBA,EAAS+G,IAEXigC,OAAQ,SAAUjgC,EAAM/G,GACtBA,EAAS+G,IAEXkgC,SAAU,SAAUlgC,EAAM/G,GACxBA,EAAS+G,IAEXmgC,SAAU,SAAUngC,EAAM/G,GACxBA,EAAS+G,IAGXqK,QACErK,MACEmW,WAAY,GACZC,SAAU,IAEZqb,KAAM,IAER9c,QAAS,GAIXhkB,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAGpCr0B,KAAKyvC,aACH3oC,MAAO8I,MAAO,OAAQC,IAAK,SAG7B7P,KAAKo6B,YACHnF,SAAUN,EAAKh0B,KAAKs0B,SACpBI,OAAQV,EAAKh0B,KAAK00B,QAEpBr1B,KAAK+vB,OACL/vB,KAAKgG,SACLhG,KAAK8D,OAAS,IAEd,IAAIqQ,GAAKnU,IACTA,MAAK81B,UAAY,KACjB91B,KAAK+1B,WAAa,KAGlB/1B,KAAK0vC,eACHz8B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAGw7B,OAAO77B,EAAO7R,QAEnB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAGy7B,UAAU97B,EAAO7R,QAEtBoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG07B,UAAU/7B,EAAO7R,SAKxBjC,KAAK8vC,gBACH78B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAG47B,aAAaj8B,EAAO7R,QAEzB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAG67B,gBAAgBl8B,EAAO7R,QAE5BoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG87B,gBAAgBn8B,EAAO7R,SAI9BjC,KAAKiC,SACLjC,KAAKm0B,UACLn0B,KAAKkwC,YAELlwC,KAAKmwC,aACLnwC,KAAKowC,YAAa,EAElBpwC,KAAKqwC,eAGLrwC,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GA/HlB,GAAI1K,GAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,GAC3BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/BqC,EAAYrC,EAAoB,IAChC0C,EAAQ1C,EAAoB,IAC5B2C,EAAkB3C,EAAoB,IACtCkC,EAAUlC,EAAoB,IAC9BmC,EAAYnC,EAAoB,IAChCoC,EAAYpC,EAAoB,IAChCiC,EAAiB
jC,EAAoB,IAGrCowC,EAAY,gBACZC,EAAa,gBAoHjBztC,GAAQqQ,UAAY,GAAI5Q,GAGxBO,EAAQoU,OACN/K,WAAYhK,EACZquC,IAAKpuC,EACLqzB,MAAOnzB,EACP4P,MAAO7P,GAMTS,EAAQqQ,UAAUuhB,QAAU,WAC1B,GAAIpV,GAAQ/N,SAASM,cAAc,MACnCyN,GAAMzX,UAAY,UAClByX,EAAM,oBAAsBtf,KAC5BA,KAAK+vB,IAAIzQ,MAAQA,CAGjB,IAAInT,GAAaoF,SAASM,cAAc,MACxC1F,GAAWtE,UAAY,aACvByX,EAAM7N,YAAYtF,GAClBnM,KAAK+vB,IAAI5jB,WAAaA,CAGtB,IAAIsgC,GAAal7B,SAASM,cAAc,MACxC46B,GAAW5kC,UAAY,aACvByX,EAAM7N,YAAYg7B,GAClBzsC,KAAK+vB,IAAI0c,WAAaA,CAGtB,IAAI3L,GAAOvvB,SAASM,cAAc,MAClCivB,GAAKj5B,UAAY,OACjB7H,KAAK+vB,IAAI+Q,KAAOA,CAGhB,IAAI2M,GAAWl8B,SAASM,cAAc,MACtC47B,GAAS5lC,UAAY,WACrB7H,KAAK+vB,IAAI0d,SAAWA,EAGpBztC,KAAKywC,kBAGL,IAAIC,GAAkB,GAAI7tC,GAAgB0tC,EAAY,KAAMvwC,KAC5D0wC,GAAgB/I,OAChB3nC,KAAKm0B,OAAOoc,GAAcG,EAM1B1wC,KAAK8D,OAAS,GAAIC,GAAO/D,KAAK20B,KAAK5E,IAAI8H,iBAGvC73B,KAAK8D,OAAOyP,GAAG,eAAgB,SAAUjK,GACnCA,EAAMqnC,SACR3wC,KAAKq+B,SAAS/0B,IAEhBwrB,KAAK90B,OACPA,KAAK8D,OAAOyP,GAAG,WAAYvT,KAAKg+B,aAAalJ,KAAK90B,OAClDA,KAAK8D,OAAOyP,GAAG,UAAYvT,KAAKi+B,QAAQnJ,KAAK90B,OAC7CA,KAAK8D,OAAOyP,GAAG,SAAYvT,KAAKk+B,WAAWpJ,KAAK90B,OAGhDA,KAAK8D,OAAOyP,GAAG,MAAQvT,KAAK4wC,cAAc9b,KAAK90B,OAG/CA,KAAK8D,OAAOyP,GAAG,QAASvT,KAAK6wC,mBAAmB/b,KAAK90B,OAGrDA,KAAK8D,OAAOyP,GAAG,YAAavT,KAAK8wC,WAAWhc,KAAK90B,OAGjDA,KAAK2nC,QAmEP7kC,EAAQqQ,UAAUD,WAAa,SAASzE,GACtC,GAAIA,EAAS,CAEX,GAAIP,IAAU,OAAQ,QAAS,cAAe,UAAW,QAAS,aAAc,aAAc,iBAAkB,WAAW,OAC3HvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,GAEvC,UAAYA,KACgB,gBAAnBA,GAAQiL,QACjB1Z,KAAKyO,QAAQiL,OAAOonB,KAAOryB,EAAQiL,OACnC1Z,KAAKyO,QAAQiL,OAAOrK,KAAKmW,WAAa/W,EAAQiL,OAC9C1Z,KAAKyO,QAAQiL,OAAOrK,KAAKoW,SAAWhX,EAAQiL,QAEX,gBAAnBjL,GAAQiL,SACtB/Y,EAAKoF,iBAAiB,QAAS/F,KAAKyO,QAAQiL,OAAQjL,EAAQiL,QACxD,QAAUjL,GAAQiL,SACe,gBAAxBjL,GAAQiL,OAAOrK,MACxBrP,KAAKyO,QAAQiL,OAAOrK,KAAKmW,WAAa/W,EAAQiL,OAAOrK,KACrDrP,KAAKyO,QAAQiL,OAAOrK,KAAKoW,SAAWhX,EAAQiL,OAAOrK,MAEb,gBAAxBZ,GAAQiL,OAAOrK,MAC7B1O,EAAKoF,iBAAiB,aAAc,YAAa/F,KAAKyO,QAAQiL,OAAOrK,KAAMZ,EAAQiL,OAAOrK,SAM9F,YAAcZ,KACgB,iBAArBA,GAAQygC,UACjBlvC,KAAKyO,QAAQygC,SAASC,WAAc1gC,EAAQygC,SAC5ClvC,KAAKyO,QAAQygC,SAAS1H,YAAc/4B,EAAQygC,SAC5ClvC,KAAKyO,QAAQygC,SAASj8B,IAAcxE,EAAQygC,SAC5ClvC,KAAKyO,QAAQygC,SAAS74B,OAAc5H,EAAQygC,UAET,gBAArBzgC,GAAQygC,UACtBvuC,EAAKoF,iBAAiB,aAAc,cAAe,MAAO,UAAW/F,KAAKyO,QAAQygC,SAAUzgC,EAAQygC,UAKxG,IAAI6B,GAAc,SAAW96B,GAC3B,GAAIiD,GAAKzK,EAAQwH,EACjB,IAAIiD,EAAI,CACN,KAAMA,YAAc83B,WAClB,KAAM,IAAIptC,OAAM,UAAYqS,EAAO,uBAAyBA,EAAO,mBAErEjW,MAAKyO,QAAQwH,GAAQiD,IAEtB4b,KAAK90B,OACP,QAAS,WAAY,WAAY,SAAU,YAAYqI,QAAQ0oC,GAGhE/wC,KAAKixC,cAOTnuC,EAAQqQ,UAAU89B,UAAY,WAC5BjxC,KAAKkwC,YACLlwC,KAAKowC,YAAa,GAMpBttC,EAAQqQ,UAAUG,QAAU,WAC1BtT,KAAK0nC,OACL1nC,KAAKi2B,SAAS,MACdj2B,KAAKg2B,UAAU,MAEfh2B,KAAK8D,OAAS,KAEd9D,KAAK20B,KAAO,KACZ30B,KAAKo6B,WAAa,MAMpBt3B,EAAQqQ,UAAUu0B,KAAO,WAEnB1nC,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,OAI7Ctf,KAAK+vB,IAAI+Q,KAAKj3B,YAChB7J,KAAK+vB,IAAI+Q,KAAKj3B,WAAWsH,YAAYnR,KAAK+vB,IAAI+Q,MAI5C9gC,KAAK+vB,IAAI0d,SAAS5jC,YACpB7J,KAAK+vB,IAAI0d,SAAS5jC,WAAWsH,YAAYnR,KAAK+vB,IAAI0d,WAQtD3qC,EAAQqQ,UAAUw0B,KAAO,WAElB3nC,KAAK+vB,IAAIzQ,MAAMzV,YAClB7J,KAAK20B,KAAK5E,IAAI7D,OAAOza,YAAYzR,KAAK+vB,IAAIzQ,OAIvCtf,KAAK+vB,IAAI+Q,KAAKj3B,YACjB7J,KAAK20B,KAAK5E,IAAIgV,mBAAmBtzB,YAAYzR,KAAK+vB,IAAI+Q,MAInD9gC,KAAK+vB,IAAI0d,SAAS5jC,YACrB7J,KAAK20B,KAAK5E,IAAItoB,KAAKgK,YAAYzR,KAAK+vB,IAAI0d,WAW5C3qC,EAAQqQ,UAAUujB,aAAe,SAASvhB,GACxC,GAAI3P,GAAG8nC,EAAIjtC,EAAIgP,CAMf,KAJW7I,QAAP2O,IAAkBA,MACjBlP,MAAMC,QAAQiP,KAAMA,GAAOA,IAG3B3P,EAAI,EAAG8nC,EAAKttC,KAAKmwC,UAAUxqC,OAAY2nC,EAAJ9nC,EAAQA,IAC9CnF,EAAKL,KAAKmwC,UAAU3qC,GACpB6J
,EAAOrP,KAAKiC,MAAM5B,GACdgP,GAAMA,EAAK6hC,UAKjB,KADAlxC,KAAKmwC,aACA3qC,EAAI,EAAG8nC,EAAKn4B,EAAIxP,OAAY2nC,EAAJ9nC,EAAQA,IACnCnF,EAAK8U,EAAI3P,GACT6J,EAAOrP,KAAKiC,MAAM5B,GACdgP,IACFrP,KAAKmwC,UAAUnoC,KAAK3H,GACpBgP,EAAK8hC,WASXruC,EAAQqQ,UAAUyjB,aAAe,WAC/B,MAAO52B,MAAKmwC,UAAUn8B,YAOxBlR,EAAQqQ,UAAUi+B,gBAAkB,WAClC,GAAI3b,GAAQz1B,KAAK20B,KAAKc,MAAM2J,WACxB33B,EAAQzH,KAAK20B,KAAKh0B,KAAKs0B,SAASQ,EAAM7lB,OACtCyX,EAAQrnB,KAAK20B,KAAKh0B,KAAKs0B,SAASQ,EAAM5lB,KAEtCsF,IACJ,KAAK,GAAImiB,KAAWt3B,MAAKm0B,OACvB,GAAIn0B,KAAKm0B,OAAOruB,eAAewxB,GAM7B,IAAK,GALDrlB,GAAQjS,KAAKm0B,OAAOmD,GACpB+Z,EAAkBp/B,EAAMm6B,aAInB5mC,EAAI,EAAGA,EAAI6rC,EAAgB1rC,OAAQH,IAAK,CAC/C,GAAI6J,GAAOgiC,EAAgB7rC,EAEtB6J,GAAK5H,KAAO4f,GAAWhY,EAAK5H,KAAO4H,EAAKkD,MAAQ9K,GACnD0N,EAAInN,KAAKqH,EAAKhP,IAMtB,MAAO8U,IAQTrS,EAAQqQ,UAAUm+B,UAAY,SAASjxC,GAErC,IAAK,GADD8vC,GAAYnwC,KAAKmwC,UACZ3qC,EAAI,EAAG8nC,EAAK6C,EAAUxqC,OAAY2nC,EAAJ9nC,EAAQA,IAC7C,GAAI2qC,EAAU3qC,IAAMnF,EAAI,CACtB8vC,EAAU/nC,OAAO5C,EAAG,EACpB,SASN1C,EAAQqQ,UAAUsO,OAAS,WACzB,GAAI/H,GAAS1Z,KAAKyO,QAAQiL,OACtB+b,EAAQz1B,KAAK20B,KAAKc,MAClBtrB,EAASxJ,EAAKmJ,OAAOK,OACrBsE,EAAUzO,KAAKyO,QACf8lB,EAAc9lB,EAAQ8lB,YACtBkQ,GAAU,EACVnlB,EAAQtf,KAAK+vB,IAAIzQ,MACjB4vB,EAAWzgC,EAAQygC,SAASC,YAAc1gC,EAAQygC,SAAS1H,WAG/DxnC,MAAKgG,MAAM2B,IAAM3H,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAASxS,KAAK20B,KAAKC,SAASxoB,OAAOzE,IAC3E3H,KAAKgG,MAAMyB,KAAOzH,KAAK20B,KAAKC,SAASntB,KAAK8K,MAAQvS,KAAK20B,KAAKC,SAASxoB,OAAO3E,KAG5E6X,EAAMzX,UAAY,WAAaqnC,EAAW,YAAc,IAGxDzK,EAAUzkC,KAAKuxC,gBAAkB9M,CAIjC,IAAI+M,GAAkB/b,EAAM5lB,IAAM4lB,EAAM7lB,MACpC6hC,EAAUD,GAAmBxxC,KAAK0xC,qBAAyB1xC,KAAKgG,MAAMuM,OAASvS,KAAKgG,MAAM2rC,SAC1FF,KAAQzxC,KAAKowC,YAAa,GAC9BpwC,KAAK0xC,oBAAsBF,EAC3BxxC,KAAKgG,MAAM2rC,UAAY3xC,KAAKgG,MAAMuM,KAElC,IAAIs6B,GAAU7sC,KAAKowC,WACfwB,EAAa5xC,KAAK6xC,cAClBC,GACFziC,KAAMqK,EAAOrK,KACbyxB,KAAMpnB,EAAOonB,MAEXiR,GACF1iC,KAAMqK,EAAOrK,KACbyxB,KAAMpnB,EAAOrK,KAAKoW,SAAW,GAE3BjT,EAAS,EACTiiB,EAAY/a,EAAOonB,KAAOpnB,EAAOrK,KAAKoW,QA+B1C,OA5BAzlB,MAAKm0B,OAAOoc,GAAY9uB,OAAOgU,EAAOsc,EAAgBlF,GAGtDlsC,EAAK0H,QAAQrI,KAAKm0B,OAAQ,SAAUliB,GAClC,GAAI+/B,GAAe//B,GAAS2/B,EAAcE,EAAcC,EACpDE,EAAehgC,EAAMwP,OAAOgU,EAAOuc,EAAanF,EACpDpI,GAAUwN,GAAgBxN,EAC1BjyB,GAAUP,EAAMO,SAElBA,EAAStN,KAAKwH,IAAI8F,EAAQiiB,GAC1Bz0B,KAAKowC,YAAa,EAGlB9wB,EAAMrS,MAAMuF,OAAUrI,EAAOqI,GAG7BxS,KAAKgG,MAAMuM,MAAQ+M,EAAM8Q,YACzBpwB,KAAKgG,MAAMwM,OAASA,EAGpBxS,KAAK+vB,IAAI+Q,KAAK7zB,MAAMtF,IAAMwC,EAAuB,OAAfoqB,EAC7Bv0B,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAASxS,KAAK20B,KAAKC,SAASxoB,OAAOzE,IAC1D3H,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,QACxExS,KAAK+vB,IAAI+Q,KAAK7zB,MAAMxF,KAAO,IAG3Bg9B,EAAUzkC,KAAKwkC,cAAgBC,GAUjC3hC,EAAQqQ,UAAU0+B,YAAc,WAC9B,GAAIK,GAA+C,OAA5BlyC,KAAKyO,QAAQ8lB,YAAwB,EAAKv0B,KAAKkwC,SAASvqC,OAAS,EACpFwsC,EAAenyC,KAAKkwC,SAASgC,GAC7BN,EAAa5xC,KAAKm0B,OAAOge,IAAiBnyC,KAAKm0B,OAAOmc,EAE1D,OAAOsB,IAAc,MAQvB9uC,EAAQqQ,UAAUs9B,iBAAmB,WACnC,CAAA,GAEIphC,GAAMkG,EAFN68B,EAAYpyC,KAAKm0B,OAAOmc,EACXtwC,MAAKm0B,OAAOoc,GAG7B,GAAIvwC,KAAK+1B,YAEP,GAAIqc,EAAW,CACbA,EAAU1K,aACH1nC,MAAKm0B,OAAOmc,EAEnB,KAAK/6B,IAAUvV,MAAKiC,MAClB,GAAIjC,KAAKiC,MAAM6D,eAAeyP,GAAS,CACrClG,EAAOrP,KAAKiC,MAAMsT,GAClBlG,EAAKy1B,QAAUz1B,EAAKy1B,OAAOzuB,OAAOhH,EAClC,IAAIioB,GAAUt3B,KAAKqyC,YAAYhjC,EAAKqD,MAChCT,EAAQjS,KAAKm0B,OAAOmD,EACxBrlB,IAASA,EAAMgB,IAAI5D,IAASA,EAAKq4B,aAOvC,KAAK0K,EAAW,CACd,GAAI/xC,GAAK,KACLqS,EAAO,IACX0/B,GAAY,GAAIxvC,GAAMvC,EAAIqS,EAAM1S,MAChCA,KAAKm0B,OAAOmc,GAAa8B,CAEzB,KAAK78B,IAAUvV,MAAKiC,MACdjC,KAAKiC,MAAM6D,eAAeyP,KAC5BlG,EAAOrP,KAAKiC,MAAMsT,GAClB68B,EAAUn/B,IAAI5D,GAIlB+iC,GAAUzK,SAShB7kC,EAAQqQ,UAAUm/
B,YAAc,WAC9B,MAAOtyC,MAAK+vB,IAAI0d,UAOlB3qC,EAAQqQ,UAAU8iB,SAAW,SAASh0B,GACpC,GACIkT,GADAhB,EAAKnU,KAELuyC,EAAevyC,KAAK81B,SAGxB,IAAK7zB,EAGA,CAAA,KAAIA,YAAiBpB,IAAWoB,YAAiBnB,IAIpD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK81B,UAAY7zB,MAHjBjC,MAAK81B,UAAY,IAoBnB,IAXIyc,IAEF5xC,EAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnDipC,EAAa7+B,IAAIpK,EAAOhB,KAI1B6M,EAAMo9B,EAAa18B,SACnB7V,KAAK6vC,UAAU16B,IAGbnV,KAAK81B,UAAW,CAElB,GAAIz1B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnD6K,EAAG2hB,UAAUviB,GAAGjK,EAAOhB,EAAUjI,KAInC8U,EAAMnV,KAAK81B,UAAUjgB,SACrB7V,KAAK2vC,OAAOx6B,GAGZnV,KAAKywC,qBAQT3tC,EAAQqQ,UAAUq/B,SAAW,WAC3B,MAAOxyC,MAAK81B,WAOdhzB,EAAQqQ,UAAU6iB,UAAY,SAAS7B,GACrC,GACIhf,GADAhB,EAAKnU,IAgBT,IAZIA,KAAK+1B,aACPp1B,EAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWniB,YAAYtK,EAAOhB,KAInC6M,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+1B,WAAa,KAClB/1B,KAAKiwC,gBAAgB96B,IAIlBgf,EAGA,CAAA,KAAIA,YAAkBtzB,IAAWszB,YAAkBrzB,IAItD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK+1B,WAAa5B,MAHlBn0B,MAAK+1B,WAAa,IASpB,IAAI/1B,KAAK+1B,WAAY,CAEnB,GAAI11B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWxiB,GAAGjK,EAAOhB,EAAUjI,KAIpC8U,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+vC,aAAa56B,GAIpBnV,KAAKywC,mBAGLzwC,KAAKyyC,SAELzyC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAO3CtQ,EAAQqQ,UAAUu/B,UAAY,WAC5B,MAAO1yC,MAAK+1B,YAOdjzB,EAAQqQ,UAAU66B,WAAa,SAAS3tC,GACtC,GAAIgP,GAAOrP,KAAK81B,UAAU5gB,IAAI7U,GAC1B22B,EAAUh3B,KAAK81B,UAAUhgB,YAEzBzG,IAEFrP,KAAKyO,QAAQ8gC,SAASlgC,EAAM,SAAUA,GAChCA,GAGF2nB,EAAQ3gB,OAAOhW,MAYvByC,EAAQqQ,UAAUw/B,SAAW,SAAU9b,GACrC,MAAOA,GAAS/vB,MAAQ9G,KAAKyO,QAAQ3H,OAAS+vB,EAAShnB,IAAM,QAAU,QAUzE/M,EAAQqQ,UAAUk/B,YAAc,SAAUxb,GACxC,GAAI/vB,GAAO9G,KAAK2yC,SAAS9b,EACzB,OAAY,cAAR/vB,GAA0CN,QAAlBqwB,EAAS5kB,MAC7Bs+B,EAGCvwC,KAAK+1B,WAAac,EAAS5kB,MAAQq+B,GAS9CxtC,EAAQqQ,UAAUy8B,UAAY,SAASz6B,GACrC,GAAIhB,GAAKnU,IAETmV,GAAI9M,QAAQ,SAAUhI,GACpB,GAAIw2B,GAAW1iB,EAAG2hB,UAAU5gB,IAAI7U,EAAI8T,EAAGs7B,aACnCpgC,EAAO8E,EAAGlS,MAAM5B,GAChByG,EAAOqN,EAAGw+B,SAAS9b,GAEnBvwB,EAAcxD,EAAQoU,MAAMpQ,EAchC,IAZIuI,IAEG/I,GAAiB+I,YAAgB/I,GAMpC6N,EAAGc,YAAY5F,EAAMwnB,IAJrB1iB,EAAGy+B,YAAYvjC,GACfA,EAAO,QAONA,EAAM,CAET,IAAI/I,EAKC,KAEG,IAAID,WAFK,iBAARS,EAEa,4HAIA,sBAAwBA,EAAO,IAVnDuI,GAAO,GAAI/I,GAAYuwB,EAAU1iB,EAAGimB,WAAYjmB,EAAG1F,SACnDY,EAAKhP,GAAKA,EACV8T,EAAGC,SAAS/E,MAalBrP,KAAKyyC,SACLzyC,KAAKowC,YAAa,EAClBpwC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAQ3CtQ,EAAQqQ,UAAUw8B,OAAS7sC,EAAQqQ,UAAUy8B,UAO7C9sC,EAAQqQ,UAAU08B,UAAY,SAAS16B,GACrC,GAAI6B,GAAQ,EACR7C,EAAKnU,IACTmV,GAAI9M,QAAQ,SAAUhI,GACpB,GAAIgP,GAAO8E,EAAGlS,MAAM5B,EAChBgP,KACF2H,IACA7C,EAAGy+B,YAAYvjC,MAIf2H,IAEFhX,KAAKyyC,SACLzyC,KAAKowC,YAAa,EAClBpwC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,MAQ7CtQ,EAAQqQ,UAAUs/B,OAAS,WAGzB9xC,EAAK0H,QAAQrI,KAAKm0B,OAAQ,SAAUliB,GAClCA,EAAMwD,WASV3S,EAAQqQ,UAAU68B,gBAAkB,SAAS76B,GAC3CnV,KAAK+vC,aAAa56B,IAQpBrS,EAAQqQ,UAAU48B,aAAe,SAAS56B,GACxC,GAAIhB,GAAKnU,IAETmV,GAAI9M,QAAQ,SAAUhI,GACpB,GAAIyrC,GAAY33B,EAAG4hB,WAAW7gB,IAAI7U,GAC9B4R,EAAQkC,EAAGggB,OAAO9zB,EAEtB,IAAK4R,EA6BHA,EAAM+F,QAAQ8zB,OA7BJ,CAEV,GAAIzrC,GAAMiwC,GAAajwC,GAAMkwC,EAC3B,KAAM,IAAI3sC,OAAM,qBAAuBvD,EAAK,qBAG9C,IAAIwyC,GAAetsC,OAAO8H,OAAO8F,EAAG1F,QACpC9N,GAAK2E,OAAOutC,GACVrgC,OAAQ,OAGVP,EAAQ,GAAIrP,GAAMvC,EAAIyrC,EAAW33B,GACjCA,EAAGggB,OAAO9zB,GAAM4R,CAGhB,KAAK,GAAIsD,KAAUpB,GAAGlS,MACpB,GAAIkS,EAAGlS,MAAM6D,eAAeyP,GAAS,CACnC,GAAIlG,GAAO8E,EAAGlS,MAAMsT,EAChBlG,GAAKqD,KAAKT,OAAS5R,GACrB4R,EAAMgB,IAAI5D,GAKhB4C,EAAMwD,QACNxD,EAAM01B,UAQV3nC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAQ3CtQ,EAAQqQ,UAAU88B,gBAAkB,SAAS96B,GAC3C,GAAIgf,GAA
Sn0B,KAAKm0B,MAClBhf,GAAI9M,QAAQ,SAAUhI,GACpB,GAAI4R,GAAQkiB,EAAO9zB,EAEf4R,KACFA,EAAMy1B,aACCvT,GAAO9zB,MAIlBL,KAAKixC,YAELjxC,KAAK20B,KAAKE,QAAQjH,KAAK,UAAWxa,OAAO,KAQ3CtQ,EAAQqQ,UAAUo+B,aAAe,WAC/B,GAAIvxC,KAAK+1B,WAAY,CAEnB,GAAIma,GAAWlwC,KAAK+1B,WAAWlgB,QAC7BJ,MAAOzV,KAAKyO,QAAQugC,aAGlB/P,GAAWt+B,EAAKiG,WAAWspC,EAAUlwC,KAAKkwC,SAC9C,IAAIjR,EAAS,CAEX,GAAI9K,GAASn0B,KAAKm0B,MAClB+b,GAAS7nC,QAAQ,SAAUivB,GACzBnD,EAAOmD,GAASoQ,SAIlBwI,EAAS7nC,QAAQ,SAAUivB,GACzBnD,EAAOmD,GAASqQ,SAGlB3nC,KAAKkwC,SAAWA,EAGlB,MAAOjR,GAGP,OAAO,GASXn8B,EAAQqQ,UAAUiB,SAAW,SAAS/E,GACpCrP,KAAKiC,MAAMoN,EAAKhP,IAAMgP,CAGtB,IAAIioB,GAAUt3B,KAAKqyC,YAAYhjC,EAAKqD,MAChCT,EAAQjS,KAAKm0B,OAAOmD,EACpBrlB,IAAOA,EAAMgB,IAAI5D,IASvBvM,EAAQqQ,UAAU8B,YAAc,SAAS5F,EAAMwnB,GAC7C,GAAIic,GAAazjC,EAAKqD,KAAKT,KAM3B,IAHA5C,EAAK2I,QAAQ6e,GAGTic,GAAczjC,EAAKqD,KAAKT,MAAO,CACjC,GAAI8gC,GAAW/yC,KAAKm0B,OAAO2e,EACvBC,IAAUA,EAAS18B,OAAOhH,EAE9B,IAAIioB,GAAUt3B,KAAKqyC,YAAYhjC,EAAKqD,MAChCT,EAAQjS,KAAKm0B,OAAOmD,EACpBrlB,IAAOA,EAAMgB,IAAI5D,KAUzBvM,EAAQqQ,UAAUy/B,YAAc,SAASvjC,GAEvCA,EAAKq4B,aAGE1nC,MAAKiC,MAAMoN,EAAKhP,GAGvB,IAAI8H,GAAQnI,KAAKmwC,UAAUxpC,QAAQ0I,EAAKhP,GAC3B,KAAT8H,GAAanI,KAAKmwC,UAAU/nC,OAAOD,EAAO,GAG9CkH,EAAKy1B,QAAUz1B,EAAKy1B,OAAOzuB,OAAOhH,IASpCvM,EAAQqQ,UAAU6/B,qBAAuB,SAASxqC,GAGhD,IAAK,GAFD0lC,MAEK1oC,EAAI,EAAGA,EAAIgD,EAAM7C,OAAQH,IAC5BgD,EAAMhD,YAAclD,IACtB4rC,EAASlmC,KAAKQ,EAAMhD,GAGxB,OAAO0oC,IAaTprC,EAAQqQ,UAAUkrB,SAAW,SAAU/0B,GAErCtJ,KAAKqwC,YAAYhhC,KAAOvM,EAAQmwC,eAAe3pC,GAC/CtJ,KAAKqwC,YAAY6C,aAAe5pC,EAAMI,OAAOwpC,eAAgB,EAC7DlzC,KAAKqwC,YAAY8C,cAAgB7pC,EAAMI,OAAOypC,gBAAiB,EAC/DnzC,KAAKqwC,YAAY+C,UAAY,MAQ/BtwC,EAAQqQ,UAAU6qB,aAAe,SAAU10B,GACzC,GAAKtJ,KAAKyO,QAAQygC,SAASC,YAAenvC,KAAKyO,QAAQygC,SAAS1H,YAAhE,CAIA,GAEIxhC,GAFAqJ,EAAOrP,KAAKqwC,YAAYhhC,MAAQ,KAChC8E,EAAKnU,IAGT,IAAIqP,GAAQA,EAAKgkC,SAAU,CACzB,GAAIH,GAAelzC,KAAKqwC,YAAY6C,aAChCC,EAAgBnzC,KAAKqwC,YAAY8C,aAEjCD,IACFltC,GACEqJ,KAAM6jC,EACNI,SAAUhqC,EAAM4iB,OAAOna,GAGrBoC,EAAG1F,QAAQygC,SAASC,aACtBnpC,EAAM4J,MAAQP,EAAKqD,KAAK9C,MAAM5I,WAE5BmN,EAAG1F,QAAQygC,SAAS1H,aAClB,SAAWn4B,GAAKqD,OAAM1M,EAAMiM,MAAQ5C,EAAKqD,KAAKT,OAGpDjS,KAAKqwC,YAAY+C,WAAaptC,IAEvBmtC,GACPntC,GACEqJ,KAAM8jC,EACNG,SAAUhqC,EAAM4iB,OAAOna,GAGrBoC,EAAG1F,QAAQygC,SAASC,aACtBnpC,EAAM6J,IAAMR,EAAKqD,KAAK7C,IAAI7I,WAExBmN,EAAG1F,QAAQygC,SAAS1H,aAClB,SAAWn4B,GAAKqD,OAAM1M,EAAMiM,MAAQ5C,EAAKqD,KAAKT,OAGpDjS,KAAKqwC,YAAY+C,WAAaptC,IAG9BhG,KAAKqwC,YAAY+C,UAAYpzC,KAAK42B,eAAevpB,IAAI,SAAUhN,GAC7D,GAAIgP,GAAO8E,EAAGlS,MAAM5B,GAChB2F,GACFqJ,KAAMA,EACNikC,SAAUhqC,EAAM4iB,OAAOna,EAWzB,OARIoC,GAAG1F,QAAQygC,SAASC,aAClB,SAAW9/B,GAAKqD,OAAM1M,EAAM4J,MAAQP,EAAKqD,KAAK9C,MAAM5I,WACpD,OAASqI,GAAKqD,OAAQ1M,EAAM6J,IAAMR,EAAKqD,KAAK7C,IAAI7I,YAElDmN,EAAG1F,QAAQygC,SAAS1H,aAClB,SAAWn4B,GAAKqD,OAAM1M,EAAMiM,MAAQ5C,EAAKqD,KAAKT,OAG7CjM,IAIXsD,EAAMq8B,kBACNr8B,EAAMD,oBASVvG,EAAQqQ,UAAU8qB,QAAU,SAAU30B,GAGpC,GAFAA,EAAMD,iBAEFrJ,KAAKqwC,YAAY+C,UAAW,CAC9B,GAAIj/B,GAAKnU,KACLg1B,EAAOh1B,KAAK20B,KAAKh0B,KAAKq0B,MAAQ,KAC9BpL,EAAU5pB,KAAK20B,KAAK5E,IAAIrwB,KAAK2tC,WAAartC,KAAK20B,KAAKC,SAASntB,KAAK8K,KAGtEvS,MAAKqwC,YAAY+C,UAAU/qC,QAAQ,SAAUrC,GAC3C,GAAIutC,MACAzZ,EAAU3lB,EAAGwgB,KAAKh0B,KAAK00B,OAAO/rB,EAAM4iB,OAAOna,EAAI6X,GAC/C4pB,EAAUr/B,EAAGwgB,KAAKh0B,KAAK00B,OAAOrvB,EAAMstC,SAAW1pB,GAC/CD,EAASmQ,EAAU0Z,CAEvB,IAAI,SAAWxtC,GAAO,CACpB,GAAI4J,GAAQ,GAAItL,MAAK0B,EAAM4J,MAAQ+Z,EACnC4pB,GAAS3jC,MAAQolB,EAAOA,EAAKplB,GAASA,EAGxC,GAAI,OAAS5J,GAAO,CAClB,GAAI6J,GAAM,GAAIvL,MAAK0B,EAAM6J,IAAM8Z,EAC/B4pB,GAAS1jC,IAAMmlB,EAAOA,EAAKnlB,GAAOA,EAGpC,GAAI,SAAW7J,GAAO,CAEpB,GAAIiM,GAAQnP,EAAQ2wC,gBAAgBnqC,EACpCiqC,GAASthC,MAAQA,GA
ASA,EAAMqlB,QAIlC,GAAIT,GAAWl2B,EAAK2E,UAAWU,EAAMqJ,KAAKqD,KAAM6gC,EAChDp/B,GAAG1F,QAAQ+gC,SAAS3Y,EAAU,SAAUA,GAClCA,GACF1iB,EAAGu/B,iBAAiB1tC,EAAMqJ,KAAMwnB,OAKtC72B,KAAKowC,YAAa,EAClBpwC,KAAK20B,KAAKE,QAAQjH,KAAK,UAEvBtkB,EAAMq8B,oBAUV7iC,EAAQqQ,UAAUugC,iBAAmB,SAASrkC,EAAMrJ,GAE9C,SAAWA,KAAOqJ,EAAKqD,KAAK9C,MAAQ5J,EAAM4J,OAC1C,OAAS5J,KAASqJ,EAAKqD,KAAK7C,IAAQ7J,EAAM6J,KAC1C,SAAW7J,IAASqJ,EAAKqD,KAAKT,OAASjM,EAAMiM,OAC/CjS,KAAK2zC,aAAatkC,EAAMrJ,EAAMiM,QAUlCnP,EAAQqQ,UAAUwgC,aAAe,SAAStkC,EAAMioB,GAC9C,GAAIrlB,GAAQjS,KAAKm0B,OAAOmD,EACxB,IAAIrlB,GAASA,EAAMqlB,SAAWjoB,EAAKqD,KAAKT,MAAO,CAC7C,GAAI8gC,GAAW1jC,EAAKy1B,MACpBiO,GAAS18B,OAAOhH,GAChB0jC,EAASt9B,QACTxD,EAAMgB,IAAI5D,GACV4C,EAAMwD,QAENpG,EAAKqD,KAAKT,MAAQA,EAAMqlB,UAS5Bx0B,EAAQqQ,UAAU+qB,WAAa,SAAU50B,GACvC,GAAItJ,KAAKqwC,YAAY+C,UAAW,CAE9B,GAAIQ,MACAz/B,EAAKnU,KACLg3B,EAAUh3B,KAAK81B,UAAUhgB,aAEzBs9B,EAAYpzC,KAAKqwC,YAAY+C,SACjCpzC,MAAKqwC,YAAY+C,UAAY,KAC7BA,EAAU/qC,QAAQ,SAAUrC,GAC1B,GAAI3F,GAAK2F,EAAMqJ,KAAKhP,GAChBw2B,EAAW1iB,EAAG2hB,UAAU5gB,IAAI7U,EAAI8T,EAAGs7B,aAEnCxQ,GAAU,CACV,UAAWj5B,GAAMqJ,KAAKqD,OACxBusB,EAAWj5B,EAAM4J,OAAS5J,EAAMqJ,KAAKqD,KAAK9C,MAAM5I,UAChD6vB,EAASjnB,MAAQjP,EAAKkG,QAAQb,EAAMqJ,KAAKqD,KAAK9C,MACtConB,EAAQrkB,SAAS7L,MAAQkwB,EAAQrkB,SAAS7L,KAAK8I,OAAS,SAE9D,OAAS5J,GAAMqJ,KAAKqD,OACtBusB,EAAUA,GAAaj5B,EAAM6J,KAAO7J,EAAMqJ,KAAKqD,KAAK7C,IAAI7I,UACxD6vB,EAAShnB,IAAMlP,EAAKkG,QAAQb,EAAMqJ,KAAKqD,KAAK7C,IACpCmnB,EAAQrkB,SAAS7L,MAAQkwB,EAAQrkB,SAAS7L,KAAK+I,KAAO,SAE5D,SAAW7J,GAAMqJ,KAAKqD,OACxBusB,EAAUA,GAAaj5B,EAAMiM,OAASjM,EAAMqJ,KAAKqD,KAAKT,MACtD4kB,EAAS5kB,MAAQjM,EAAMqJ,KAAKqD,KAAKT,OAI/BgtB,GACF9qB,EAAG1F,QAAQ6gC,OAAOzY,EAAU,SAAUA,GAChCA,GAEFA,EAASG,EAAQnkB,UAAYxS,EAC7BuzC,EAAQ5rC,KAAK6uB,KAIb1iB,EAAGu/B,iBAAiB1tC,EAAMqJ,KAAMrJ,GAEhCmO,EAAGi8B,YAAa,EAChBj8B,EAAGwgB,KAAKE,QAAQjH,KAAK,eAOzBgmB,EAAQjuC,QACVqxB,EAAQniB,OAAO++B,GAGjBtqC,EAAMq8B,oBASV7iC,EAAQqQ,UAAUy9B,cAAgB,SAAUtnC,GAC1C,GAAKtJ,KAAKyO,QAAQwgC,WAAlB,CAEA,GAAI4E,GAAWvqC,EAAMwqC,UAAYxqC,EAAMwqC,SAASD,QAC5CE,EAAWzqC,EAAMwqC,UAAYxqC,EAAMwqC,SAASC,QAChD,IAAIF,GAAWE,EAEb,WADA/zC,MAAK6wC,mBAAmBvnC,EAI1B,IAAI0qC,GAAeh0C,KAAK42B,eAEpBvnB,EAAOvM,EAAQmwC,eAAe3pC,GAC9B6mC,EAAY9gC,GAAQA,EAAKhP,MAC7BL,MAAK02B,aAAayZ,EAElB,IAAI8D,GAAej0C,KAAK42B,gBAIpBqd,EAAatuC,OAAS,GAAKquC,EAAaruC,OAAS,IACnD3F,KAAK20B,KAAKE,QAAQjH,KAAK,UACrB3rB,MAAOgyC,MAUbnxC,EAAQqQ,UAAU29B,WAAa,SAAUxnC,GACvC,GAAKtJ,KAAKyO,QAAQwgC,YACbjvC,KAAKyO,QAAQygC,SAASj8B,IAA3B,CAEA,GAAIkB,GAAKnU,KACLg1B,EAAOh1B,KAAK20B,KAAKh0B,KAAKq0B,MAAQ,KAC9B3lB,EAAOvM,EAAQmwC,eAAe3pC,EAElC,IAAI+F,EAAM,CAIR,GAAIwnB,GAAW1iB,EAAG2hB,UAAU5gB,IAAI7F,EAAKhP,GACrCL,MAAKyO,QAAQ4gC,SAASxY,EAAU,SAAUA,GACpCA,GACF1iB,EAAG2hB,UAAUhgB,aAAajB,OAAOgiB,SAIlC,CAEH,GAAIqd,GAAOvzC,EAAK2G,gBAAgBtH,KAAK+vB,IAAIzQ,OACrCvN,EAAIzI,EAAM4iB,OAAOna,EAAImiC,EACrBtkC,EAAQ5P,KAAK20B,KAAKh0B,KAAK00B,OAAOtjB,GAC9BoiC,GACFvkC,MAAOolB,EAAOA,EAAKplB,GAASA,EAC5BggB,QAAS,WAIX,IAA0B,UAAtB5vB,KAAKyO,QAAQ3H,KAAkB,CACjC,GAAI+I,GAAM7P,KAAK20B,KAAKh0B,KAAK00B,OAAOtjB,EAAI/R,KAAKgG,MAAMuM,MAAQ,EACvD4hC,GAAQtkC,IAAMmlB,EAAOA,EAAKnlB,GAAOA,EAGnCskC,EAAQn0C,KAAK81B,UAAUjjB,UAAYlS,EAAKqE,YAExC,IAAIiN,GAAQnP,EAAQ2wC,gBAAgBnqC,EAChC2I,KACFkiC,EAAQliC,MAAQA,EAAMqlB,SAIxBt3B,KAAKyO,QAAQ2gC,MAAM+E,EAAS,SAAU9kC,GAChCA,GACF8E,EAAG2hB,UAAUhgB,aAAa7C,IAAI5D,QAYtCvM,EAAQqQ,UAAU09B,mBAAqB,SAAUvnC,GAC/C,GAAKtJ,KAAKyO,QAAQwgC,WAAlB,CAEA,GAAIkB,GACA9gC,EAAOvM,EAAQmwC,eAAe3pC,EAElC,IAAI+F,EAAM,CAER8gC,EAAYnwC,KAAK42B,cAEjB,IAAImd,GAAWzqC,EAAMwqC,UAAYxqC,EAAMwqC,SAASC,WAAY,CAC5D,IAAIA,EAAU,CAIZ5D,EAAUnoC,KAAKqH,EAAKhP,GACpB,IAAIo1B,GAAQ3yB,EAAQsxC,cAAcp0C,KAAK81B,UAAU5gB,IAAIi7B,E
AAWnwC,KAAKyvC,aAGrEU,KACA,KAAK,GAAI9vC,KAAML,MAAKiC,MAClB,GAAIjC,KAAKiC,MAAM6D,eAAezF,GAAK,CACjC,GAAIg0C,GAAQr0C,KAAKiC,MAAM5B,GACnBuP,EAAQykC,EAAM3hC,KAAK9C,MACnBC,EAA0BrJ,SAAnB6tC,EAAM3hC,KAAK7C,IAAqBwkC,EAAM3hC,KAAK7C,IAAMD,CAExDA,IAAS6lB,EAAM3pB,KAAO+D,GAAO4lB,EAAM/oB,KACrCyjC,EAAUnoC,KAAKqsC,EAAMh0C,SAKxB,CAEH,GAAI8H,GAAQgoC,EAAUxpC,QAAQ0I,EAAKhP,GACtB,KAAT8H,EAEFgoC,EAAUnoC,KAAKqH,EAAKhP,IAIpB8vC,EAAU/nC,OAAOD,EAAO,GAI5BnI,KAAK02B,aAAayZ,GAElBnwC,KAAK20B,KAAKE,QAAQjH,KAAK,UACrB3rB,MAAOjC,KAAK42B,oBAWlB9zB,EAAQsxC,cAAgB,SAASte,GAC/B,GAAIppB,GAAM,KACNZ,EAAM,IAmBV,OAjBAgqB,GAAUztB,QAAQ,SAAUqK,IACf,MAAP5G,GAAe4G,EAAK9C,MAAQ9D,KAC9BA,EAAM4G,EAAK9C,OAGGpJ,QAAZkM,EAAK7C,KACI,MAAPnD,GAAegG,EAAK7C,IAAMnD,KAC5BA,EAAMgG,EAAK7C,MAIF,MAAPnD,GAAegG,EAAK9C,MAAQlD,KAC9BA,EAAMgG,EAAK9C,UAMf9D,IAAKA,EACLY,IAAKA,IAUT5J,EAAQmwC,eAAiB,SAAS3pC,GAEhC,IADA,GAAII,GAASJ,EAAMI,OACZA,GAAQ,CACb,GAAIA,EAAO5D,eAAe,iBACxB,MAAO4D,GAAO,gBAEhBA,GAASA,EAAOG,WAGlB,MAAO,OAST/G,EAAQ2wC,gBAAkB,SAASnqC,GAEjC,IADA,GAAII,GAASJ,EAAMI,OACZA,GAAQ,CACb,GAAIA,EAAO5D,eAAe,kBACxB,MAAO4D,GAAO,iBAEhBA,GAASA,EAAOG,WAGlB,MAAO,OAST/G,EAAQwxC,kBAAoB,SAAShrC,GAEnC,IADA,GAAII,GAASJ,EAAMI,OACZA,GAAQ,CACb,GAAIA,EAAO5D,eAAe,oBACxB,MAAO4D,GAAO,mBAEhBA,GAASA,EAAOG,WAGlB,MAAO,OAGThK,EAAOD,QAAUkD,GAKb,SAASjD,EAAQD,EAASM,GAS9B,QAAS6C,GAAO4xB,EAAMlmB,EAAS8lC,EAAM1O,GACnC7lC,KAAK20B,KAAOA,EACZ30B,KAAKq0B,gBACH3lB,SAAS,EACTs3B,OAAO,EACPwO,SAAU,GACVC,YAAa,EACbhtC,MACEihB,SAAS,EACT9E,SAAU,YAEZyD,OACEqB,SAAS,EACT9E,SAAU,aAGd5jB,KAAKu0C,KAAOA,EACZv0C,KAAKyO,QAAU9N,EAAK2E,UAAUtF,KAAKq0B,gBACnCr0B,KAAK6lC,iBAAmBA,EAExB7lC,KAAKinC,eACLjnC,KAAK+vB,OACL/vB,KAAKm0B,UACLn0B,KAAKmnC,eAAiB,EACtBnnC,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GAjClB,GAAI9N,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9BqC,EAAYrC,EAAoB,GAkCpC6C,GAAOoQ,UAAY,GAAI5Q,GAEvBQ,EAAOoQ,UAAUsD,MAAQ,WACvBzW,KAAKm0B,UACLn0B,KAAKmnC,eAAiB,GAGxBpkC,EAAOoQ,UAAUm0B,SAAW,SAAS7e,EAAO8e,GAErCvnC,KAAKm0B,OAAOruB,eAAe2iB,KAC9BzoB,KAAKm0B,OAAO1L,GAAS8e,GAEvBvnC,KAAKmnC,gBAAkB,GAGzBpkC,EAAOoQ,UAAUq0B,YAAc,SAAS/e,EAAO8e,GAC7CvnC,KAAKm0B,OAAO1L,GAAS8e,GAGvBxkC,EAAOoQ,UAAUs0B,YAAc,SAAShf,GAClCzoB,KAAKm0B,OAAOruB,eAAe2iB,WACtBzoB,MAAKm0B,OAAO1L,GACnBzoB,KAAKmnC,gBAAkB,IAI3BpkC,EAAOoQ,UAAUuhB,QAAU,WACzB10B,KAAK+vB,IAAIzQ,MAAQ/N,SAASM,cAAc,OACxC7R,KAAK+vB,IAAIzQ,MAAMzX,UAAY,SAC3B7H,KAAK+vB,IAAIzQ,MAAMrS,MAAM2W,SAAW,WAChC5jB,KAAK+vB,IAAIzQ,MAAMrS,MAAMtF,IAAM,OAC3B3H,KAAK+vB,IAAIzQ,MAAMrS,MAAM26B,QAAU,QAE/B5nC,KAAK+vB,IAAI2kB,SAAWnjC,SAASM,cAAc,OAC3C7R,KAAK+vB,IAAI2kB,SAAS7sC,UAAY,aAC9B7H,KAAK+vB,IAAI2kB,SAASznC,MAAM2W,SAAW,WACnC5jB,KAAK+vB,IAAI2kB,SAASznC,MAAMtF,IAAM,MAE9B3H,KAAK4lC,IAAMr0B,SAASC,gBAAgB,6BAA6B,OACjExR,KAAK4lC,IAAI34B,MAAM2W,SAAW,WAC1B5jB,KAAK4lC,IAAI34B,MAAMtF,IAAM,MACrB3H,KAAK4lC,IAAI34B,MAAMsF,MAAQvS,KAAKyO,QAAQ+lC,SAAW,EAAI,KACnDx0C,KAAK4lC,IAAI34B,MAAMuF,OAAS,OAExBxS,KAAK+vB,IAAIzQ,MAAM7N,YAAYzR,KAAK4lC,KAChC5lC,KAAK+vB,IAAIzQ,MAAM7N,YAAYzR,KAAK+vB,IAAI2kB,WAMtC3xC,EAAOoQ,UAAUu0B,KAAO,WAElB1nC,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,QAQnDvc,EAAOoQ,UAAUw0B,KAAO,WAEjB3nC,KAAK+vB,IAAIzQ,MAAMzV,YAClB7J,KAAK20B,KAAK5E,IAAI7D,OAAOza,YAAYzR,KAAK+vB,IAAIzQ,QAI9Cvc,EAAOoQ,UAAUD,WAAa,SAASzE,GACrC,GAAIP,IAAU,UAAU,cAAc,QAAQ,OAAO,QACrDvN,GAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASA,IAGjD1L,EAAOoQ,UAAUsO,OAAS,WACxB,GAAI0mB,GAAe,CACnB,KAAK,GAAI7Q,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,IACvI6Q,IAKN,IAAuC,GAAnCnoC,KAAKyO,QAAQzO,KAAKu0C,MAAM7rB,SAA2C,GAAvB1oB,KAAKmnC,gBAA+C,GAAxBn
nC,KAAKyO,QAAQC,SAAoC,GAAhBy5B,EAC3GnoC,KAAK0nC,WAEF,CAqBH,GApBA1nC,KAAK2nC,OACmC,YAApC3nC,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,UAA8D,eAApC5jB,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,UAC5E5jB,KAAK+vB,IAAIzQ,MAAMrS,MAAMxF,KAAO,MAC5BzH,KAAK+vB,IAAIzQ,MAAMrS,MAAMqb,UAAY,OACjCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMqb,UAAY,OACpCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMxF,KAAQzH,KAAKyO,QAAQ+lC,SAAW,GAAM,KAC9Dx0C,KAAK+vB,IAAI2kB,SAASznC,MAAMoa,MAAQ,GAChCrnB,KAAK4lC,IAAI34B,MAAMxF,KAAO,MACtBzH,KAAK4lC,IAAI34B,MAAMoa,MAAQ,KAGvBrnB,KAAK+vB,IAAIzQ,MAAMrS,MAAMoa,MAAQ,MAC7BrnB,KAAK+vB,IAAIzQ,MAAMrS,MAAMqb,UAAY,QACjCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMqb,UAAY,QACpCtoB,KAAK+vB,IAAI2kB,SAASznC,MAAMoa,MAASrnB,KAAKyO,QAAQ+lC,SAAW,GAAM,KAC/Dx0C,KAAK+vB,IAAI2kB,SAASznC,MAAMxF,KAAO,GAC/BzH,KAAK4lC,IAAI34B,MAAMoa,MAAQ,MACvBrnB,KAAK4lC,IAAI34B,MAAMxF,KAAO,IAGgB,YAApCzH,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,UAA8D,aAApC5jB,KAAKyO,QAAQzO,KAAKu0C,MAAM3wB,SAC5E5jB,KAAK+vB,IAAIzQ,MAAMrS,MAAMtF,IAAM,EAAIzD,OAAOlE,KAAK20B,KAAK5E,IAAI7D,OAAOjf,MAAMtF,IAAI6C,QAAQ,KAAK,KAAO,KACzFxK,KAAK+vB,IAAIzQ,MAAMrS,MAAMqW,OAAS,OAE3B,CACH,GAAIqxB,GAAmB30C,KAAK20B,KAAKC,SAAS1I,OAAO1Z,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,MAC7FxS,MAAK+vB,IAAIzQ,MAAMrS,MAAMqW,OAAS,EAAIqxB,EAAmBzwC,OAAOlE,KAAK20B,KAAK5E,IAAI7D,OAAOjf,MAAMtF,IAAI6C,QAAQ,KAAK,KAAO,KAC/GxK,KAAK+vB,IAAIzQ,MAAMrS,MAAMtF,IAAM,GAGH,GAAtB3H,KAAKyO,QAAQu3B,OACfhmC,KAAK+vB,IAAIzQ,MAAMrS,MAAMsF,MAAQvS,KAAK+vB,IAAI2kB,SAAStkB,YAAc,GAAK,KAClEpwB,KAAK+vB,IAAI2kB,SAASznC,MAAMoa,MAAQ,GAChCrnB,KAAK+vB,IAAI2kB,SAASznC,MAAMxF,KAAO,GAC/BzH,KAAK4lC,IAAI34B,MAAMsF,MAAQ,QAGvBvS,KAAK+vB,IAAIzQ,MAAMrS,MAAMsF,MAAQvS,KAAKyO,QAAQ+lC,SAAW,GAAKx0C,KAAK+vB,IAAI2kB,SAAStkB,YAAc,GAAK,KAC/FpwB,KAAK40C,kBAGP,IAAIhlB,GAAU,EACd,KAAK,GAAI0H,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,KACvI1H,GAAW5vB,KAAKm0B,OAAOmD,GAAS1H,QAAU,UAIhD5vB,MAAK+vB,IAAI2kB,SAASzwB,UAAY2L,EAC9B5vB,KAAK+vB,IAAI2kB,SAASznC,MAAMsjB,WAAe,IAAOvwB,KAAKyO,QAAQ+lC,SAAYx0C,KAAKyO,QAAQgmC,YAAe,OAIvG1xC,EAAOoQ,UAAUyhC,gBAAkB,WACjC,GAAI50C,KAAK+vB,IAAIzQ,MAAMzV,WAAY,CAC7BjJ,EAAQiQ,gBAAgB7Q,KAAKinC,YAC7B,IAAIjjB,GAAUza,OAAOsrC,iBAAiB70C,KAAK+vB,IAAIzQ,OAAOw1B,WAClD/M,EAAa7jC,OAAO8f,EAAQxZ,QAAQ,KAAK,KACzCuH,EAAIg2B,EACJ1B,EAAYrmC,KAAKyO,QAAQ+lC,SACzB1M,EAAa,IAAO9nC,KAAKyO,QAAQ+lC,SACjCxiC,EAAI+1B,EAAa,GAAMD,EAAa,CAExC9nC,MAAK4lC,IAAI34B,MAAMsF,MAAQ8zB,EAAY,EAAI0B,EAAa,IAEpD,KAAK,GAAIzQ,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KACO,GAAhCt3B,KAAKm0B,OAAOmD,GAAS5O,SAAkEliB,SAA9CxG,KAAK6lC,iBAAiBrO,WAAWF,IAAuE,GAA7Ct3B,KAAK6lC,iBAAiBrO,WAAWF,KACvIt3B,KAAKm0B,OAAOmD,GAAS0Q,SAASj2B,EAAGC,EAAGhS,KAAKinC,YAAajnC,KAAK4lC,IAAKS,EAAWyB,GAC3E91B,GAAK81B,EAAa9nC,KAAKyO,QAAQgmC,aAKrC7zC,GAAQsQ,gBAAgBlR,KAAKinC,eAIjCpnC,EAAOD,QAAUmD,GAKb,SAASlD,EAAQD,EAASM,GAqB9B,QAAS8C,GAAU2xB,EAAMlmB,GACvBzO,KAAKK,GAAKM,EAAKqE,aACfhF,KAAK20B,KAAOA,EAEZ30B,KAAKq0B,gBACHuX,iBAAkB,OAClBmJ,aAAc,UACd7+B,MAAM,EACN8+B,UAAU,EACVC,YAAa,QACb1J,QACE78B,SAAS,EACT6lB,YAAa,UAEftnB,MAAO,OACPioC,UACE3iC,MAAO,GACP4iC,cAAe,UACfpG,MAAO,UAEThE,YACEr8B,SAAS,EACTs8B,gBAAiB,cACjBC,MAAO,IAET94B,YACEzD,SAAS,EACT2D,KAAM,EACNpF,MAAO,UAETmoC,UACEtP,iBAAiB,EACjBC,iBAAiB,EACjBC,OAAO,EACPzzB,MAAO,OACPmW,SAAS,EACT6S,YAAY,EACZD,aACE7zB,MAAOqE,IAAItF,OAAWkG,IAAIlG,QAC1B6gB,OAAQvb,IAAItF,OAAWkG,IAAIlG,UAkB/B6uC,QACE3mC,SAAS,EACTs3B,OAAO,EACPv+B,MACEihB,SAAS,EACT9E,SAAU,YAEZyD,OACEqB,SAAS,EACT9E,SAAU,cAGduQ,QACEqD,gBAKJx3B,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBACpCr0B,KAAK+vB,OACL/vB,KAAKgG,SACLhG,KAAK8D,OAAS,KACd9D,KAAKm0B,UACLn0B,
KAAKs1C,oBAAqB,EAC1Bt1C,KAAKu1C,iBAAkB,EACvBv1C,KAAKw1C,yBAA0B,CAE/B,IAAIrhC,GAAKnU,IACTA,MAAK81B,UAAY,KACjB91B,KAAK+1B,WAAa,KAGlB/1B,KAAK0vC,eACHz8B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAGw7B,OAAO77B,EAAO7R,QAEnB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAGy7B,UAAU97B,EAAO7R,QAEtBoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG07B,UAAU/7B,EAAO7R,SAKxBjC,KAAK8vC,gBACH78B,IAAO,SAAU3J,EAAOwK,GACtBK,EAAG47B,aAAaj8B,EAAO7R,QAEzB4S,OAAU,SAAUvL,EAAOwK,GACzBK,EAAG67B,gBAAgBl8B,EAAO7R,QAE5BoU,OAAU,SAAU/M,EAAOwK,GACzBK,EAAG87B,gBAAgBn8B,EAAO7R,SAI9BjC,KAAKiC,SACLjC,KAAKmwC,aACLnwC,KAAKy1C,UAAYz1C,KAAK20B,KAAKc,MAAM7lB,MACjC5P,KAAKqwC,eAELrwC,KAAKinC,eACLjnC,KAAKkT,WAAWzE,GAChBzO,KAAKwqC,0BAA4B,GACjCxqC,KAAK01C,QAAU,EACf11C,KAAK20B,KAAKE,QAAQthB,GAAG,eAAgB,WACnCY,EAAGshC,UAAYthC,EAAGwgB,KAAKc,MAAM7lB,MAC7BuE,EAAGyxB,IAAI34B,MAAMxF,KAAO9G,EAAKmJ,OAAOK,QAAQgK,EAAGnO,MAAMuM,OACjD4B,EAAGsN,OAAOlhB,KAAK4T,GAAG,KAIpBnU,KAAK00B,UACL10B,KAAKgsC,WAAapG,IAAK5lC,KAAK4lC,IAAKqB,YAAajnC,KAAKinC,YAAax4B,QAASzO,KAAKyO,QAAS0lB,OAAQn0B,KAAKm0B,QACpGn0B,KAAK20B,KAAKE,QAAQjH,KAAK,UAvJzB,GAAIjtB,GAAOT,EAAoB,GAC3BU,EAAUV,EAAoB,GAC9BW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/BqC,EAAYrC,EAAoB,IAChCwC,EAAWxC,EAAoB,IAC/ByC,EAAazC,EAAoB,IACjC6C,EAAS7C,EAAoB,IAC7By1C,EAAoBz1C,EAAoB,IAExCowC,EAAY,eAiJhBttC,GAAUmQ,UAAY,GAAI5Q,GAK1BS,EAAUmQ,UAAUuhB,QAAU,WAC5B,GAAIpV,GAAQ/N,SAASM,cAAc,MACnCyN,GAAMzX,UAAY,YAClB7H,KAAK+vB,IAAIzQ,MAAQA,EAGjBtf,KAAK4lC,IAAMr0B,SAASC,gBAAgB,6BAA6B,OACjExR,KAAK4lC,IAAI34B,MAAM2W,SAAW,WAC1B5jB,KAAK4lC,IAAI34B,MAAMuF,QAAU,GAAKxS,KAAKyO,QAAQwmC,aAAazqC,QAAQ,KAAK,IAAM,KAC3ExK,KAAK4lC,IAAI34B,MAAM26B,QAAU,QACzBtoB,EAAM7N,YAAYzR,KAAK4lC,KAGvB5lC,KAAKyO,QAAQ2mC,SAAS7gB,YAAc,OACpCv0B,KAAK41C,UAAY,GAAIlzC,GAAS1C,KAAK20B,KAAM30B,KAAKyO,QAAQ2mC,SAAUp1C,KAAK4lC,IAAK5lC,KAAKyO,QAAQ0lB,QAEvFn0B,KAAKyO,QAAQ2mC,SAAS7gB,YAAc,QACpCv0B,KAAK61C,WAAa,GAAInzC,GAAS1C,KAAK20B,KAAM30B,KAAKyO,QAAQ2mC,SAAUp1C,KAAK4lC,IAAK5lC,KAAKyO,QAAQ0lB,cACjFn0B,MAAKyO,QAAQ2mC,SAAS7gB,YAG7Bv0B,KAAK81C,WAAa,GAAI/yC,GAAO/C,KAAK20B,KAAM30B,KAAKyO,QAAQ4mC,OAAQ,OAAQr1C,KAAKyO,QAAQ0lB,QAClFn0B,KAAK+1C,YAAc,GAAIhzC,GAAO/C,KAAK20B,KAAM30B,KAAKyO,QAAQ4mC,OAAQ,QAASr1C,KAAKyO,QAAQ0lB,QAEpFn0B,KAAK2nC,QAOP3kC,EAAUmQ,UAAUD,WAAa,SAASzE,GACxC,GAAIA,EAAS,CACX,GAAIP,IAAU,WAAW,eAAe,SAAS,cAAc,mBAAmB,QAAQ,WAAW,WAAW,OAAO,SAC3F1H,UAAxBiI,EAAQwmC,aAAgDzuC,SAAnBiI,EAAQ+D,QAAsEhM,SAA9CxG,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,QAC1GxS,KAAKu1C,iBAAkB,EACvBv1C,KAAKw1C,yBAA0B,GAEsBhvC,SAA9CxG,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,QAAgDhM,SAAxBiI,EAAQwmC,aACtErqC,UAAU6D,EAAQwmC,YAAc,IAAIzqC,QAAQ,KAAK,KAAOxK,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,SAC7FxS,KAAKu1C,iBAAkB,GAG3B50C,EAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASA,GAC/C9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,cACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,UACxC9N,EAAK4N,aAAavO,KAAKyO,QAASA,EAAQ,UAEpCA,EAAQs8B,YACuB,gBAAtBt8B,GAAQs8B,YACbt8B,EAAQs8B,WAAWC,kBACqB,WAAtCv8B,EAAQs8B,WAAWC,gBACrBhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,EAEa,WAAtCx8B,EAAQs8B,WAAWC,gBAC1BhrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,GAGhCjrC,KAAKyO,QAAQs8B,WAAWC,gBAAkB,cAC1ChrC,KAAKyO,QAAQs8B,WAAWE,MAAQ,KAMpCjrC,KAAK41C,WACkBpvC,SAArBiI,EAAQ2mC,WACVp1C,KAAK41C,UAAU1iC,WAAWlT,KAAKyO,QAAQ2mC,UACvCp1C,KAAK61C,WAAW3iC,WAAWlT,KAAKyO,QAAQ2mC,WAIxCp1C,KAAK81C,YACgBtvC,SAAnBiI,EAAQ4mC,SACVr1C,KAAK81C,WAAW5iC,WAAWlT,KAAKyO,QAAQ4mC,QACxCr1C,KAAK+1C,YAAY7iC,WAAWlT,KAAKyO,QAAQ4mC,SAIzCr1C,KAAKm0B,OAAOruB,eAAewqC,IAC7BtwC,KAAKm0B,OAAOmc,GAAWp9B,WAAWzE,GAKlCzO,KAAK+vB,IAAIzQ,OACXtf,KAAKyhB,QAAO,IAOhBze,EAAUmQ,UAAUu0B,KAAO,WAErB1nC,KAAK+vB,IAAIzQ,MAAMzV,YACjB7J,KAAK+vB,IAAIzQ,MAAMzV,WAAWsH,YAAYnR,KAAK+vB,IAAIzQ,QASnDtc,EAAUmQ,U
AAUw0B,KAAO,WAEpB3nC,KAAK+vB,IAAIzQ,MAAMzV,YAClB7J,KAAK20B,KAAK5E,IAAI7D,OAAOza,YAAYzR,KAAK+vB,IAAIzQ,QAS9Ctc,EAAUmQ,UAAU8iB,SAAW,SAASh0B,GACtC,GACEkT,GADEhB,EAAKnU,KAEPuyC,EAAevyC,KAAK81B,SAGtB,IAAK7zB,EAGA,CAAA,KAAIA,YAAiBpB,IAAWoB,YAAiBnB,IAIpD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK81B,UAAY7zB,MAHjBjC,MAAK81B,UAAY,IAoBnB,IAXIyc,IAEF5xC,EAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnDipC,EAAa7+B,IAAIpK,EAAOhB,KAI1B6M,EAAMo9B,EAAa18B,SACnB7V,KAAK6vC,UAAU16B,IAGbnV,KAAK81B,UAAW,CAElB,GAAIz1B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK0vC,cAAe,SAAUpnC,EAAUgB,GACnD6K,EAAG2hB,UAAUviB,GAAGjK,EAAOhB,EAAUjI,KAInC8U,EAAMnV,KAAK81B,UAAUjgB,SACrB7V,KAAK2vC,OAAOx6B,GAEdnV,KAAKywC,mBAELzwC,KAAKyhB,QAAO,IAQdze,EAAUmQ,UAAU6iB,UAAY,SAAS7B,GACvC,GACIhf,GADAhB,EAAKnU,IAgBT,IAZIA,KAAK+1B,aACPp1B,EAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWniB,YAAYtK,EAAOhB,KAInC6M,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+1B,WAAa,KAClB/1B,KAAKiwC,gBAAgB96B,IAIlBgf,EAGA,CAAA,KAAIA,YAAkBtzB,IAAWszB,YAAkBrzB,IAItD,KAAM,IAAIuF,WAAU,kDAHpBrG,MAAK+1B,WAAa5B,MAHlBn0B,MAAK+1B,WAAa,IASpB,IAAI/1B,KAAK+1B,WAAY,CAEnB,GAAI11B,GAAKL,KAAKK,EACdM,GAAK0H,QAAQrI,KAAK8vC,eAAgB,SAAUxnC,EAAUgB,GACpD6K,EAAG4hB,WAAWxiB,GAAGjK,EAAOhB,EAAUjI,KAIpC8U,EAAMnV,KAAK+1B,WAAWlgB,SACtB7V,KAAK+vC,aAAa56B,GAEpBnV,KAAK4vC,aASP5sC,EAAUmQ,UAAUy8B,UAAY,WAC9B5vC,KAAKywC,mBACLzwC,KAAKg2C,sBAELh2C,KAAKyhB,QAAO,IAEdze,EAAUmQ,UAAUw8B,OAAkB,SAAUx6B,GAAMnV,KAAK4vC,UAAUz6B,IACrEnS,EAAUmQ,UAAU08B,UAAkB,SAAU16B,GAAMnV,KAAK4vC,UAAUz6B,IACrEnS,EAAUmQ,UAAU68B,gBAAmB,SAAUE,GAC/C,IAAK,GAAI1qC,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAAK,CACxC,GAAIyM,GAAQjS,KAAK+1B,WAAW7gB,IAAIg7B,EAAS1qC,GACzCxF,MAAKi2C,aAAahkC,EAAOi+B,EAAS1qC,IAIpCxF,KAAKyhB,QAAO,IAEdze,EAAUmQ,UAAU48B,aAAe,SAAUG,GAAWlwC,KAAKgwC,gBAAgBE,IAQ7EltC,EAAUmQ,UAAU88B,gBAAkB,SAAUC,GAC9C,IAAK,GAAI1qC,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/BxF,KAAKm0B,OAAOruB,eAAeoqC,EAAS1qC,MACmB,SAArDxF,KAAKm0B,OAAO+b,EAAS1qC,IAAIiJ,QAAQm9B,kBACnC5rC,KAAK61C,WAAWpO,YAAYyI,EAAS1qC,IACrCxF,KAAK+1C,YAAYtO,YAAYyI,EAAS1qC,IACtCxF,KAAK+1C,YAAYt0B,WAGjBzhB,KAAK41C,UAAUnO,YAAYyI,EAAS1qC,IACpCxF,KAAK81C,WAAWrO,YAAYyI,EAAS1qC,IACrCxF,KAAK81C,WAAWr0B,gBAEXzhB,MAAKm0B,OAAO+b,EAAS1qC,IAGhCxF,MAAKywC,mBAELzwC,KAAKyhB,QAAO,IAWdze,EAAUmQ,UAAU8iC,aAAe,SAAUhkC,EAAOqlB,GAC7Ct3B,KAAKm0B,OAAOruB,eAAewxB,IAY9Bt3B,KAAKm0B,OAAOmD,GAASziB,OAAO5C,GACyB,SAAjDjS,KAAKm0B,OAAOmD,GAAS7oB,QAAQm9B,kBAC/B5rC,KAAK61C,WAAWrO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,IACjDt3B,KAAK+1C,YAAYvO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,MAGlDt3B,KAAK41C,UAAUpO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,IAChDt3B,KAAK81C,WAAWtO,YAAYlQ,EAASt3B,KAAKm0B,OAAOmD,OAlBnDt3B,KAAKm0B,OAAOmD,GAAW,GAAI30B,GAAWsP,EAAOqlB,EAASt3B,KAAKyO,QAASzO,KAAKwqC,0BACpB,SAAjDxqC,KAAKm0B,OAAOmD,GAAS7oB,QAAQm9B,kBAC/B5rC,KAAK61C,WAAWvO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,IAC9Ct3B,KAAK+1C,YAAYzO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,MAG/Ct3B,KAAK41C,UAAUtO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,IAC7Ct3B,KAAK81C,WAAWxO,SAAShQ,EAASt3B,KAAKm0B,OAAOmD,MAclDt3B,KAAK81C,WAAWr0B,SAChBzhB,KAAK+1C,YAAYt0B,UASnBze,EAAUmQ,UAAU6iC,oBAAsB,WACxC,GAAsB,MAAlBh2C,KAAK81B,UAAmB,CAC1B,GACIwB,GADA4e,IAEJ,KAAK5e,IAAWt3B,MAAKm0B,OACfn0B,KAAKm0B,OAAOruB,eAAewxB,KAC7B4e,EAAc5e,MAGlB,KAAK,GAAI/hB,KAAUvV,MAAK81B,UAAUljB,MAChC,GAAI5S,KAAK81B,UAAUljB,MAAM9M,eAAeyP,GAAS,CAC/C,GAAIlG,GAAOrP,KAAK81B,UAAUljB,MAAM2C,EAChC,IAAkC/O,SAA9B0vC,EAAc7mC,EAAK4C,OACrB,KAAM,IAAIrO,OAAM,4IAElByL,GAAK0C,EAAIpR,EAAKkG,QAAQwI,EAAK0C,EAAE,QAC7BmkC,EAAc7mC,EAAK4C,OAAOjK,KAAKqH,GAGnC,IAAKioB,IAAWt3B,MAAKm0B,OACfn0B,KAAKm0B,OAAOruB,eAAewxB,IAC7Bt3B,KAAKm0B,OAAOmD,GAASrB,SAASigB,EAAc5e,MAYpDt0B,EAAUmQ,UAAUs9B,iBAAmB,WACrC,GAAI
zwC,KAAK81B,WAA+B,MAAlB91B,KAAK81B,UAAmB,CAC5C,GAAIqgB,GAAmB,CACvB,KAAK,GAAI5gC,KAAUvV,MAAK81B,UAAUljB,MAChC,GAAI5S,KAAK81B,UAAUljB,MAAM9M,eAAeyP,GAAS,CAC/C,GAAIlG,GAAOrP,KAAK81B,UAAUljB,MAAM2C,EACpB/O,SAAR6I,IACEA,EAAKvJ,eAAe,SACHU,SAAf6I,EAAK4C,QACP5C,EAAK4C,MAAQq+B,GAIfjhC,EAAK4C,MAAQq+B,EAEf6F,EAAmB9mC,EAAK4C,OAASq+B,EAAY6F,EAAmB,EAAIA,GAK1E,GAAwB,GAApBA,QACKn2C,MAAKm0B,OAAOmc,GACnBtwC,KAAK81C,WAAWrO,YAAY6I,GAC5BtwC,KAAK+1C,YAAYtO,YAAY6I,GAC7BtwC,KAAK41C,UAAUnO,YAAY6I,GAC3BtwC,KAAK61C,WAAWpO,YAAY6I,OAEzB,CACH,GAAIr+B,IAAS5R,GAAIiwC,EAAW1gB,QAAS5vB,KAAKyO,QAAQsmC,aAClD/0C,MAAKi2C,aAAahkC,EAAOq+B,eAIpBtwC,MAAKm0B,OAAOmc,GACnBtwC,KAAK81C,WAAWrO,YAAY6I,GAC5BtwC,KAAK+1C,YAAYtO,YAAY6I,GAC7BtwC,KAAK41C,UAAUnO,YAAY6I,GAC3BtwC,KAAK61C,WAAWpO,YAAY6I,EAG9BtwC,MAAK81C,WAAWr0B,SAChBzhB,KAAK+1C,YAAYt0B,UAQnBze,EAAUmQ,UAAUsO,OAAS,SAAS20B,GACpC,GAAI3R,IAAU,CAGdzkC,MAAKgG,MAAMuM,MAAQvS,KAAK+vB,IAAIzQ,MAAM8Q,YAClCpwB,KAAKgG,MAAMwM,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAGhChM,SAAnBxG,KAAK2xC,WAA2B3xC,KAAKgG,MAAMuM,QAC7C6jC,GAAmB,GAIrB3R,EAAUzkC,KAAKwkC,cAAgBC,CAG/B,IAAI+M,GAAkBxxC,KAAK20B,KAAKc,MAAM5lB,IAAM7P,KAAK20B,KAAKc,MAAM7lB,MACxD6hC,EAAUD,GAAmBxxC,KAAK0xC,mBA6BtC,IA5BA1xC,KAAK0xC,oBAAsBF,EAKZ,GAAX/M,IACFzkC,KAAK4lC,IAAI34B,MAAMsF,MAAQ5R,EAAKmJ,OAAOK,OAAO,EAAEnK,KAAKgG,MAAMuM,OACvDvS,KAAK4lC,IAAI34B,MAAMxF,KAAO9G,EAAKmJ,OAAOK,QAAQnK,KAAKgG,MAAMuM,QAGN,KAA1CvS,KAAKyO,QAAQ+D,OAAS,IAAI7L,QAAQ,MAA8C,GAAhC3G,KAAKw1C,2BACxDx1C,KAAKu1C,iBAAkB,IAKC,GAAxBv1C,KAAKu1C,iBACHv1C,KAAKyO,QAAQwmC,aAAej1C,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAAS,OAC1ExS,KAAKyO,QAAQwmC,YAAcj1C,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAAS,KACvExS,KAAK4lC,IAAI34B,MAAMuF,OAASxS,KAAK20B,KAAKC,SAASiD,gBAAgBrlB,OAAS,MAEtExS,KAAKu1C,iBAAkB,GAGvBv1C,KAAK4lC,IAAI34B,MAAMuF,QAAU,GAAKxS,KAAKyO,QAAQwmC,aAAazqC,QAAQ,KAAK,IAAM,KAI9D,GAAXi6B,GAA6B,GAAVgN,GAA6C,GAA3BzxC,KAAKs1C,oBAAkD,GAApBc,EAC1E3R,EAAUzkC,KAAKq2C,gBAAkB5R,MAIjC,IAAsB,GAAlBzkC,KAAKy1C,UAAgB,CACvB,GAAI9rB,GAAS3pB,KAAK20B,KAAKc,MAAM7lB,MAAQ5P,KAAKy1C,UACtChgB,EAAQz1B,KAAK20B,KAAKc,MAAM5lB,IAAM7P,KAAK20B,KAAKc,MAAM7lB,KAClD,IAAwB,GAApB5P,KAAKgG,MAAMuM,MAAY,CACzB,GAAI+jC,GAAmBt2C,KAAKgG,MAAMuM,MAAMkjB,EACpC7L,EAAUD,EAAS2sB,CACvBt2C,MAAK4lC,IAAI34B,MAAMxF,MAASzH,KAAKgG,MAAMuM,MAAQqX,EAAW,MAO5D,MAFA5pB,MAAK81C,WAAWr0B,SAChBzhB,KAAK+1C,YAAYt0B,SACVgjB,GAQTzhC,EAAUmQ,UAAUkjC,aAAe,WAGjC,GADAz1C,EAAQiQ,gBAAgB7Q,KAAKinC,aACL,GAApBjnC,KAAKgG,MAAMuM,OAAgC,MAAlBvS,KAAK81B,UAAmB,CACnD,GAAI7jB,GAAOzM,EACP+wC,KACAC,KACAC,KACAC,GAAe,EAGfxG,IACJ,KAAK,GAAI5Y,KAAWt3B,MAAKm0B,OACnBn0B,KAAKm0B,OAAOruB,eAAewxB,KAC7BrlB,EAAQjS,KAAKm0B,OAAOmD,GACC,GAAjBrlB,EAAMyW,SAAgEliB,SAA5CxG,KAAKyO,QAAQ0lB,OAAOqD,WAAWF,IAAqE,GAA3Ct3B,KAAKyO,QAAQ0lB,OAAOqD,WAAWF,IACpH4Y,EAASloC,KAAKsvB,GAIpB,IAAI4Y,EAASvqC,OAAS,EAAG,CAEvB,GAAIgxC,GAAU32C,KAAK20B,KAAKh0B,KAAK40B,cAAcv1B,KAAK20B,KAAKC,SAASl1B,KAAK6S,OAC/DqkC,EAAU52C,KAAK20B,KAAKh0B,KAAK40B,aAAa,EAAIv1B,KAAK20B,KAAKC,SAASl1B,KAAK6S,OAClEwjB,IAQJ,KANA/1B,KAAK62C,iBAAiB3G,EAAUna,EAAY4gB,EAASC,GAGrD52C,KAAK82C,eAAe5G,EAAUna,GAGzBvwB,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/B+wC,EAAsBrG,EAAS1qC,IAAMxF,KAAK+2C,qBAAqBhhB,EAAWma,EAAS1qC,IAIrFxF,MAAKg3C,YAAY9G,EAAUqG,EAAuBE,GAIlDC,EAAe12C,KAAKi3C,aAAa/G,EAAUuG,EAC3C,IAAIS,GAAa,CACjB,IAAoB,GAAhBR,GAAwB12C,KAAK01C,QAAUwB,EAKzC,MAJAt2C,GAAQsQ,gBAAgBlR,KAAKinC,aAC7BjnC,KAAKs1C,oBAAqB,EAC1Bt1C,KAAK01C,UACL11C,KAAK20B,KAAKE,QAAQjH,KAAK,WAChB,CAUP,KAPI5tB,KAAK01C,QAAUwB,GACjBve,QAAQhF,IAAI,6EAEd3zB,KAAK01C,QAAU,EACf11C,KAAKs1C,oBAAqB,EAGrB9vC,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/ByM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IAC7BgxC,EAAmBtG,EAAS1qC,IAAMxF,KAAKm3C,qB
AAqBphB,EAAWma,EAAS1qC,IAAKyM,EAIvF,KAAKzM,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/ByM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IACF,OAAvByM,EAAMxD,QAAQxB,OAChBgF,EAAM85B,KAAKyK,EAAmBtG,EAAS1qC,IAAKyM,EAAOjS,KAAKgsC,UAG5D2J,GAAkB5J,KAAKmE,EAAUsG,EAAoBx2C,KAAKgsC,YAOhE,MADAprC,GAAQsQ,gBAAgBlR,KAAKinC,cACtB,GAiBTjkC,EAAUmQ,UAAU0jC,iBAAmB,SAAU3G,EAAUna,EAAY4gB,EAASC,GAC9E,GAAI3kC,GAAOzM,EAAGomB,EAAGvc,CACjB,IAAI6gC,EAASvqC,OAAS,EACpB,IAAKH,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAAK,CACpCyM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IAC7BuwB,EAAWma,EAAS1qC,MACpB,IAAI4xC,GAAgBrhB,EAAWma,EAAS1qC,GAExC;GAA0B,GAAtByM,EAAMxD,QAAQyH,KAAc,CAC9B,GAAImhC,GAAQnyC,KAAKwH,IAAI,EAAG/L,EAAK4O,kBAAkB0C,EAAM6jB,UAAW6gB,EAAS,IAAK,UAC9E,KAAK/qB,EAAIyrB,EAAOzrB,EAAI3Z,EAAM6jB,UAAUnwB,OAAQimB,IAE1C,GADAvc,EAAO4C,EAAM6jB,UAAUlK,GACVplB,SAAT6I,EAAoB,CACtB,GAAIA,EAAK0C,EAAI6kC,EAAS,CACpBQ,EAAcpvC,KAAKqH,EACnB,OAGA+nC,EAAcpvC,KAAKqH,QAMzB,KAAKuc,EAAI,EAAGA,EAAI3Z,EAAM6jB,UAAUnwB,OAAQimB,IACtCvc,EAAO4C,EAAM6jB,UAAUlK,GACVplB,SAAT6I,GACEA,EAAK0C,EAAI4kC,GAAWtnC,EAAK0C,EAAI6kC,GAC/BQ,EAAcpvC,KAAKqH,KAgBjCrM,EAAUmQ,UAAU2jC,eAAiB,SAAU5G,EAAUna,GACvD,GAAI9jB,EACJ,IAAIi+B,EAASvqC,OAAS,EACpB,IAAK,GAAIH,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAEnC,GADAyM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IACC,GAA1ByM,EAAMxD,QAAQumC,SAAkB,CAClC,GAAIoC,GAAgBrhB,EAAWma,EAAS1qC,GACxC,IAAI4xC,EAAczxC,OAAS,EAAG,CAC5B,GAAI2xC,GAAY,EACZC,EAAiBH,EAAczxC,OAI/B6xC,EAAYx3C,KAAK20B,KAAKh0B,KAAKw0B,eAAeiiB,EAAcA,EAAczxC,OAAS,GAAGoM,GAAK/R,KAAK20B,KAAKh0B,KAAKw0B,eAAeiiB,EAAc,GAAGrlC,GACtI0lC,EAAiBF,EAAiBC,CACtCF,GAAYpyC,KAAK4G,IAAI5G,KAAKwyC,KAAK,GAAMH,GAAiBryC,KAAKwH,IAAI,EAAGxH,KAAKwoB,MAAM+pB,IAG7E,KAAK,GADDE,MACK/rB,EAAI,EAAO2rB,EAAJ3rB,EAAoBA,GAAK0rB,EACvCK,EAAY3vC,KAAKovC,EAAcxrB,GAGjCmK,GAAWma,EAAS1qC,IAAMmyC,KAgBpC30C,EAAUmQ,UAAU6jC,YAAc,SAAU9G,EAAUna,EAAY0gB,GAChE,GAAI3K,GAAW75B,EAAOzM,EAGlBiJ,EAFAmpC,KACAC,IAEJ,IAAI3H,EAASvqC,OAAS,EAAG,CACvB,IAAKH,EAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/BsmC,EAAY/V,EAAWma,EAAS1qC,IAChCiJ,EAAUzO,KAAKm0B,OAAO+b,EAAS1qC,IAAIiJ,QAC/Bq9B,EAAUnmC,OAAS,IACrBsM,EAAQjS,KAAKm0B,OAAO+b,EAAS1qC,IAES,SAAlCiJ,EAAQymC,SAASC,eAA6C,OAAjB1mC,EAAQxB,MACvB,QAA5BwB,EAAQm9B,iBAA6BgM,EAAuBA,EAAoB5jC,OAAO/B,EAAM45B,UAAUC,IAClE+L,EAAuBA,EAAqB7jC,OAAO/B,EAAM45B,UAAUC,IAG5G2K,EAAYvG,EAAS1qC,IAAMyM,EAAM45B,UAAUC,EAAUoE,EAAS1qC,IAMpEmwC,GAAkBmC,oBAAoBF,EAAsBnB,EAAavG,EAAU,iBAAmB,QACtGyF,EAAkBmC,oBAAoBD,EAAsBpB,EAAavG,EAAU,kBAAmB,WAW1GltC,EAAUmQ,UAAU8jC,aAAe,SAAU/G,EAAUuG,GACrD,GAGoEsB,GAAQC,EAHxEvT,GAAU,EACVwT,GAAgB,EAChBC,GAAiB,EACjBC,EAAU,IAAKC,EAAW,IAAKC,EAAU,KAAMC,EAAW,IAE9D,IAAIpI,EAASvqC,OAAS,EAAG,CAEvB,IAAK,GAAIH,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAAK,CACxC,GAAIyM,GAAQjS,KAAKm0B,OAAO+b,EAAS1qC,GAC7ByM,IAA2C,SAAlCA,EAAMxD,QAAQm9B,kBACzBqM,GAAgB,EAChBE,EAAU,EACVE,EAAU,GAEHpmC,GAASA,EAAMxD,QAAQm9B,mBAC9BsM,GAAiB,EACjBE,EAAW,EACXE,EAAW,GAKf,IAAK,GAAI9yC,GAAI,EAAGA,EAAI0qC,EAASvqC,OAAQH,IAC/BixC,EAAY3wC,eAAeoqC,EAAS1qC,KAClCixC,EAAYvG,EAAS1qC,IAAI+yC,UAAW,IACtCR,EAAStB,EAAYvG,EAAS1qC,IAAIsG,IAClCksC,EAASvB,EAAYvG,EAAS1qC,IAAIkH,IAEe,SAA7C+pC,EAAYvG,EAAS1qC,IAAIomC,kBAC3BqM,GAAgB,EAChBE,EAAUA,EAAUJ,EAASA,EAASI,EACtCE,EAAoBL,EAAVK,EAAmBL,EAASK,IAGtCH,GAAiB,EACjBE,EAAWA,EAAWL,EAASA,EAASK,EACxCE,EAAsBN,EAAXM,EAAoBN,EAASM,GAM3B,IAAjBL,GACFj4C,KAAK41C,UAAUriB,SAAS4kB,EAASE,GAEb,GAAlBH,GACFl4C,KAAK61C,WAAWtiB,SAAS6kB,EAAUE,GAoCvC,MAjCA7T,GAAUzkC,KAAKw4C,qBAAqBP,EAAgBj4C,KAAK41C,YAAenR,EACxEA,EAAUzkC,KAAKw4C,qBAAqBN,EAAgBl4C,KAAK61C,aAAepR,EAElD,GAAlByT,GAA2C,GAAjBD,GAC5Bj4C,KAAK41C,UAAU6C,WAAY,EAC3Bz4C,KAAK61C,WAAW4C,WAAY,IAG5Bz4C,KAAK41C,UAAU6C,WAAY,EAC3Bz4C,KAAK61C,WAAW4C,WAAY,GAE9Bz4C,KAAK
61C,WAAW7O,QAAUiR,EACI,GAA1Bj4C,KAAK61C,WAAW7O,QACWhnC,KAAK41C,UAAU7O,WAAtB,GAAlBmR,EAAqDl4C,KAAK61C,WAAWtjC,MAChB,EAEzDkyB,EAAUzkC,KAAK41C,UAAUn0B,UAAYgjB,EACrCzkC,KAAK61C,WAAWhP,iBAAmB7mC,KAAK41C,UAAUhP,WAClD5mC,KAAK61C,WAAW/O,aAAe9mC,KAAK41C,UAAU9O,aAC9CrC,EAAUzkC,KAAK61C,WAAWp0B,UAAYgjB,GAGtCA,EAAUzkC,KAAK61C,WAAWp0B,UAAYgjB,EAIE,IAAtCyL,EAASvpC,QAAQ,mBACnBupC,EAAS9nC,OAAO8nC,EAASvpC,QAAQ,kBAAkB,GAEV,IAAvCupC,EAASvpC,QAAQ,oBACnBupC,EAAS9nC,OAAO8nC,EAASvpC,QAAQ,mBAAmB,GAG/C89B,GAYTzhC,EAAUmQ,UAAUqlC,qBAAuB,SAAUE,EAAU5X,GAC7D,GAAI7B,IAAU,CAad,OAZgB,IAAZyZ,EACE5X,EAAK/Q,IAAIzQ,MAAMzV,YAA6B,GAAfi3B,EAAK5H,SACpC4H,EAAK4G,OACLzI,GAAU,GAIP6B,EAAK/Q,IAAIzQ,MAAMzV,YAA6B,GAAfi3B,EAAK5H,SACrC4H,EAAK6G,OACL1I,GAAU,GAGPA,GAaTj8B,EAAUmQ,UAAU4jC,qBAAuB,SAAU4B,GAKnD,IAAK,GAHDC,GAAQC,EADRC,KAEA7jB,EAAWj1B,KAAK20B,KAAKh0B,KAAKs0B,SAErBzvB,EAAI,EAAGA,EAAImzC,EAAWhzC,OAAQH,IACrCozC,EAAS3jB,EAAS0jB,EAAWnzC,GAAGuM,GAAK/R,KAAKgG,MAAMuM,MAChDsmC,EAASF,EAAWnzC,GAAGwM,EACvB8mC,EAAc9wC,MAAM+J,EAAG6mC,EAAQ5mC,EAAG6mC,GAGpC,OAAOC,IAcT91C,EAAUmQ,UAAUgkC,qBAAuB,SAAUwB,EAAY1mC,GAC/D,GACI2mC,GAAQC,EADRC,KAEA7jB,EAAWj1B,KAAK20B,KAAKh0B,KAAKs0B,SAC1B6L,EAAO9gC,KAAK41C,UACZmD,EAAY70C,OAAOlE,KAAK4lC,IAAI34B,MAAMuF,OAAOhI,QAAQ,KAAK,IACpB,UAAlCyH,EAAMxD,QAAQm9B,mBAChB9K,EAAO9gC,KAAK61C,WAGd,KAAK,GAAIrwC,GAAI,EAAGA,EAAImzC,EAAWhzC,OAAQH,IACrCozC,EAAS3jB,EAAS0jB,EAAWnzC,GAAGuM,GAAK/R,KAAKgG,MAAMuM,MAChDsmC,EAAS3zC,KAAKwoB,MAAMoT,EAAK2I,aAAakP,EAAWnzC,GAAGwM,IACpD8mC,EAAc9wC,MAAM+J,EAAG6mC,EAAQ5mC,EAAG6mC,GAKpC,OAFA5mC,GAAM64B,gBAAgB5lC,KAAK4G,IAAIitC,EAAWjY,EAAK2I,aAAa,KAErDqP,GAITj5C,EAAOD,QAAUoD,GAKb,SAASnD,EAAQD,EAASM,GAgB9B,QAAS+C,GAAU0xB,EAAMlmB,GACvBzO,KAAK+vB,KACH0c,WAAY,KACZjG,SACAwS,cACAC,cACAjoC,WACEw1B,SACAwS,cACAC,gBAGJj5C,KAAKgG,OACHyvB,OACE7lB,MAAO,EACPC,IAAK,EACLurB,YAAa,GAEf8d,QAAS,GAGXl5C,KAAKq0B,gBACHE,YAAa,SAEbuR,iBAAiB,EACjBC,iBAAiB,EACjBzE,OAAQ,MAEVthC,KAAKyO,QAAU9N,EAAK2E,UAAWtF,KAAKq0B,gBAEpCr0B,KAAK20B,KAAOA,EAGZ30B,KAAK00B,UAEL10B,KAAKkT,WAAWzE,GAjDlB,GAAI9N,GAAOT,EAAoB,GAC3BqC,EAAYrC,EAAoB,IAChC6B,EAAW7B,EAAoB,IAC/ByB,EAAWzB,EAAoB,IAC/B2D,EAAS3D,EAAoB,GAgDjC+C,GAASkQ,UAAY,GAAI5Q,GAUzBU,EAASkQ,UAAUD,WAAa,SAASzE,GACnCA,IAEF9N,EAAKoF,iBACH,cACA,kBACA,kBACA,cACA,UACC/F,KAAKyO,QAASA,GAIb,UAAYA,KACe,kBAAlB5K,GAAOwgC,OAEhBxgC,EAAOwgC,OAAO51B,EAAQ41B,QAGtBxgC,EAAOygC,KAAK71B,EAAQ41B,WAS5BphC,EAASkQ,UAAUuhB,QAAU,WAC3B10B,KAAK+vB,IAAI0c,WAAal7B,SAASM,cAAc,OAC7C7R,KAAK+vB,IAAI5jB,WAAaoF,SAASM,cAAc,OAE7C7R,KAAK+vB,IAAI0c,WAAW5kC,UAAY,sBAChC7H,KAAK+vB,IAAI5jB,WAAWtE,UAAY,uBAMlC5E,EAASkQ,UAAUG,QAAU,WAEvBtT,KAAK+vB,IAAI0c,WAAW5iC,YACtB7J,KAAK+vB,IAAI0c,WAAW5iC,WAAWsH,YAAYnR,KAAK+vB,IAAI0c,YAElDzsC,KAAK+vB,IAAI5jB,WAAWtC,YACtB7J,KAAK+vB,IAAI5jB,WAAWtC,WAAWsH,YAAYnR,KAAK+vB,IAAI5jB,YAGtDnM,KAAK20B,KAAO,MAOd1xB,EAASkQ,UAAUsO,OAAS,WAC1B,GAAIhT,GAAUzO,KAAKyO,QACfzI,EAAQhG,KAAKgG,MACbymC,EAAazsC,KAAK+vB,IAAI0c,WACtBtgC,EAAanM,KAAK+vB,IAAI5jB,WAGtB24B,EAAiC,OAAvBr2B,EAAQ8lB,YAAwBv0B,KAAK20B,KAAK5E,IAAIpoB,IAAM3H,KAAK20B,KAAK5E,IAAIzM,OAC5E61B,EAAiB1M,EAAW5iC,aAAei7B,CAG/C9kC,MAAKooC,oBAGL,IACItC,IADc9lC,KAAKyO,QAAQ8lB,YACTv0B,KAAKyO,QAAQq3B,iBAC/BC,EAAkB/lC,KAAKyO,QAAQs3B,eAGnC//B,GAAMqiC,iBAAmBvC,EAAkB9/B,EAAMsiC,gBAAkB,EACnEtiC,EAAMuiC,iBAAmBxC,EAAkB//B,EAAMwiC,gBAAkB,EACnExiC,EAAMwM,OAASxM,EAAMqiC,iBAAmBriC,EAAMuiC,iBAC9CviC,EAAMuM,MAAQk6B,EAAWrc,YAEzBpqB,EAAM0iC,gBAAkB1oC,KAAK20B,KAAKC,SAASl1B,KAAK8S,OAASxM,EAAMuiC,kBACnC,OAAvB95B,EAAQ8lB,YAAuBv0B,KAAK20B,KAAKC,SAAStR,OAAO9Q,OAASxS,KAAK20B,KAAKC,SAASjtB,IAAI6K,QAC9FxM,EAAMyiC,eAAiB,EACvBziC,EAAM4iC,gBAAkB5iC,EAAM0iC,gBAAkB1iC,EAAMuiC,iBACtDviC,EAAM2iC,eAAiB,CAGvB,IAAIy
Q,GAAwB3M,EAAW4M,YACnCC,EAAwBntC,EAAWktC,WAsBvC,OArBA5M,GAAW5iC,YAAc4iC,EAAW5iC,WAAWsH,YAAYs7B,GAC3DtgC,EAAWtC,YAAcsC,EAAWtC,WAAWsH,YAAYhF,GAE3DsgC,EAAWx/B,MAAMuF,OAASxS,KAAKgG,MAAMwM,OAAS,KAE9CxS,KAAKu5C,iBAGDH,EACFtU,EAAOlzB,aAAa66B,EAAY2M,GAGhCtU,EAAOrzB,YAAYg7B,GAEjB6M,EACFt5C,KAAK20B,KAAK5E,IAAIgV,mBAAmBnzB,aAAazF,EAAYmtC,GAG1Dt5C,KAAK20B,KAAK5E,IAAIgV,mBAAmBtzB,YAAYtF,GAGxCnM,KAAKwkC,cAAgB2U,GAO9Bl2C,EAASkQ,UAAUomC,eAAiB,WAClC,GAAIhlB,GAAcv0B,KAAKyO,QAAQ8lB,YAG3B3kB,EAAQjP,EAAKkG,QAAQ7G,KAAK20B,KAAKc,MAAM7lB,MAAO,UAC5CC,EAAMlP,EAAKkG,QAAQ7G,KAAK20B,KAAKc,MAAM5lB,IAAK,UACxC2pC,EAAgBx5C,KAAK20B,KAAKh0B,KAAK00B,OAA2C,GAAnCr1B,KAAKgG,MAAM+jC,gBAAkB,KAAS/iC,UAC7Eo0B,EAAcoe,EAAgB73C,EAASk5B,wBAAwB76B,KAAK20B,KAAKI,YAAa/0B,KAAK20B,KAAKc,MAAO+jB,EAC3Gpe,IAAep7B,KAAK20B,KAAKh0B,KAAK00B,OAAO,GAAGruB,SAExC,IAAImhB,GAAO,GAAIpmB,GAAS,GAAIuC,MAAKsL,GAAQ,GAAItL,MAAKuL,GAAMurB,EAAap7B,KAAK20B,KAAKI,YAC3E/0B,MAAKyO,QAAQ6yB,QACfnZ,EAAK4Z,UAAU/hC,KAAKyO,QAAQ6yB,QAE9BthC,KAAKmoB,KAAOA,CAKZ,IAAI4H,GAAM/vB,KAAK+vB,GACfA,GAAI/e,UAAUw1B,MAAQzW,EAAIyW,MAC1BzW,EAAI/e,UAAUgoC,WAAajpB,EAAIipB,WAC/BjpB,EAAI/e,UAAUioC,WAAalpB,EAAIkpB,WAC/BlpB,EAAIyW,SACJzW,EAAIipB,cACJjpB,EAAIkpB,aAEJ,IAAIQ,GAEAvc,EAGAwc,EAGA7xC,EAPAkK,EAAI,EAEJ4nC,EAAQ,EACRpnC,EAAQ,EAERqnC,EAAmBpzC,OACnBkG,EAAM,CAIV,KADAyb,EAAK8Z,QACE9Z,EAAK0U,WAAmB,IAANnwB,GACvBA,IAEA+sC,EAAMtxB,EAAKC,aACX8U,EAAU/U,EAAK+U,UACfr1B,EAAYsgB,EAAK2b,eAEjB6V,EAAQ5nC,EACRA,EAAI/R,KAAK20B,KAAKh0B,KAAKs0B,SAASwkB,GAC5BlnC,EAAQR,EAAI4nC,EACRD,IACFA,EAASzsC,MAAMsF,MAAQA,EAAQ,MAG7BvS,KAAKyO,QAAQq3B,iBACf9lC,KAAK65C,kBAAkB9nC,EAAGoW,EAAKyb,gBAAiBrP,EAAa1sB,GAG3Dq1B,GAAWl9B,KAAKyO,QAAQs3B,iBACtBh0B,EAAI,IACkBvL,QAApBozC,IACFA,EAAmB7nC,GAErB/R,KAAK85C,kBAAkB/nC,EAAGoW,EAAK0b,gBAAiBtP,EAAa1sB,IAE/D6xC,EAAW15C,KAAK+5C,kBAAkBhoC,EAAGwiB,EAAa1sB,IAGlD6xC,EAAW15C,KAAKg6C,kBAAkBjoC,EAAGwiB,EAAa1sB,GAGpDsgB,EAAKE,MAIP,IAAIroB,KAAKyO,QAAQs3B,gBAAiB,CAChC,GAAIkU,GAAWj6C,KAAK20B,KAAKh0B,KAAK00B,OAAO,GACjC6kB,EAAW/xB,EAAK0b,cAAcoW,GAC9BE,EAAYD,EAASv0C,QAAU3F,KAAKgG,MAAM8jC,gBAAkB,IAAM,IAE9CtjC,QAApBozC,GAA6CA,EAAZO,IACnCn6C,KAAK85C,kBAAkB,EAAGI,EAAU3lB,EAAa1sB,GAKrDlH,EAAK0H,QAAQrI,KAAK+vB,IAAI/e,UAAW,SAAUopC,GACzC,KAAOA,EAAIz0C,QAAQ,CACjB,GAAI4B,GAAO6yC,EAAIC,KACX9yC,IAAQA,EAAKsC,YACftC,EAAKsC,WAAWsH,YAAY5J,OAcpCtE,EAASkQ,UAAU0mC,kBAAoB,SAAU9nC,EAAGwX,EAAMgL,EAAa1sB,GAErE,GAAI4gB,GAAQzoB,KAAK+vB,IAAI/e,UAAUioC,WAAW3nC,OAE1C,KAAKmX,EAAO,CAEV,GAAImH,GAAUre,SAAS04B,eAAe,GACtCxhB,GAAQlX,SAASM,cAAc,OAC/B4W,EAAMhX,YAAYme,GAClB5vB,KAAK+vB,IAAI0c,WAAWh7B,YAAYgX,GAElCzoB,KAAK+vB,IAAIkpB,WAAWjxC,KAAKygB,GAEzBA,EAAM6xB,WAAW,GAAGC,UAAYhxB,EAEhCd,EAAMxb,MAAMtF,IAAsB,OAAf4sB,EAAyBv0B,KAAKgG,MAAMuiC,iBAAmB,KAAQ,IAClF9f,EAAMxb,MAAMxF,KAAOsK,EAAI,KACvB0W,EAAM5gB,UAAY,cAAgBA,GAYpC5E,EAASkQ,UAAU2mC,kBAAoB,SAAU/nC,EAAGwX,EAAMgL,EAAa1sB,GAErE,GAAI4gB,GAAQzoB,KAAK+vB,IAAI/e,UAAUgoC,WAAW1nC,OAE1C,KAAKmX,EAAO,CAEV,GAAImH,GAAUre,SAAS04B,eAAe1gB,EACtCd,GAAQlX,SAASM,cAAc,OAC/B4W,EAAMhX,YAAYme,GAClB5vB,KAAK+vB,IAAI0c,WAAWh7B,YAAYgX,GAElCzoB,KAAK+vB,IAAIipB,WAAWhxC,KAAKygB,GAEzBA,EAAM6xB,WAAW,GAAGC,UAAYhxB,EAChCd,EAAM5gB,UAAY,cAAgBA,EAGlC4gB,EAAMxb,MAAMtF,IAAsB,OAAf4sB,EAAwB,IAAOv0B,KAAKgG,MAAMqiC,iBAAoB,KACjF5f,EAAMxb,MAAMxF,KAAOsK,EAAI,MAWzB9O,EAASkQ,UAAU6mC,kBAAoB,SAAUjoC,EAAGwiB,EAAa1sB,GAE/D,GAAIgoB,GAAO7vB,KAAK+vB,IAAI/e,UAAUw1B,MAAMl1B,OAC/Bue,KAEHA,EAAOte,SAASM,cAAc,OAC9B7R,KAAK+vB,IAAI5jB,WAAWsF,YAAYoe,IAElC7vB,KAAK+vB,IAAIyW,MAAMx+B,KAAK6nB,EAEpB,IAAI7pB,GAAQhG,KAAKgG,KAYjB,OAVE6pB,GAAK5iB,MAAMtF,IADM,OAAf4sB,EACevuB,EAAMuiC,iBAAmB,KAGzBvoC,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAAS,KAEnDqd,EAAK5iB,MAAMuF,OAASxM,E
AAM0iC,gBAAkB,KAC5C7Y,EAAK5iB,MAAMxF,KAAQsK,EAAI/L,EAAMyiC,eAAiB,EAAK,KAEnD5Y,EAAKhoB,UAAY,uBAAyBA,EAEnCgoB,GAWT5sB,EAASkQ,UAAU4mC,kBAAoB,SAAUhoC,EAAGwiB,EAAa1sB,GAE/D,GAAIgoB,GAAO7vB,KAAK+vB,IAAI/e,UAAUw1B,MAAMl1B,OAC/Bue,KAEHA,EAAOte,SAASM,cAAc,OAC9B7R,KAAK+vB,IAAI5jB,WAAWsF,YAAYoe,IAElC7vB,KAAK+vB,IAAIyW,MAAMx+B,KAAK6nB,EAEpB,IAAI7pB,GAAQhG,KAAKgG,KAYjB,OAVE6pB,GAAK5iB,MAAMtF,IADM,OAAf4sB,EACe,IAGAv0B,KAAK20B,KAAKC,SAASjtB,IAAI6K,OAAS,KAEnDqd,EAAK5iB,MAAMxF,KAAQsK,EAAI/L,EAAM2iC,eAAiB,EAAK,KACnD9Y,EAAK5iB,MAAMuF,OAASxM,EAAM4iC,gBAAkB,KAE5C/Y,EAAKhoB,UAAY,uBAAyBA,EAEnCgoB,GAQT5sB,EAASkQ,UAAUi1B,mBAAqB,WAKjCpoC,KAAK+vB,IAAIma,mBACZlqC,KAAK+vB,IAAIma,iBAAmB34B,SAASM,cAAc,OACnD7R,KAAK+vB,IAAIma,iBAAiBriC,UAAY,qBACtC7H,KAAK+vB,IAAIma,iBAAiBj9B,MAAM2W,SAAW,WAE3C5jB,KAAK+vB,IAAIma,iBAAiBz4B,YAAYF,SAAS04B,eAAe,MAC9DjqC,KAAK+vB,IAAI0c,WAAWh7B,YAAYzR,KAAK+vB,IAAIma,mBAE3ClqC,KAAKgG,MAAMsiC,gBAAkBtoC,KAAK+vB,IAAIma,iBAAiBrlB,aACvD7kB,KAAKgG,MAAM+jC,eAAiB/pC,KAAK+vB,IAAIma,iBAAiB1qB,YAGjDxf,KAAK+vB,IAAIqa,mBACZpqC,KAAK+vB,IAAIqa,iBAAmB74B,SAASM,cAAc,OACnD7R,KAAK+vB,IAAIqa,iBAAiBviC,UAAY,qBACtC7H,KAAK+vB,IAAIqa,iBAAiBn9B,MAAM2W,SAAW,WAE3C5jB,KAAK+vB,IAAIqa,iBAAiB34B,YAAYF,SAAS04B,eAAe,MAC9DjqC,KAAK+vB,IAAI0c,WAAWh7B,YAAYzR,KAAK+vB,IAAIqa,mBAE3CpqC,KAAKgG,MAAMwiC,gBAAkBxoC,KAAK+vB,IAAIqa,iBAAiBvlB,aACvD7kB,KAAKgG,MAAM8jC,eAAiB9pC,KAAK+vB,IAAIqa,iBAAiB5qB,aASxDvc,EAASkQ,UAAU6hB,KAAO,SAASyD,GACjC,MAAOz4B,MAAKmoB,KAAK6M,KAAKyD,IAGxB54B,EAAOD,QAAUqD,GAKb,SAASpD,EAAQD,EAASM,GAc9B,QAASgC,GAAMwQ,EAAM0nB,EAAY3rB,GAC/BzO,KAAKK,GAAK,KACVL,KAAK8kC,OAAS,KACd9kC,KAAK0S,KAAOA,EACZ1S,KAAK+vB,IAAM,KACX/vB,KAAKo6B,WAAaA,MAClBp6B,KAAKyO,QAAUA,MAEfzO,KAAKqzC,UAAW,EAChBrzC,KAAKktC,WAAY,EACjBltC,KAAKitC,OAAQ,EAEbjtC,KAAK2H,IAAM,KACX3H,KAAKyH,KAAO,KACZzH,KAAKuS,MAAQ,KACbvS,KAAKwS,OAAS,KA3BhB,GAAIzO,GAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,EA6B/BgC,GAAKiR,UAAUrR,OAAQ,EAKvBI,EAAKiR,UAAUg+B,OAAS,WACtBnxC,KAAKqzC,UAAW,EAChBrzC,KAAKitC,OAAQ,EACTjtC,KAAKktC,WAAWltC,KAAKyhB,UAM3Bvf,EAAKiR,UAAU+9B,SAAW,WACxBlxC,KAAKqzC,UAAW,EAChBrzC,KAAKitC,OAAQ,EACTjtC,KAAKktC,WAAWltC,KAAKyhB,UAQ3Bvf,EAAKiR,UAAU6E,QAAU,SAAStF,GAChC1S,KAAK0S,KAAOA,EACZ1S,KAAKitC,OAAQ,EACTjtC,KAAKktC,WAAWltC,KAAKyhB,UAO3Bvf,EAAKiR,UAAUu6B,UAAY,SAAS5I,GAC9B9kC,KAAKktC,WACPltC,KAAK0nC,OACL1nC,KAAK8kC,OAASA,EACV9kC,KAAK8kC,QACP9kC,KAAK2nC,QAIP3nC,KAAK8kC,OAASA,GASlB5iC,EAAKiR,UAAU27B,UAAY,WAEzB,OAAO,GAOT5sC,EAAKiR,UAAUw0B,KAAO,WACpB,OAAO,GAOTzlC,EAAKiR,UAAUu0B,KAAO,WACpB,OAAO,GAMTxlC,EAAKiR,UAAUsO,OAAS,aAOxBvf,EAAKiR,UAAUw7B,YAAc,aAO7BzsC,EAAKiR,UAAUo6B,YAAc,aAS7BrrC,EAAKiR,UAAUqnC,qBAAuB,SAAUC,GAC9C,GAAIz6C,KAAKqzC,UAAYrzC,KAAKyO,QAAQygC,SAAS74B,SAAWrW,KAAK+vB,IAAI2qB,aAAc,CAE3E,GAAIvmC,GAAKnU,KAEL06C,EAAenpC,SAASM,cAAc,MAC1C6oC,GAAa7yC,UAAY,SACzB6yC,EAAa1V,MAAQ,mBAGrB,GAAIjhC,GAAO22C,GAAcnnC,GAAG,MAAO,SAAUjK,GAC3C6K,EAAG2wB,OAAOiJ,kBAAkB55B,GAC5B7K,EAAMq8B,kBACNr8B,EAAMD,mBAGRoxC,EAAOhpC,YAAYipC,GACnB16C,KAAK+vB,IAAI2qB,aAAeA,OAEhB16C,KAAKqzC,UAAYrzC,KAAK+vB,IAAI2qB,eAE9B16C,KAAK+vB,IAAI2qB,aAAa7wC,YACxB7J,KAAK+vB,IAAI2qB,aAAa7wC,WAAWsH,YAAYnR,KAAK+vB,IAAI2qB,cAExD16C,KAAK+vB,IAAI2qB,aAAe,OAS5Bx4C,EAAKiR,UAAUwnC,gBAAkB,SAAU/xC,GACzC,GAAIgnB,EACJ,IAAI5vB,KAAKyO,QAAQmsC,SAAU,CACzB,GAAI/jB,GAAW72B,KAAK8kC,OAAOjP,QAAQC,UAAU5gB,IAAIlV,KAAKK,GACtDuvB,GAAU5vB,KAAKyO,QAAQmsC,SAAS/jB,OAGhCjH,GAAU5vB,KAAK0S,KAAKkd,OAGtB,IAAGA,IAAY5vB,KAAK4vB,QAAS,CAE3B,GAAIA,YAAmB+c,SACrB/jC,EAAQqb,UAAY,GACpBrb,EAAQ6I,YAAYme,OAEjB,IAAeppB,QAAXopB,EACPhnB,EAAQqb,UAAY2L,MAGpB,IAAwB,cAAlB5vB,KAAK0S,KAAK5L,MAA8CN,SAAtBxG,KAAK0S,KAAKkd,QAChD,KAAM,IAAIhsB,OAAM,sCAAwC5D,KAAKK,GAIjEL,MAAK4vB,QAAUA,IASnB1tB,EA
AKiR,UAAU0nC,aAAe,SAAUjyC,GACf,MAAnB5I,KAAK0S,KAAKsyB,MACZp8B,EAAQo8B,MAAQhlC,KAAK0S,KAAKsyB,OAAS,GAGnCp8B,EAAQkyC,gBAAgB,UAS3B54C,EAAKiR,UAAU4nC,sBAAwB,SAASnyC,GAC/C,GAAI5I,KAAKyO,QAAQusC,gBAAkBh7C,KAAKyO,QAAQusC,eAAer1C,OAAS,EAAG,CACzE,GAAIs1C,KAEJ,IAAIh1C,MAAMC,QAAQlG,KAAKyO,QAAQusC,gBAC7BC,EAAaj7C,KAAKyO,QAAQusC,mBAEvB,CAAA,GAAmC,OAA/Bh7C,KAAKyO,QAAQusC,eAIpB,MAHAC,GAAa10C,OAAO6G,KAAKpN,KAAK0S,MAMhC,IAAK,GAAIlN,GAAI,EAAGA,EAAIy1C,EAAWt1C,OAAQH,IAAK,CAC1C,GAAIyQ,GAAOglC,EAAWz1C,GAClB6B,EAAQrH,KAAK0S,KAAKuD,EAET,OAAT5O,EACFuB,EAAQsyC,aAAa,QAAUjlC,EAAM5O,GAGrCuB,EAAQkyC,gBAAgB,QAAU7kC,MAW1C/T,EAAKiR,UAAUgoC,aAAe,SAASvyC,GAEjC5I,KAAKiN,QACPtM,EAAK8M,cAAc7E,EAAS5I,KAAKiN,OACjCjN,KAAKiN,MAAQ,MAIXjN,KAAK0S,KAAKzF,QACZtM,EAAK2M,WAAW1E,EAAS5I,KAAK0S,KAAKzF,OACnCjN,KAAKiN,MAAQjN,KAAK0S,KAAKzF,QAI3BpN,EAAOD,QAAUsC,GAKb,SAASrC,EAAQD,EAASM,GAkB9B,QAASiC,GAAgBuQ,EAAM0nB,EAAY3rB,GASzC,GARAzO,KAAKgG,OACH4pB,SACErd,MAAO,IAGXvS,KAAK6jB,UAAW,EAGZnR,EAAM,CACR,GAAkBlM,QAAdkM,EAAK9C,MACP,KAAM,IAAIhM,OAAM,oCAAsC8O,EAAKrS,GAE7D,IAAgBmG,QAAZkM,EAAK7C,IACP,KAAM,IAAIjM,OAAM,kCAAoC8O,EAAKrS,IAI7D6B,EAAK3B,KAAKP,KAAM0S,EAAM0nB,EAAY3rB,GAElCzO,KAAKo7C,cAAe,EApCtB,GACIl5C,IADShC,EAAoB,IACtBA,EAAoB,KAC3B2C,EAAkB3C,EAAoB,IACtCoC,EAAYpC,EAAoB,GAoCpCiC,GAAegR,UAAY,GAAIjR,GAAM,KAAM,KAAM,MAEjDC,EAAegR,UAAUkoC,cAAgB,kBACzCl5C,EAAegR,UAAUrR,OAAQ,EAOjCK,EAAegR,UAAU27B,UAAY,SAASrZ,GAE5C,MAAQz1B,MAAK0S,KAAK9C,MAAQ6lB,EAAM5lB,KAAS7P,KAAK0S,KAAK7C,IAAM4lB,EAAM7lB,OAMjEzN,EAAegR,UAAUsO,OAAS,WAChC,GAAIsO,GAAM/vB,KAAK+vB,GAuBf,IAtBKA,IAEH/vB,KAAK+vB,OACLA,EAAM/vB,KAAK+vB,IAGXA,EAAIygB,IAAMj/B,SAASM,cAAc,OAIjCke,EAAIH,QAAUre,SAASM,cAAc,OACrCke,EAAIH,QAAQ/nB,UAAY,UACxBkoB,EAAIygB,IAAI/+B,YAAYse,EAAIH,SAMxB5vB,KAAKitC,OAAQ,IAIVjtC,KAAK8kC,OACR,KAAM,IAAIlhC,OAAM,yCAElB,KAAKmsB,EAAIygB,IAAI3mC,WAAY,CACvB,GAAIsC,GAAanM,KAAK8kC,OAAO/U,IAAI5jB,UACjC,KAAKA,EACH,KAAM,IAAIvI,OAAM,iEAElBuI,GAAWsF,YAAYse,EAAIygB,KAQ7B,GANAxwC,KAAKktC,WAAY,EAMbltC,KAAKitC,MAAO,CACdjtC,KAAK26C,gBAAgB36C,KAAK+vB,IAAIH,SAC9B5vB,KAAK66C,aAAa76C,KAAK+vB,IAAIH,SAC3B5vB,KAAK+6C,sBAAsB/6C,KAAK+vB,IAAIH,SACpC5vB,KAAKm7C,aAAan7C,KAAK+vB,IAAIygB,IAG3B,IAAI3oC,IAAa7H,KAAK0S,KAAK7K,UAAa,IAAM7H,KAAK0S,KAAK7K,UAAa,KAChE7H,KAAKqzC,SAAW,YAAc,GACnCtjB,GAAIygB,IAAI3oC,UAAY7H,KAAKq7C,cAAgBxzC,EAGzC7H,KAAK6jB,SAA6D,WAAlDta,OAAOsrC,iBAAiB9kB,EAAIH,SAAS/L,SAGrD7jB,KAAKgG,MAAM4pB,QAAQrd,MAAQvS,KAAK+vB,IAAIH,QAAQQ,YAC5CpwB,KAAKwS,OAAS,EAEdxS,KAAKitC,OAAQ,IAQjB9qC,EAAegR,UAAUw0B,KAAOrlC,EAAU6Q,UAAUw0B,KAMpDxlC,EAAegR,UAAUu0B,KAAOplC,EAAU6Q,UAAUu0B,KAMpDvlC,EAAegR,UAAUw7B,YAAcrsC,EAAU6Q,UAAUw7B,YAM3DxsC,EAAegR,UAAUo6B,YAAc,SAAS7zB,GAC9C,GAAI4hC,GAAqC,QAA7Bt7C,KAAKyO,QAAQ8lB,WACzBv0B,MAAK+vB,IAAIH,QAAQ3iB,MAAMtF,IAAM2zC,EAAQ,GAAK,IAC1Ct7C,KAAK+vB,IAAIH,QAAQ3iB,MAAMqW,OAASg4B,EAAQ,IAAM,EAC9C,IAAI9oC,EAGJ,IAA2BhM,SAAvBxG,KAAK0S,KAAK2uB,SAAwB,CACpC,GAAIka,GAAev7C,KAAK0S,KAAK2uB,SACzBF,EAAYnhC,KAAK8kC,OAAO3D,UACxB8K,EAAgB9K,EAAUoa,GAAcpzC,KAE5C,IAAa,GAATmzC,EAAe,CAEjB9oC,EAASxS,KAAK8kC,OAAO3D,UAAUoa,GAAc/oC,OAASkH,EAAOrK,KAAKoW,SAClEjT,GAA2B,GAAjBy5B,EAAqBvyB,EAAOonB,KAAO,GAAIpnB,EAAOrK,KAAKoW,SAAW,CACxE,IAAI2b,GAASphC,KAAK8kC,OAAOn9B,GACzB,KAAK,GAAI05B,KAAYF,GACfA,EAAUr7B,eAAeu7B,IACQ,GAA/BF,EAAUE,GAAU3Y,SAAmByY,EAAUE,GAAUl5B,MAAQ8jC,IACrE7K,GAAUD,EAAUE,GAAU7uB,OAASkH,EAAOrK,KAAKoW,SAMzD2b,IAA2B,GAAjB6K,EAAqBvyB,EAAOonB,KAAO,GAAMpnB,EAAOrK,KAAKoW,SAAW,EAC1EzlB,KAAK+vB,IAAIygB,IAAIvjC,MAAMtF,IAAMy5B,EAAS,KAClCphC,KAAK+vB,IAAIygB,IAAIvjC,MAAMqW,OAAS,OAGzB,CACH,GAAI8d,GAASphC,KAAK8kC,OAAOn9B,GACzB,KAAK,GAAI05B,KAAYF,GACfA,EAAUr7B,eAAeu7B,IACQ,GAA/BF,EAAUE,GAAU3Y,SAAmByY,EAAUE,GAAUl5B,MAAQ8jC,IACr
E7K,GAAUD,EAAUE,GAAU7uB,OAASkH,EAAOrK,KAAKoW,SAIzDjT,GAASxS,KAAK8kC,OAAO3D,UAAUoa,GAAc/oC,OAASkH,EAAOrK,KAAKoW,SAClEzlB,KAAK+vB,IAAIygB,IAAIvjC,MAAMtF,IAAMy5B,EAAS,KAClCphC,KAAK+vB,IAAIygB,IAAIvjC,MAAMqW,OAAS,QAM1BtjB,MAAK8kC,iBAAkBjiC,IAEzB2P,EAAStN,KAAKwH,IAAI1M,KAAK8kC,OAAOtyB,OAC1BxS,KAAK8kC,OAAOjP,QAAQlB,KAAKC,SAAS1I,OAAO1Z,OACzCxS,KAAK8kC,OAAOjP,QAAQlB,KAAKC,SAASiD,gBAAgBrlB,QACtDxS,KAAK+vB,IAAIygB,IAAIvjC,MAAMtF,IAAM2zC,EAAQ,IAAM,GACvCt7C,KAAK+vB,IAAIygB,IAAIvjC,MAAMqW,OAASg4B,EAAQ,GAAK,MAGzC9oC,EAASxS,KAAK8kC,OAAOtyB,OAErBxS,KAAK+vB,IAAIygB,IAAIvjC,MAAMtF,IAAM3H,KAAK8kC,OAAOn9B,IAAM,KAC3C3H,KAAK+vB,IAAIygB,IAAIvjC,MAAMqW,OAAS,GAGhCtjB,MAAK+vB,IAAIygB,IAAIvjC,MAAMuF,OAASA,EAAS,MAGvC3S,EAAOD,QAAUuC,GAKb,SAAStC,EAAQD,EAASM,GAe9B,QAASkC,GAASsQ,EAAM0nB,EAAY3rB,GAalC,GAZAzO,KAAKgG,OACH8pB,KACEvd,MAAO,EACPC,OAAQ,GAEVqd,MACEtd,MAAO,EACPC,OAAQ,IAKRE,GACgBlM,QAAdkM,EAAK9C,MACP,KAAM,IAAIhM,OAAM,oCAAsC8O,EAI1DxQ,GAAK3B,KAAKP,KAAM0S,EAAM0nB,EAAY3rB,GAhCpC,CAAA,GAAIvM,GAAOhC,EAAoB,GACpBA,GAAoB,GAkC/BkC,EAAQ+Q,UAAY,GAAIjR,GAAM,KAAM,KAAM,MAO1CE,EAAQ+Q,UAAU27B,UAAY,SAASrZ,GAGrC,GAAIjD,IAAYiD,EAAM5lB,IAAM4lB,EAAM7lB,OAAS,CAC3C,OAAQ5P,MAAK0S,KAAK9C,MAAQ6lB,EAAM7lB,MAAQ4iB,GAAcxyB,KAAK0S,KAAK9C,MAAQ6lB,EAAM5lB,IAAM2iB,GAMtFpwB,EAAQ+Q,UAAUsO,OAAS,WACzB,GAAIsO,GAAM/vB,KAAK+vB,GA6Bf,IA5BKA,IAEH/vB,KAAK+vB,OACLA,EAAM/vB,KAAK+vB,IAGXA,EAAIygB,IAAMj/B,SAASM,cAAc,OAGjCke,EAAIH,QAAUre,SAASM,cAAc,OACrCke,EAAIH,QAAQ/nB,UAAY,UACxBkoB,EAAIygB,IAAI/+B,YAAYse,EAAIH,SAGxBG,EAAIF,KAAOte,SAASM,cAAc,OAClCke,EAAIF,KAAKhoB,UAAY,OAGrBkoB,EAAID,IAAMve,SAASM,cAAc,OACjCke,EAAID,IAAIjoB,UAAY,MAGpBkoB,EAAIygB,IAAI,iBAAmBxwC,KAE3BA,KAAKitC,OAAQ,IAIVjtC,KAAK8kC,OACR,KAAM,IAAIlhC,OAAM,yCAElB,KAAKmsB,EAAIygB,IAAI3mC,WAAY,CACvB,GAAI4iC,GAAazsC,KAAK8kC,OAAO/U,IAAI0c,UACjC,KAAKA,EAAY,KAAM,IAAI7oC,OAAM,iEACjC6oC,GAAWh7B,YAAYse,EAAIygB,KAE7B,IAAKzgB,EAAIF,KAAKhmB,WAAY,CACxB,GAAIsC,GAAanM,KAAK8kC,OAAO/U,IAAI5jB,UACjC,KAAKA,EAAY,KAAM,IAAIvI,OAAM,iEACjCuI,GAAWsF,YAAYse,EAAIF,MAE7B,IAAKE,EAAID,IAAIjmB,WAAY,CACvB,GAAIi3B,GAAO9gC,KAAK8kC,OAAO/U,IAAI+Q,IAC3B,KAAK30B,EAAY,KAAM,IAAIvI,OAAM,2DACjCk9B,GAAKrvB,YAAYse,EAAID,KAQvB,GANA9vB,KAAKktC,WAAY,EAMbltC,KAAKitC,MAAO,CACdjtC,KAAK26C,gBAAgB36C,KAAK+vB,IAAIH,SAC9B5vB,KAAK66C,aAAa76C,KAAK+vB,IAAIygB,KAC3BxwC,KAAK+6C,sBAAsB/6C,KAAK+vB,IAAIygB,KACpCxwC,KAAKm7C,aAAan7C,KAAK+vB,IAAIygB,IAG3B,IAAI3oC,IAAa7H,KAAK0S,KAAK7K,UAAW,IAAM7H,KAAK0S,KAAK7K,UAAY,KAC7D7H,KAAKqzC,SAAW,YAAc,GACnCtjB,GAAIygB,IAAI3oC,UAAY,WAAaA,EACjCkoB,EAAIF,KAAKhoB,UAAY,YAAcA,EACnCkoB,EAAID,IAAIjoB,UAAa,WAAaA,EAGlC7H,KAAKgG,MAAM8pB,IAAItd,OAASud,EAAID,IAAIQ,aAChCtwB,KAAKgG,MAAM8pB,IAAIvd,MAAQwd,EAAID,IAAIM,YAC/BpwB,KAAKgG,MAAM6pB,KAAKtd,MAAQwd,EAAIF,KAAKO,YACjCpwB,KAAKuS,MAAQwd,EAAIygB,IAAIpgB,YACrBpwB,KAAKwS,OAASud,EAAIygB,IAAIlgB,aAEtBtwB,KAAKitC,OAAQ,EAGfjtC,KAAKw6C,qBAAqBzqB,EAAIygB,MAOhCpuC,EAAQ+Q,UAAUw0B,KAAO,WAClB3nC,KAAKktC,WACRltC,KAAKyhB,UAOTrf,EAAQ+Q,UAAUu0B,KAAO,WACvB,GAAI1nC,KAAKktC,UAAW,CAClB,GAAInd,GAAM/vB,KAAK+vB,GAEXA,GAAIygB,IAAI3mC,YAAckmB,EAAIygB,IAAI3mC,WAAWsH,YAAY4e,EAAIygB,KACzDzgB,EAAIF,KAAKhmB,YAAakmB,EAAIF,KAAKhmB,WAAWsH,YAAY4e,EAAIF,MAC1DE,EAAID,IAAIjmB,YAAckmB,EAAID,IAAIjmB,WAAWsH,YAAY4e,EAAID,KAE7D9vB,KAAK2H,IAAM,KACX3H,KAAKyH,KAAO,KAEZzH,KAAKktC,WAAY,IAQrB9qC,EAAQ+Q,UAAUw7B,YAAc,WAC9B,GAAI/+B,GAAQ5P,KAAKo6B,WAAWnF,SAASj1B,KAAK0S,KAAK9C,OAC3Cm/B,EAAQ/uC,KAAKyO,QAAQsgC,MAErByB,EAAMxwC,KAAK+vB,IAAIygB,IACf3gB,EAAO7vB,KAAK+vB,IAAIF,KAChBC,EAAM9vB,KAAK+vB,IAAID,GAIjB9vB,MAAKyH,KADM,SAATsnC,EACUn/B,EAAQ5P,KAAKuS,MAET,QAATw8B,EACKn/B,EAIAA,EAAQ5P,KAAKuS,MAAQ,EAInCi+B,EAAIvjC,MAAMxF,KAAOzH,KAAKyH,KAAO,KAG7BooB,E
AAK5iB,MAAMxF,KAAQmI,EAAQ5P,KAAKgG,MAAM6pB,KAAKtd,MAAQ,EAAK,KAGxDud,EAAI7iB,MAAMxF,KAAQmI,EAAQ5P,KAAKgG,MAAM8pB,IAAIvd,MAAQ,EAAK,MAOxDnQ,EAAQ+Q,UAAUo6B,YAAc,WAC9B,GAAIhZ,GAAcv0B,KAAKyO,QAAQ8lB,YAC3Bic,EAAMxwC,KAAK+vB,IAAIygB,IACf3gB,EAAO7vB,KAAK+vB,IAAIF,KAChBC,EAAM9vB,KAAK+vB,IAAID,GAEnB,IAAmB,OAAfyE,EACFic,EAAIvjC,MAAMtF,KAAW3H,KAAK2H,KAAO,GAAK,KAEtCkoB,EAAK5iB,MAAMtF,IAAS,IACpBkoB,EAAK5iB,MAAMuF,OAAUxS,KAAK8kC,OAAOn9B,IAAM3H,KAAK2H,IAAM,EAAK,KACvDkoB,EAAK5iB,MAAMqW,OAAS,OAEjB,CACH,GAAIk4B,GAAgBx7C,KAAK8kC,OAAOjP,QAAQ7vB,MAAMwM,OAC1C+d,EAAairB,EAAgBx7C,KAAK8kC,OAAOn9B,IAAM3H,KAAK8kC,OAAOtyB,OAASxS,KAAK2H,GAE7E6oC,GAAIvjC,MAAMtF,KAAW3H,KAAK8kC,OAAOtyB,OAASxS,KAAK2H,IAAM3H,KAAKwS,QAAU,GAAK,KACzEqd,EAAK5iB,MAAMtF,IAAU6zC,EAAgBjrB,EAAc,KACnDV,EAAK5iB,MAAMqW,OAAS,IAGtBwM,EAAI7iB,MAAMtF,KAAQ3H,KAAKgG,MAAM8pB,IAAItd,OAAS,EAAK,MAGjD3S,EAAOD,QAAUwC,GAKb,SAASvC,EAAQD,EAASM,GAc9B,QAASmC,GAAWqQ,EAAM0nB,EAAY3rB,GAcpC,GAbAzO,KAAKgG,OACH8pB,KACEnoB,IAAK,EACL4K,MAAO,EACPC,OAAQ,GAEVod,SACEpd,OAAQ,EACRipC,WAAY,IAKZ/oC,GACgBlM,QAAdkM,EAAK9C,MACP,KAAM,IAAIhM,OAAM,oCAAsC8O,EAI1DxQ,GAAK3B,KAAKP,KAAM0S,EAAM0nB,EAAY3rB,GAhCpC,GAAIvM,GAAOhC,EAAoB,GAmC/BmC,GAAU8Q,UAAY,GAAIjR,GAAM,KAAM,KAAM,MAO5CG,EAAU8Q,UAAU27B,UAAY,SAASrZ,GAGvC,GAAIjD,IAAYiD,EAAM5lB,IAAM4lB,EAAM7lB,OAAS,CAC3C,OAAQ5P,MAAK0S,KAAK9C,MAAQ6lB,EAAM7lB,MAAQ4iB,GAAcxyB,KAAK0S,KAAK9C,MAAQ6lB,EAAM5lB,IAAM2iB,GAMtFnwB,EAAU8Q,UAAUsO,OAAS,WAC3B,GAAIsO,GAAM/vB,KAAK+vB,GA0Bf,IAzBKA,IAEH/vB,KAAK+vB,OACLA,EAAM/vB,KAAK+vB,IAGXA,EAAI7d,MAAQX,SAASM,cAAc,OAInCke,EAAIH,QAAUre,SAASM,cAAc,OACrCke,EAAIH,QAAQ/nB,UAAY,UACxBkoB,EAAI7d,MAAMT,YAAYse,EAAIH,SAG1BG,EAAID,IAAMve,SAASM,cAAc,OACjCke,EAAI7d,MAAMT,YAAYse,EAAID,KAG1BC,EAAI7d,MAAM,iBAAmBlS,KAE7BA,KAAKitC,OAAQ,IAIVjtC,KAAK8kC,OACR,KAAM,IAAIlhC,OAAM,yCAElB,KAAKmsB,EAAI7d,MAAMrI,WAAY,CACzB,GAAI4iC,GAAazsC,KAAK8kC,OAAO/U,IAAI0c,UACjC,KAAKA,EACH,KAAM,IAAI7oC,OAAM,iEAElB6oC,GAAWh7B,YAAYse,EAAI7d,OAQ7B,GANAlS,KAAKktC,WAAY,EAMbltC,KAAKitC,MAAO,CACdjtC,KAAK26C,gBAAgB36C,KAAK+vB,IAAIH,SAC9B5vB,KAAK66C,aAAa76C,KAAK+vB,IAAI7d,OAC3BlS,KAAK+6C,sBAAsB/6C,KAAK+vB,IAAI7d,OACpClS,KAAKm7C,aAAan7C,KAAK+vB,IAAI7d,MAG3B,IAAIrK,IAAa7H,KAAK0S,KAAK7K,UAAW,IAAM7H,KAAK0S,KAAK7K,UAAY,KAC7D7H,KAAKqzC,SAAW,YAAc,GACnCtjB,GAAI7d,MAAMrK,UAAa,aAAeA,EACtCkoB,EAAID,IAAIjoB,UAAa,WAAaA,EAGlC7H,KAAKuS,MAAQwd,EAAI7d,MAAMke,YACvBpwB,KAAKwS,OAASud,EAAI7d,MAAMoe,aACxBtwB,KAAKgG,MAAM8pB,IAAIvd,MAAQwd,EAAID,IAAIM,YAC/BpwB,KAAKgG,MAAM8pB,IAAItd,OAASud,EAAID,IAAIQ,aAChCtwB,KAAKgG,MAAM4pB,QAAQpd,OAASud,EAAIH,QAAQU,aAGxCP,EAAIH,QAAQ3iB,MAAMwuC,WAAa,EAAIz7C,KAAKgG,MAAM8pB,IAAIvd,MAAQ,KAG1Dwd,EAAID,IAAI7iB,MAAMtF,KAAQ3H,KAAKwS,OAASxS,KAAKgG,MAAM8pB,IAAItd,QAAU,EAAK,KAClEud,EAAID,IAAI7iB,MAAMxF,KAAQzH,KAAKgG,MAAM8pB,IAAIvd,MAAQ,EAAK,KAElDvS,KAAKitC,OAAQ,EAGfjtC,KAAKw6C,qBAAqBzqB,EAAI7d,QAOhC7P,EAAU8Q,UAAUw0B,KAAO,WACpB3nC,KAAKktC,WACRltC,KAAKyhB,UAOTpf,EAAU8Q,UAAUu0B,KAAO,WACrB1nC,KAAKktC,YACHltC,KAAK+vB,IAAI7d,MAAMrI,YACjB7J,KAAK+vB,IAAI7d,MAAMrI,WAAWsH,YAAYnR,KAAK+vB,IAAI7d,OAGjDlS,KAAK2H,IAAM,KACX3H,KAAKyH,KAAO,KAEZzH,KAAKktC,WAAY,IAQrB7qC,EAAU8Q,UAAUw7B,YAAc,WAChC,GAAI/+B,GAAQ5P,KAAKo6B,WAAWnF,SAASj1B,KAAK0S,KAAK9C,MAE/C5P,MAAKyH,KAAOmI,EAAQ5P,KAAKgG,MAAM8pB,IAAIvd,MAGnCvS,KAAK+vB,IAAI7d,MAAMjF,MAAMxF,KAAOzH,KAAKyH,KAAO,MAO1CpF,EAAU8Q,UAAUo6B,YAAc,WAChC,GAAIhZ,GAAcv0B,KAAKyO,QAAQ8lB,YAC3BriB,EAAQlS,KAAK+vB,IAAI7d,KAGnBA,GAAMjF,MAAMtF,IADK,OAAf4sB,EACgBv0B,KAAK2H,IAAM,KAGV3H,KAAK8kC,OAAOtyB,OAASxS,KAAK2H,IAAM3H,KAAKwS,OAAU,MAItE3S,EAAOD,QAAUyC,GAKb,SAASxC,EAAQD,EAASM,GAe9B,QAASoC,GAAWoQ,EAAM0nB,EAAY3rB,GASpC,GARAzO,KAAKgG,OACH4pB,SACErd,MAAO,IAGXvS,KAAK6jB,UAAW,EAGZn
R,EAAM,CACR,GAAkBlM,QAAdkM,EAAK9C,MACP,KAAM,IAAIhM,OAAM,oCAAsC8O,EAAKrS,GAE7D,IAAgBmG,QAAZkM,EAAK7C,IACP,KAAM,IAAIjM,OAAM,kCAAoC8O,EAAKrS,IAI7D6B,EAAK3B,KAAKP,KAAM0S,EAAM0nB,EAAY3rB,GA/BpC,GACIvM,IADShC,EAAoB,IACtBA,EAAoB,IAiC/BoC,GAAU6Q,UAAY,GAAIjR,GAAM,KAAM,KAAM,MAE5CI,EAAU6Q,UAAUkoC,cAAgB,aAOpC/4C,EAAU6Q,UAAU27B,UAAY,SAASrZ,GAEvC,MAAQz1B,MAAK0S,KAAK9C,MAAQ6lB,EAAM5lB,KAAS7P,KAAK0S,KAAK7C,IAAM4lB,EAAM7lB,OAMjEtN,EAAU6Q,UAAUsO,OAAS,WAC3B,GAAIsO,GAAM/vB,KAAK+vB,GAsBf,IArBKA,IAEH/vB,KAAK+vB,OACLA,EAAM/vB,KAAK+vB,IAGXA,EAAIygB,IAAMj/B,SAASM,cAAc,OAIjCke,EAAIH,QAAUre,SAASM,cAAc,OACrCke,EAAIH,QAAQ/nB,UAAY,UACxBkoB,EAAIygB,IAAI/+B,YAAYse,EAAIH,SAGxBG,EAAIygB,IAAI,iBAAmBxwC,KAE3BA,KAAKitC,OAAQ,IAIVjtC,KAAK8kC,OACR,KAAM,IAAIlhC,OAAM,yCAElB,KAAKmsB,EAAIygB,IAAI3mC,WAAY,CACvB,GAAI4iC,GAAazsC,KAAK8kC,OAAO/U,IAAI0c,UACjC,KAAKA,EACH,KAAM,IAAI7oC,OAAM,iEAElB6oC,GAAWh7B,YAAYse,EAAIygB,KAQ7B,GANAxwC,KAAKktC,WAAY,EAMbltC,KAAKitC,MAAO,CACdjtC,KAAK26C,gBAAgB36C,KAAK+vB,IAAIH,SAC9B5vB,KAAK66C,aAAa76C,KAAK+vB,IAAIygB,KAC3BxwC,KAAK+6C,sBAAsB/6C,KAAK+vB,IAAIygB,KACpCxwC,KAAKm7C,aAAan7C,KAAK+vB,IAAIygB,IAG3B,IAAI3oC,IAAa7H,KAAK0S,KAAK7K,UAAa,IAAM7H,KAAK0S,KAAK7K,UAAa,KAChE7H,KAAKqzC,SAAW,YAAc,GACnCtjB,GAAIygB,IAAI3oC,UAAY7H,KAAKq7C,cAAgBxzC,EAGzC7H,KAAK6jB,SAA6D,WAAlDta,OAAOsrC,iBAAiB9kB,EAAIH,SAAS/L,SAKrD7jB,KAAK+vB,IAAIH,QAAQ3iB,MAAMyuC,SAAW,OAClC17C,KAAKgG,MAAM4pB,QAAQrd,MAAQvS,KAAK+vB,IAAIH,QAAQQ,YAC5CpwB,KAAKwS,OAASxS,KAAK+vB,IAAIygB,IAAIlgB,aAC3BtwB,KAAK+vB,IAAIH,QAAQ3iB,MAAMyuC,SAAW,GAElC17C,KAAKitC,OAAQ,EAGfjtC,KAAKw6C,qBAAqBzqB,EAAIygB,KAC9BxwC,KAAK27C,mBACL37C,KAAK47C,qBAOPt5C,EAAU6Q,UAAUw0B,KAAO,WACpB3nC,KAAKktC,WACRltC,KAAKyhB,UAQTnf,EAAU6Q,UAAUu0B,KAAO,WACzB,GAAI1nC,KAAKktC,UAAW,CAClB,GAAIsD,GAAMxwC,KAAK+vB,IAAIygB,GAEfA,GAAI3mC,YACN2mC,EAAI3mC,WAAWsH,YAAYq/B,GAG7BxwC,KAAK2H,IAAM,KACX3H,KAAKyH,KAAO,KAEZzH,KAAKktC,WAAY,IAQrB5qC,EAAU6Q,UAAUw7B,YAAc,WAChC,GAGIkN,GACA1rB,EAJA2rB,EAAc97C,KAAK8kC,OAAOvyB,MAC1B3C,EAAQ5P,KAAKo6B,WAAWnF,SAASj1B,KAAK0S,KAAK9C,OAC3CC,EAAM7P,KAAKo6B,WAAWnF,SAASj1B,KAAK0S,KAAK7C,MAKhCisC,EAATlsC,IACFA,GAASksC,GAEPjsC,EAAM,EAAIisC,IACZjsC,EAAM,EAAIisC,EAEZ,IAAIC,GAAW72C,KAAKwH,IAAImD,EAAMD,EAAO,EAoBrC,QAlBI5P,KAAK6jB,UACP7jB,KAAKyH,KAAOmI,EACZ5P,KAAKuS,MAAQwpC,EAAW/7C,KAAKgG,MAAM4pB,QAAQrd,MAC3C4d,EAAenwB,KAAKgG,MAAM4pB,QAAQrd,QAOlCvS,KAAKyH,KAAOmI,EACZ5P,KAAKuS,MAAQwpC,EACb5rB,EAAejrB,KAAK4G,IAAI+D,EAAMD,EAAQ,EAAI5P,KAAKyO,QAAQuV,QAAShkB,KAAKgG,MAAM4pB,QAAQrd,QAGrFvS,KAAK+vB,IAAIygB,IAAIvjC,MAAMxF,KAAOzH,KAAKyH,KAAO,KACtCzH,KAAK+vB,IAAIygB,IAAIvjC,MAAMsF,MAAQwpC,EAAW,KAE9B/7C,KAAKyO,QAAQsgC,OACnB,IAAK,OACH/uC,KAAK+vB,IAAIH,QAAQ3iB,MAAMxF,KAAO,GAC9B,MAEF,KAAK,QACHzH,KAAK+vB,IAAIH,QAAQ3iB,MAAMxF,KAAOvC,KAAKwH,IAAKqvC,EAAW5rB,EAAe,EAAInwB,KAAKyO,QAAQuV,QAAU,GAAK,IAClG,MAEF,KAAK,SACHhkB,KAAK+vB,IAAIH,QAAQ3iB,MAAMxF,KAAOvC,KAAKwH,KAAKqvC,EAAW5rB,EAAe,EAAInwB,KAAKyO,QAAQuV,SAAW,EAAG,GAAK,IACtG,MAEF,SAIM63B,EAFA77C,KAAK6jB,SACHhU,EAAM,EACM3K,KAAKwH,KAAKkD,EAAO,IAGhBugB,EAIL,EAARvgB,EACY1K,KAAK4G,KAAK8D,EACnBC,EAAMD,EAAQugB,EAAe,EAAInwB,KAAKyO,QAAQuV,SAIrC,EAGlBhkB,KAAK+vB,IAAIH,QAAQ3iB,MAAMxF,KAAOo0C,EAAc,OAQlDv5C,EAAU6Q,UAAUo6B,YAAc,WAChC,GAAIhZ,GAAcv0B,KAAKyO,QAAQ8lB,YAC3Bic,EAAMxwC,KAAK+vB,IAAIygB,GAGjBA,GAAIvjC,MAAMtF,IADO,OAAf4sB,EACcv0B,KAAK2H,IAAM,KAGV3H,KAAK8kC,OAAOtyB,OAASxS,KAAK2H,IAAM3H,KAAKwS,OAAU,MAQpElQ,EAAU6Q,UAAUwoC,iBAAmB,WACrC,GAAI37C,KAAKqzC,UAAYrzC,KAAKyO,QAAQygC,SAASC,aAAenvC,KAAK+vB,IAAIisB,SAAU,CAE3E,GAAIA,GAAWzqC,SAASM,cAAc,MACtCmqC,GAASn0C,UAAY,YACrBm0C,EAAS9I,aAAelzC,KASxBA,KAAK+vB,IAAIygB,IAAI/+B,YAAYuqC,GACzBh8C,KAAK+vB,IAAIisB,SAAWA,OAEZh8C,KAAKqzC,UAAYrzC,K
AAK+vB,IAAIisB,WAE9Bh8C,KAAK+vB,IAAIisB,SAASnyC,YACpB7J,KAAK+vB,IAAIisB,SAASnyC,WAAWsH,YAAYnR,KAAK+vB,IAAIisB,UAEpDh8C,KAAK+vB,IAAIisB,SAAW,OAQxB15C,EAAU6Q,UAAUyoC,kBAAoB,WACtC,GAAI57C,KAAKqzC,UAAYrzC,KAAKyO,QAAQygC,SAASC,aAAenvC,KAAK+vB,IAAIksB,UAAW,CAE5E,GAAIA,GAAY1qC,SAASM,cAAc,MACvCoqC,GAAUp0C,UAAY,aACtBo0C,EAAU9I,cAAgBnzC,KAS1BA,KAAK+vB,IAAIygB,IAAI/+B,YAAYwqC,GACzBj8C,KAAK+vB,IAAIksB,UAAYA,OAEbj8C,KAAKqzC,UAAYrzC,KAAK+vB,IAAIksB,YAE9Bj8C,KAAK+vB,IAAIksB,UAAUpyC,YACrB7J,KAAK+vB,IAAIksB,UAAUpyC,WAAWsH,YAAYnR,KAAK+vB,IAAIksB,WAErDj8C,KAAK+vB,IAAIksB,UAAY,OAIzBp8C,EAAOD,QAAU0C,GAKb,SAASzC,EAAQD,EAASM,GAkC9B,QAASgD,GAASqW,EAAW7G,EAAMjE,GACjC,KAAMzO,eAAgBkD,IACpB,KAAM,IAAIsW,aAAY,mDAGxBxZ,MAAKk8C,0BACLl8C,KAAKm8C,0BAGLn8C,KAAKyZ,iBAAmBF,EAGxBvZ,KAAKo8C,kBAAoB,GACzBp8C,KAAKq8C,eAAiB,IAAOr8C,KAAKo8C,kBAClCp8C,KAAKs8C,WAAa,EAClBt8C,KAAKu8C,YAAc,EACnBv8C,KAAKw8C,gBAAiB,EACtBx8C,KAAKy8C,wBAA0B,GAE/Bz8C,KAAK08C,cAAe,EAEpB18C,KAAK28C,kBAAoB1pC,IAAI,KAAK2pC,KAAK,KAAKC,SAAS,KAAKC,QAAQ,KAAKC,IAAI,MAG3E/8C,KAAKq0B,gBACH2oB,OACEC,KAAM,EACNC,UAAW,GACXC,UAAW,GACX1xB,OAAQ,GACR2xB,MAAO,UACPC,MAAO72C,OACP0gB,SAAU,GACVC,SAAU,GACVm2B,UAAW,QACXC,SAAU,GACVC,SAAU,UACVC,SAAUj3C,OACVk3C,gBAAiB,EACjBC,gBAAiB,QACjBC,MAAO,GACPzyC,OACIiB,OAAQ,UACRD,WAAY,UACdE,WACED,OAAQ,UACRD,WAAY,WAEdG,OACEF,OAAQ,UACRD,WAAY,YAGhB8F,MAAOzL,OACPwZ,YAAa,EACb69B,oBAAqBr3C,QAEvBs3C,OACE52B,SAAU,EACVC,SAAU,GACV5U,MAAO,EACPwrC,yBAA0B,EAC1BC,WAAY,IACZ/wC,MAAO,OACP9B,OACEA,MAAM,UACNkB,UAAU,UACVC,MAAO,WAETgxC,UAAW,UACXC,SAAU,GACVC,SAAU,QACVC,SAAU,QACVC,gBAAiB,EACjBC,gBAAiB,QACjBM,eAAe,aACfC,iBAAkB,EAClBC,MACEx4C,OAAQ,GACRy4C,IAAK,EACLC,UAAW73C,QAEb83C,aAAc,QAEhBC,kBAAiB,EACjBC,SACEC,WACE/vC,SAAS,EACTgwC,cAAe,EACfC,sBAAuB,KACvBC,eAAgB,GAChBC,aAAc,GACdC,eAAgB,IAChBC,QAAS,KAEXC,WACEJ,eAAgB,EAChBC,aAAc,IACdC,eAAgB,IAChBG,aAAc,IACdF,QAAS,KAEXG,uBACExwC,SAAS,EACTkwC,eAAgB,EAChBC,aAAc,IACdC,eAAgB,IAChBG,aAAc,IACdF,QAAS,KAEXA,QAAS,KACTH,eAAgB,KAChBC,aAAc,KACdC,eAAgB,MAElBK,YACEzwC,SAAS,EACT0wC,gBAAiB,IACjBC,iBAAiB,IACjBC,cAAc,IACdC,eAAgB,GAChBC,qBAAsB,GACtBC,gBAAiB,IACjBC,oBAAqB,GACrBC,mBAAoB,EACpBC,YAAa,IACbC,mBAAoB,GACpBC,sBAAuB,GACvBC,WAAY,GACZC,aAAcztC,MAAQ,EACRC,OAAQ,EACRiZ,OAAQ,GACtBw0B,sBAAuB,IACvBC,kBAAmB,GACnBC,uBAAwB,GAE1BC,YACE1xC,SAAS,GAEX2xC,UACE3xC,SAAS,EACT4xC,OAAQvuC,EAAG,GAAIC,EAAG,GAAIkuB,KAAM,MAE9BqgB,kBACE7xC,SAAS,EACT8xC,kBAAkB,GAEpBC,oBACE/xC,SAAQ,EACRgyC,gBAAiB,IACjBC,YAAa,IACbzlB,UAAW,KACX0lB,OAAQ,WAEVC,wBAAwB,EACxBC,cACEpyC,SAAS,EACTqyC,SAAS,EACTj6C,KAAM,aACNk6C,UAAW,IAEbC,YAAc,GACdC,YAAc,GACdC,WAAW,EACXC,wBAAyB,IACzBC,uBAAuB,EACvBhd,OAAQ,KACRQ,QAASA,EACTze,SACE5N,MAAO,IACP8kC,UAAW,QACXC,SAAU,GACVC,SAAU,UACVryC,OACEiB,OAAQ,OACRD,WAAY,YAGhBm1C,aAAa,EACbC,WAAW,EACX5jB,UAAU,EACVrxB,OAAO,EACPk1C,iBAAiB,EACjBC,iBAAiB,EACjBlvC,MAAQ,OACRC,OAAS,OACTy8B,YAAY,GAEdjvC,KAAK0hD,UAAY/gD,EAAK2E,UAAWtF,KAAKq0B,gBACtCr0B,KAAK2hD,WAAa,EAGlB3hD,KAAK4hD,UAAY5E,SAASc,UAC1B99C,KAAK6hD,oBAAqB,EAC1B7hD,KAAK8hD,mBAAqBC,YAAaC,SAGvChiD,KAAKiiD,eAAiB,EAAEjiD,KAAKo8C,kBAC7Bp8C,KAAKkiD,wBAA0B,iBAC/BliD,KAAKmiD,WAAa,EAClBniD,KAAKoiD,YAAc,EACnBpiD,KAAKqiD,YAAc,EACnBriD,KAAKsiD,kBAAoB,EACzBtiD,KAAKuiD,kBAAoB,EACzBviD,KAAKwiD,eAAiB,KACtBxiD,KAAKyiD,mBAAqB,KAC1BziD,KAAK0iD,UAAY,CAGjB,IAAIv/C,GAAUnD,IACdA,MAAKm0B,OAAS,GAAI9wB,GAClBrD,KAAK2iD,OAAS,GAAIr/C,GAClBtD,KAAK2iD,OAAOC,kBAAkB,WAC5Bz/C,EAAQ0/C,YAIV7iD,KAAK8iD,WAAa,EAClB9iD,KAAK+iD,WAAa,EAClB/iD,KAAKgjD,cAAgB,EAIrBhjD,KAAKijD,qBAELjjD,KAAK00B,UAEL10B,KAAKkjD,oBAELljD,KAAKmjD,qBAELnjD,KAAKojD,uBAELpjD,KAAKqjD,uBAILrjD,KAAKsjD,gBAAgBtjD,KAAKsf,MAAME,YAAc,EAAGxf,KAAKsf,MAAMuF,aAAe,GAC3E7kB,KAAKgd,UAAU,GACfhd,K
AAKkT,WAAWzE,GAGhBzO,KAAKujD,kBAAmB,EACxBvjD,KAAKwjD,mBACLxjD,KAAKyjD,sBAAuB,EAC5BzjD,KAAK0jD,YAAa,EAClB1jD,KAAKohD,wBAA0B,KAC/BphD,KAAK2jD,eAAgB,EAGrB3jD,KAAK4jD,oBACL5jD,KAAK6jD,0BACL7jD,KAAK8jD,eACL9jD,KAAKg9C,SACLh9C,KAAK89C,SAGL99C,KAAK+jD,eAAqBhyC,EAAK,EAAEC,EAAK,GACtChS,KAAKgkD,mBAAqBjyC,EAAK,EAAEC,EAAK,GACtChS,KAAKikD,iBAAmBlyC,EAAK,EAAEC,EAAK,GACpChS,KAAKkkD,cACLlkD,KAAKid,MAAQ,EACbjd,KAAKmkD,cAAgBnkD,KAAKid,MAG1Bjd,KAAKokD,UAAY,KACjBpkD,KAAKqkD,UAAY,KAGjBrkD,KAAKskD,gBACHrxC,IAAO,SAAU3J,EAAOwK,GACtB3Q,EAAQohD,UAAUzwC,EAAO7R,OACzBkB,EAAQyM,SAEViF,OAAU,SAAUvL,EAAOwK,GACzB3Q,EAAQqhD,aAAa1wC,EAAO7R,MAAO6R,EAAOpB,MAC1CvP,EAAQyM,SAEVyG,OAAU,SAAU/M,EAAOwK,GACzB3Q,EAAQshD,aAAa3wC,EAAO7R,OAC5BkB,EAAQyM,UAGZ5P,KAAK0kD,gBACHzxC,IAAO,SAAU3J,EAAOwK,GACtB3Q,EAAQwhD,UAAU7wC,EAAO7R,OACzBkB,EAAQyM,SAEViF,OAAU,SAAUvL,EAAOwK,GACzB3Q,EAAQyhD,aAAa9wC,EAAO7R,OAC5BkB,EAAQyM,SAEVyG,OAAU,SAAU/M,EAAOwK,GACzB3Q,EAAQ0hD,aAAa/wC,EAAO7R,OAC5BkB,EAAQyM,UAKZ5P,KAAK8kD,QAAS,EACd9kD,KAAK+kD,MAAQv+C,OAGbxG,KAAKgY,QAAQtF,EAAK1S,KAAK0hD,UAAUvC,WAAWzwC,SAAW1O,KAAK0hD,UAAUjB,mBAAmB/xC,SAGzF1O,KAAK08C,cAAe,EAC6B,GAA7C18C,KAAK0hD,UAAUjB,mBAAmB/xC,QACpC1O,KAAKglD,2BAI2B,GAA5BhlD,KAAK0hD,UAAUP,WACjBnhD,KAAKilD,WAAWz+C,QAAW,EAAKxG,KAAK0hD,UAAUvC,WAAWzwC,SAK1D1O,KAAK0hD,UAAUvC,WAAWzwC,SAC5B1O,KAAKklD,sBA/VT,GAAInoC,GAAU7c,EAAoB,IAC9B6D,EAAS7D,EAAoB,IAC7BilD,EAAWjlD,EAAoB,IAC/BS,EAAOT,EAAoB,GAC3BklD,EAAallD,EAAoB,IACjCW,EAAUX,EAAoB,GAC9BY,EAAWZ,EAAoB,GAC/BuD,EAAYvD,EAAoB,IAChCwD,EAAcxD,EAAoB,IAClCmD,EAASnD,EAAoB,IAC7BoD,EAASpD,EAAoB,IAC7BqD,EAAOrD,EAAoB,IAC3BkD,EAAOlD,EAAoB,IAC3BsD,EAAQtD,EAAoB,IAC5BmlD,EAAcnlD,EAAoB,IAClColD,EAAYplD,EAAoB,IAChC2kC,EAAU3kC,EAAoB,GAGlCA,GAAoB,IAiVpB6c,EAAQ7Z,EAAQiQ,WAOhBjQ,EAAQiQ,UAAU+oC,wBAA0B,WAC1C,GAAIqJ,GAAcv8C,UAAUC,UAAUs7B,aACtCvkC,MAAKwlD,iBAAkB,EACgB,IAAnCD,EAAY5+C,QAAQ,YACtB3G,KAAKwlD,iBAAkB,EAEiB,IAAjCD,EAAY5+C,QAAQ,WACvB4+C,EAAY5+C,QAAQ,WAAa,KACnC3G,KAAKwlD,iBAAkB,IAa7BtiD,EAAQiQ,UAAUsyC,eAAiB,WAIjC,IAAK,GAHDC,GAAUn0C,SAASo0C,qBAAsB,UAGpCngD,EAAI,EAAGA,EAAIkgD,EAAQ//C,OAAQH,IAAK,CACvC,GAAIogD,GAAMF,EAAQlgD,GAAGogD,IACjBrhD,EAAQqhD,GAAO,qBAAqBnhD,KAAKmhD,EAC7C,IAAIrhD,EAEF,MAAOqhD,GAAI1gB,UAAU,EAAG0gB,EAAIjgD,OAASpB,EAAM,GAAGoB,QAIlD,MAAO,OAQTzC,EAAQiQ,UAAU0yC,UAAY,WAC5B,GAAsDC,GAAlDC,EAAO,IAAKC,EAAO,KAAMC,EAAO,IAAKC,EAAO,IAChD,KAAK,GAAIC,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACdF,EAAQH,EAAKM,YAAgB,OAAIH,EAAOH,EAAKM,YAAY3+C,MACzDy+C,EAAQJ,EAAKM,YAAiB,QAAIF,EAAOJ,EAAKM,YAAY/+B,OAC1D0+B,EAAQD,EAAKM,YAAkB,SAAIL,EAAOD,EAAKM,YAAY9iC,QAC3D0iC,EAAQF,EAAKM,YAAe,MAAIJ,EAAOF,EAAKM,YAAYz+C,KAMhE,OAHY,MAARs+C,GAAuB,MAARC,GAAwB,KAARH,GAAuB,MAARC,IAChDD,EAAO,EAAGC,EAAO,EAAGC,EAAO,EAAGC,EAAO,IAE/BD,KAAMA,EAAMC,KAAMA,EAAMH,KAAMA,EAAMC,KAAMA,IASpD9iD,EAAQiQ,UAAUkzC,YAAc,SAAS5wB,GACvC,OAAQ1jB,EAAI,IAAO0jB,EAAMywB,KAAOzwB,EAAMwwB,MAC9Bj0C,EAAI,IAAOyjB,EAAMuwB,KAAOvwB,EAAMswB,QAUxC7iD,EAAQiQ,UAAU8xC,WAAa,SAASqB,EAAkBC,EAAaC,GACrExmD,KAAK6iD,SAAQ,GAEOr8C,SAAhB+/C,IACFA,GAAc,GAEK//C,SAAjBggD,IACFA,GAAe,GAEQhgD,SAArB8/C,IACFA,GAAmB,EAGrB,IACIG,GADAhxB,EAAQz1B,KAAK6lD,WAGjB,IAAmB,GAAfU,EAAqB,CACvB,GAAIG,GAAgB1mD,KAAK8jD,YAAYn+C,MAIjC8gD,GAH+B,GAA/BzmD,KAAK0hD,UAAUZ,aACwB,GAArC9gD,KAAK0hD,UAAUvC,WAAWzwC,SAC5Bg4C,GAAiB1mD,KAAK0hD,UAAUvC,WAAWC,gBAC/B,UAAYsH,EAAgB,WAAa,SAGzC,QAAUA,EAAgB,QAAU,SAIT,GAArC1mD,KAAK0hD,UAAUvC,WAAWzwC,SAC1Bg4C,GAAiB1mD,KAAK0hD,UAAUvC,WAAWC,gBACjC,YAAcsH,EAAgB,YAAc,cAG5C,YAAcA,EAAgB,aAAe,SAK7D,IAAIC,GAASzhD,KAAK4G,IAAI9L,KAAKsf,MAAMC,OAAOC,YAAc,IAAKxf,KAAKsf,MAAMC,OAAOsF,aAAe,IAC5F4hC,IAAaE,MAEV,CACH,GAAInP,GAAgD,IAApCtyC,KAAK2lB,IAAI4K,EAAMywB
,KAAOzwB,EAAMwwB,MACxCW,EAAgD,IAApC1hD,KAAK2lB,IAAI4K,EAAMuwB,KAAOvwB,EAAMswB,MAExCc,EAAa7mD,KAAKsf,MAAMC,OAAOC,YAAeg4B,EAC9CsP,EAAa9mD,KAAKsf,MAAMC,OAAOsF,aAAe+hC,CAElDH,GAA2BK,GAAdD,EAA4BA,EAAaC,EAGpDL,EAAY,IACdA,EAAY,EAId,IAAIv6B,GAASlsB,KAAKqmD,YAAY5wB,EAC9B,IAAoB,GAAhB+wB,EAAuB,CACzB,GAAI/3C,IAAWmV,SAAUsI,EAAQjP,MAAOwpC,EAAWM,UAAWT,EAC9DtmD,MAAK6nB,OAAOpZ,GACZzO,KAAK8kD,QAAS,EACd9kD,KAAK4P,YAGLsc,GAAOna,GAAK00C,EACZv6B,EAAOla,GAAKy0C,EACZv6B,EAAOna,GAAK,GAAM/R,KAAKsf,MAAMC,OAAOC,YACpC0M,EAAOla,GAAK,GAAMhS,KAAKsf,MAAMC,OAAOsF,aACpC7kB,KAAKgd,UAAUypC,GACfzmD,KAAKsjD,iBAAiBp3B,EAAOna,GAAGma,EAAOla,IAS3C9O,EAAQiQ,UAAU6zC,qBAAuB,WACvChnD,KAAKinD,qBACL,KAAK,GAAIC,KAAOlnD,MAAKg9C,MACfh9C,KAAKg9C,MAAMl3C,eAAeohD,IAC5BlnD,KAAK8jD,YAAY97C,KAAKk/C,IAiB5BhkD,EAAQiQ,UAAU6E,QAAU,SAAStF,EAAM8zC,GAOzC,GANqBhgD,SAAjBggD,IACFA,GAAe,GAGjBxmD,KAAK08C,cAAe,EAEhBhqC,GAAQA,EAAKod,MAAQpd,EAAKsqC,OAAStqC,EAAKorC,OAC1C,KAAM,IAAItkC,aAAY,iGAYxB,IAP+C,GAA3CxZ,KAAK0hD,UAAUnB,iBAAiB7xC,SAClC1O,KAAKmnD,wBAIPnnD,KAAKkT,WAAWR,GAAQA,EAAKjE,SAEzBiE,GAAQA,EAAKod,KAEf,GAAGpd,GAAQA,EAAKod,IAAK,CACnB,GAAIs3B,GAAU3jD,EAAU4jD,WAAW30C,EAAKod,IAExC,YADA9vB,MAAKgY,QAAQovC,QAIZ,IAAI10C,GAAQA,EAAK40C,OAEpB,GAAG50C,GAAQA,EAAK40C,MAAO,CACrB,GAAIC,GAAY7jD,EAAY8jD,WAAW90C,EAAK40C,MAE5C,YADAtnD,MAAKgY,QAAQuvC,QAKfvnD,MAAKynD,UAAU/0C,GAAQA,EAAKsqC,OAC5Bh9C,KAAK0nD,UAAUh1C,GAAQA,EAAKorC,MAE9B99C,MAAK2nD,mBACe,GAAhBnB,IAC+C,GAA7CxmD,KAAK0hD,UAAUjB,mBAAmB/xC,SACpC1O,KAAK4nD,eACL5nD,KAAKglD,4BAIDhlD,KAAK0hD,UAAUP,WACjBnhD,KAAK6nD,aAGT7nD,KAAK4P,SAEP5P,KAAK08C,cAAe,GAOtBx5C,EAAQiQ,UAAUD,WAAa,SAAUzE,GACvC,GAAIA,EAAS,CACX,GAAI5I,GACAqI,GAAU,QAAQ,QAAQ,eAAe,qBAAqB,aAAa,aAC7E,WAAW,mBAAmB,QAAQ,SAAS,aAAa,YAAY,WAAW,aAOrF,IAJAvN,EAAK+F,uBAAuBwH,EAAOlO,KAAK0hD,UAAWjzC,GACnD9N,EAAK+F,wBAAwB,SAAS1G,KAAK0hD,UAAU1E,MAAOvuC,EAAQuuC,OACpEr8C,EAAK+F,wBAAwB,QAAQ,UAAU1G,KAAK0hD,UAAU5D,MAAOrvC,EAAQqvC,OAEzErvC,EAAQ+vC,UACV79C,EAAK4N,aAAavO,KAAK0hD,UAAUlD,QAAS/vC,EAAQ+vC,QAAQ,aAC1D79C,EAAK4N,aAAavO,KAAK0hD,UAAUlD,QAAS/vC,EAAQ+vC,QAAQ,aAEtD/vC,EAAQ+vC,QAAQU,uBAAuB,CACzCl/C,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,CAC3C,KAAK7I,IAAQ4I,GAAQ+vC,QAAQU,sBACvBzwC,EAAQ+vC,QAAQU,sBAAsBp5C,eAAeD,KACvD7F,KAAK0hD,UAAUlD,QAAQU,sBAAsBr5C,GAAQ4I,EAAQ+vC,QAAQU,sBAAsBr5C;CAkDnG,GA5CI4I,EAAQ2gC,QAAQpvC,KAAK28C,iBAAiB1pC,IAAMxE,EAAQ2gC,OACpD3gC,EAAQq5C,SAAS9nD,KAAK28C,iBAAiBC,KAAOnuC,EAAQq5C,QACtDr5C,EAAQs5C,aAAa/nD,KAAK28C,iBAAiBE,SAAWpuC,EAAQs5C,YAC9Dt5C,EAAQu5C,YAAYhoD,KAAK28C,iBAAiBG,QAAUruC,EAAQu5C,WAC5Dv5C,EAAQw5C,WAAWjoD,KAAK28C,iBAAiBI,IAAMtuC,EAAQw5C,UAE3DtnD,EAAK4N,aAAavO,KAAK0hD,UAAWjzC,EAAQ,gBAC1C9N,EAAK4N,aAAavO,KAAK0hD,UAAWjzC,EAAQ,sBAC1C9N,EAAK4N,aAAavO,KAAK0hD,UAAWjzC,EAAQ,cAC1C9N,EAAK4N,aAAavO,KAAK0hD,UAAWjzC,EAAQ,cAC1C9N,EAAK4N,aAAavO,KAAK0hD,UAAWjzC,EAAQ,YAC1C9N,EAAK4N,aAAavO,KAAK0hD,UAAWjzC,EAAQ,oBAGtCA,EAAQ8xC,mBACVvgD,KAAKkoD,SAAWloD,KAAK0hD,UAAUnB,iBAAiBC,kBAK9C/xC,EAAQqvC,QACkBt3C,SAAxBiI,EAAQqvC,MAAM3yC,QACZxK,EAAKwD,SAASsK,EAAQqvC,MAAM3yC,QAC9BnL,KAAK0hD,UAAU5D,MAAM3yC,SACrBnL,KAAK0hD,UAAU5D,MAAM3yC,MAAMA,MAAQsD,EAAQqvC,MAAM3yC,MACjDnL,KAAK0hD,UAAU5D,MAAM3yC,MAAMkB,UAAYoC,EAAQqvC,MAAM3yC,MACrDnL,KAAK0hD,UAAU5D,MAAM3yC,MAAMmB,MAAQmC,EAAQqvC,MAAM3yC,QAGf3E,SAA9BiI,EAAQqvC,MAAM3yC,MAAMA,QAA0BnL,KAAK0hD,UAAU5D,MAAM3yC,MAAMA,MAAQsD,EAAQqvC,MAAM3yC,MAAMA,OACnE3E,SAAlCiI,EAAQqvC,MAAM3yC,MAAMkB,YAA0BrM,KAAK0hD,UAAU5D,MAAM3yC,MAAMkB,UAAYoC,EAAQqvC,MAAM3yC,MAAMkB,WAC3E7F,SAA9BiI,EAAQqvC,MAAM3yC,MAAMmB,QAA0BtM,KAAK0hD,UAAU5D,MAAM3yC,MAAMmB,MAAQmC,EAAQqvC,MAAM3yC,MAAMmB,QAE3GtM,KAAK0hD,UAAU5D,M
AAMQ,cAAe,GAGjC7vC,EAAQqvC,MAAMR,WACW92C,SAAxBiI,EAAQqvC,MAAM3yC,QACZxK,EAAKwD,SAASsK,EAAQqvC,MAAM3yC,OAAmBnL,KAAK0hD,UAAU5D,MAAMR,UAAY7uC,EAAQqvC,MAAM3yC,MAC3D3E,SAA9BiI,EAAQqvC,MAAM3yC,MAAMA,QAAsBnL,KAAK0hD,UAAU5D,MAAMR,UAAY7uC,EAAQqvC,MAAM3yC,MAAMA,SAK1GsD,EAAQuuC,OACNvuC,EAAQuuC,MAAM7xC,MAAO,CACvB,GAAIg9C,GAAcxnD,EAAKuK,WAAWuD,EAAQuuC,MAAM7xC,MAChDnL,MAAK0hD,UAAU1E,MAAM7xC,MAAMgB,WAAag8C,EAAYh8C,WACpDnM,KAAK0hD,UAAU1E,MAAM7xC,MAAMiB,OAAS+7C,EAAY/7C,OAChDpM,KAAK0hD,UAAU1E,MAAM7xC,MAAMkB,UAAUF,WAAag8C,EAAY97C,UAAUF,WACxEnM,KAAK0hD,UAAU1E,MAAM7xC,MAAMkB,UAAUD,OAAS+7C,EAAY97C,UAAUD,OACpEpM,KAAK0hD,UAAU1E,MAAM7xC,MAAMmB,MAAMH,WAAag8C,EAAY77C,MAAMH,WAChEnM,KAAK0hD,UAAU1E,MAAM7xC,MAAMmB,MAAMF,OAAS+7C,EAAY77C,MAAMF,OAGhE,GAAIqC,EAAQ0lB,OACV,IAAK,GAAIi0B,KAAa35C,GAAQ0lB,OAC5B,GAAI1lB,EAAQ0lB,OAAOruB,eAAesiD,GAAY,CAC5C,GAAIn2C,GAAQxD,EAAQ0lB,OAAOi0B,EAC3BpoD,MAAKm0B,OAAOlhB,IAAIm1C,EAAWn2C,GAKjC,GAAIxD,EAAQ2X,QAAS,CACnB,IAAKvgB,IAAQ4I,GAAQ2X,QACf3X,EAAQ2X,QAAQtgB,eAAeD,KACjC7F,KAAK0hD,UAAUt7B,QAAQvgB,GAAQ4I,EAAQ2X,QAAQvgB,GAG/C4I,GAAQ2X,QAAQjb,QAClBnL,KAAK0hD,UAAUt7B,QAAQjb,MAAQxK,EAAKuK,WAAWuD,EAAQ2X,QAAQjb,QAmBnE,GAfI,cAAgBsD,KACdA,EAAQ45C,WACLroD,KAAKsoD,YACRtoD,KAAKsoD,UAAY,GAAIhD,GAAUtlD,KAAKsf,OACpCtf,KAAKsoD,UAAU/0C,GAAG,SAAUvT,KAAKuoD,gBAAgBzzB,KAAK90B,QAIpDA,KAAKsoD,YACPtoD,KAAKsoD,UAAUh1C,gBACRtT,MAAKsoD,YAKd75C,EAAQg4B,OACV,KAAM,IAAI7iC,OAAM,6EAMlB5D,MAAKijD,qBAELjjD,KAAKwoD,0BAELxoD,KAAKyoD,0BAELzoD,KAAK0oD,yBAIL1oD,KAAKuoD,kBAELvoD,KAAK2kB,QAAQ3kB,KAAK0hD,UAAUnvC,MAAOvS,KAAK0hD,UAAUlvC,QAClDxS,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAaT1M,EAAQiQ,UAAUuhB,QAAU,WAE1B,KAAO10B,KAAKyZ,iBAAiBiK,iBAC3B1jB,KAAKyZ,iBAAiBtI,YAAYnR,KAAKyZ,iBAAiBkK,WAe1D,IAZA3jB,KAAKsf,MAAQ/N,SAASM,cAAc,OACpC7R,KAAKsf,MAAMzX,UAAY,oBACvB7H,KAAKsf,MAAMrS,MAAM2W,SAAW,WAC5B5jB,KAAKsf,MAAMrS,MAAM4W,SAAW,SAK5B7jB,KAAKsf,MAAMC,OAAShO,SAASM,cAAc,UAC3C7R,KAAKsf,MAAMC,OAAOtS,MAAM2W,SAAW,WACnC5jB,KAAKsf,MAAM7N,YAAYzR,KAAKsf,MAAMC,QAE7Bvf,KAAKsf,MAAMC,OAAOyH,WAQlB,CACH,GAAID,GAAM/mB,KAAKsf,MAAMC,OAAOyH,WAAW,KACvChnB,MAAK2hD,YAAcp4C,OAAOo/C,kBAAoB,IAAM5hC,EAAI6hC,8BAC9C7hC,EAAI8hC,2BACJ9hC,EAAI+hC,0BACJ/hC,EAAIgiC,yBACJhiC,EAAIiiC,wBAA0B,GAExChpD,KAAKsf,MAAMC,OAAOyH,WAAW,MAAMiiC,aAAajpD,KAAK2hD,WAAY,EAAG,EAAG3hD,KAAK2hD,WAAY,EAAG,OAhB1D,CACjC,GAAI79B,GAAWvS,SAASM,cAAe,MACvCiS,GAAS7W,MAAM9B,MAAQ,MACvB2Y,EAAS7W,MAAM8W,WAAc,OAC7BD,EAAS7W,MAAM+W,QAAW,OAC1BF,EAASG,UAAa,mDACtBjkB,KAAKsf,MAAMC,OAAO9N,YAAYqS,GAgBhC,GAAI3P,GAAKnU,IACTA,MAAKwlC,QACLxlC,KAAKkpD,SACLlpD,KAAK8D,OAAS,GAAIC,GAAO/D,KAAKsf,MAAMC,QACpCvf,KAAK8D,OAAOoR,IAAI,SAASi0C,KAAK/lB,QAAQ,IAEtCpjC,KAAK8D,OAAOyP,GAAG,MAAaY,EAAGi1C,OAAOt0B,KAAK3gB,IAC3CnU,KAAK8D,OAAOyP,GAAG,YAAaY,EAAGk1C,aAAav0B,KAAK3gB,IACjDnU,KAAK8D,OAAOyP,GAAG,QAAaY,EAAGgqB,QAAQrJ,KAAK3gB,IAC5CnU,KAAK8D,OAAOyP,GAAG,QAAaY,EAAGmqB,SAASxJ,KAAK3gB,IAC7CixC,EAAWkE,QAAQtpD,KAAK8D,OAAQqQ,EAAGkqB,SAASvJ,KAAK3gB,IACjDnU,KAAK8D,OAAOyP,GAAG,WAAaY,EAAG6pB,aAAalJ,KAAK3gB,IACjDnU,KAAK8D,OAAOyP,GAAG,UAAaY,EAAG8pB,QAAQnJ,KAAK3gB,IAC5CnU,KAAK8D,OAAOyP,GAAG,SAAaY,EAAG+pB,WAAWpJ,KAAK3gB,IAE/CnU,KAAKsf,MAAMC,OAAO5W,iBAAiB,YAAawL,EAAGo1C,kBAAkBz0B,KAAK3gB,IAC1EnU,KAAKsf,MAAMC,OAAO5W,iBAAiB,aAAcwL,EAAGiqB,cAActJ,KAAK3gB,IACvEnU,KAAKsf,MAAMC,OAAO5W,iBAAiB,iBAAkBwL,EAAGiqB,cAActJ,KAAK3gB,IAG3EnU,KAAKyZ,iBAAiBhI,YAAYzR,KAAKsf,QAQzCpc,EAAQiQ,UAAUo1C,gBAAkB,WAClC,GAAIp0C,GAAKnU,IACawG,UAAlBxG,KAAKmlD,UACPnlD,KAAKmlD,SAAS7xC,UAEhBtT,KAAKmlD,SAAWA,IAEhBnlD,KAAKmlD,SAASqE,QAEVxpD,KAAK0hD,UAAUrB,SAAS3xC,SAAW1O,KAAKypD,aAC1CzpD,KAAKmlD,SAASrwB,KAAK,KAAQ90B,KAAK0pD,QAAQ50B,KAAK3gB,GAAQ,WACrDnU,KAAKmlD,SAASrwB,KAAK,KAAQ90B,KAAK2pD,aAAa70B,KAAK3gB,G
AAK,SACvDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAK4pD,UAAU90B,KAAK3gB,GAAM,WACrDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAK2pD,aAAa70B,KAAK3gB,GAAK,SACvDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAK6pD,UAAU/0B,KAAK3gB,GAAM,WACrDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAK8pD,aAAah1B,KAAK3gB,GAAK,SACvDnU,KAAKmlD,SAASrwB,KAAK,QAAQ90B,KAAK+pD,WAAWj1B,KAAK3gB,GAAK,WACrDnU,KAAKmlD,SAASrwB,KAAK,QAAQ90B,KAAK8pD,aAAah1B,KAAK3gB,GAAK,SACvDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKgqD,QAAQl1B,KAAK3gB,GAAQ,WACrDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAQ,SACvDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAKgqD,QAAQl1B,KAAK3gB,GAAQ,WACrDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAQ,SACvDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAKkqD,SAASp1B,KAAK3gB,GAAO,WACrDnU,KAAKmlD,SAASrwB,KAAK,OAAQ90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAQ,SACvDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKkqD,SAASp1B,KAAK3gB,GAAO,WACrDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAQ,SACvDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKgqD,QAAQl1B,KAAK3gB,GAAQ,WACrDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAQ,SACvDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKkqD,SAASp1B,KAAK3gB,GAAO,WACrDnU,KAAKmlD,SAASrwB,KAAK,IAAQ90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAQ,SACvDnU,KAAKmlD,SAASrwB,KAAK,SAAS90B,KAAKgqD,QAAQl1B,KAAK3gB,GAAO,WACrDnU,KAAKmlD,SAASrwB,KAAK,SAAS90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAO,SACvDnU,KAAKmlD,SAASrwB,KAAK,WAAW90B,KAAKkqD,SAASp1B,KAAK3gB,GAAI,WACrDnU,KAAKmlD,SAASrwB,KAAK,WAAW90B,KAAKiqD,UAAUn1B,KAAK3gB,GAAK,UAGV,GAA3CnU,KAAK0hD,UAAUnB,iBAAiB7xC,UAClC1O,KAAKmlD,SAASrwB,KAAK,MAAM90B,KAAKmnD,sBAAsBryB,KAAK3gB,IACzDnU,KAAKmlD,SAASrwB,KAAK,SAAS90B,KAAKmqD,gBAAgBr1B,KAAK3gB,MAU1DjR,EAAQiQ,UAAUG,QAAU,WAC1BtT,KAAK4P,MAAQ,aACb5P,KAAKyhB,OAAS,aACdzhB,KAAK+kD,OAAQ,EAGb/kD,KAAKoqD,+BAGLpqD,KAAKmlD,SAASqE,QAGdxpD,KAAK8D,OAAOwP,UAGZtT,KAAK0T,MAEL1T,KAAKqqD,oBAAoBrqD,KAAKyZ,mBAGhCvW,EAAQiQ,UAAUk3C,oBAAsB,SAASC,GAC/C,KAAoC,GAA7BA,EAAU5mC,iBACf1jB,KAAKqqD,oBAAoBC,EAAU3mC,YACnC2mC,EAAUn5C,YAAYm5C,EAAU3mC,aAUpCzgB,EAAQiQ,UAAUo3C,YAAc,SAAUr+B,GACxC,OACEna,EAAGma,EAAOna,EAAIpR,EAAK2G,gBAAgBtH,KAAKsf,MAAMC,QAC9CvN,EAAGka,EAAOla,EAAIrR,EAAK+G,eAAe1H,KAAKsf,MAAMC,UASjDrc,EAAQiQ,UAAUkrB,SAAW,SAAU/0B,IACjC,GAAIhF,OAAO0C,UAAYhH,KAAK0iD,UAAY,MAC1C1iD,KAAKwlC,KAAK3F,QAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,QAC3ClsB,KAAKwlC,KAAKglB,SAAU,EACpBxqD,KAAKkpD,MAAMjsC,MAAQjd,KAAKyqD,YAGxBzqD,KAAK0iD,WAAY,GAAIp+C,OAAO0C,UAE5BhH,KAAK0qD,aAAa1qD,KAAKwlC,KAAK3F,WAQhC38B,EAAQiQ,UAAU6qB,aAAe,SAAU10B,GACzCtJ,KAAK2qD,iBAAiBrhD,IAUxBpG,EAAQiQ,UAAUw3C,iBAAmB,SAASrhD,GAElB9C,SAAtBxG,KAAKwlC,KAAK3F,SACZ7/B,KAAKq+B,SAAS/0B,EAGhB,IAAIw8C,GAAO9lD,KAAK4qD,WAAW5qD,KAAKwlC,KAAK3F,QASrC,IANA7/B,KAAKwlC,KAAKzG,UAAW,EACrB/+B,KAAKwlC,KAAK2K,aACVnwC,KAAKwlC,KAAK/nB,YAAczd,KAAK6qD,kBAC7B7qD,KAAKwlC,KAAK2gB,OAAS,KACnBnmD,KAAK2jD,eAAgB,EAET,MAARmC,GAA4C,GAA5B9lD,KAAK0hD,UAAUH,UAAmB,CACpDvhD,KAAK2jD,eAAgB,EACrB3jD,KAAKwlC,KAAK2gB,OAASL,EAAKzlD,GAEnBylD,EAAKgF,cACR9qD,KAAK+qD,cAAcjF,GAAK,GAG1B9lD,KAAK4tB,KAAK,aAAao9B,QAAQhrD,KAAK42B,eAAeomB,OAGnD,KAAK,GAAIiO,KAAYjrD,MAAKkrD,aAAalO,MACrC,GAAIh9C,KAAKkrD,aAAalO,MAAMl3C,eAAemlD,GAAW,CACpD,GAAIhnD,GAASjE,KAAKkrD,aAAalO,MAAMiO,GACjCr/C,GACFvL,GAAI4D,EAAO5D,GACXylD,KAAM7hD,EAGN8N,EAAG9N,EAAO8N,EACVC,EAAG/N,EAAO+N,EACVm5C,OAAQlnD,EAAOknD,OACfC,OAAQnnD,EAAOmnD,OAGjBnnD,GAAOknD,QAAS,EAChBlnD,EAAOmnD,QAAS,EAEhBprD,KAAKwlC,KAAK2K,UAAUnoC,KAAK4D,IAK/BtC,EAAMD,kBAQRnG,EAAQiQ,UAAU8qB,QAAU,SAAU30B,GACpCtJ,KAAKqrD,cAAc/hD,IAUrBpG,EAAQiQ,UAAUk4C,cAAgB,SAAS/hD,GACzC,IAAItJ,KAAKwlC,KAAKglB,QAAd,CAKAxqD,KAAKsrD,aAEL,IAAIzrB,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,QACjC/X,EAAKnU,KACLwlC,EAAOxlC,KAAKwlC,KACZ2K,EAAY3K,
EAAK2K,SACrB,IAAIA,GAAaA,EAAUxqC,QAAsC,GAA5B3F,KAAK0hD,UAAUH,UAAmB,CAErE,GAAI/hB,GAASK,EAAQ9tB,EAAIyzB,EAAK3F,QAAQ9tB,EAClC0tB,EAASI,EAAQ7tB,EAAIwzB,EAAK3F,QAAQ7tB,CAGtCm+B,GAAU9nC,QAAQ,SAAUuD,GAC1B,GAAIk6C,GAAOl6C,EAAEk6C,IAERl6C,GAAEu/C,SACLrF,EAAK/zC,EAAIoC,EAAGo3C,qBAAqBp3C,EAAGq3C,qBAAqB5/C,EAAEmG,GAAKytB,IAG7D5zB,EAAEw/C,SACLtF,EAAK9zC,EAAImC,EAAGs3C,qBAAqBt3C,EAAGu3C,qBAAqB9/C,EAAEoG,GAAKytB,MAM/Dz/B,KAAK8kD,SACR9kD,KAAK8kD,QAAS,EACd9kD,KAAK4P,aAKP,IAAkC,GAA9B5P,KAAK0hD,UAAUJ,YAAqB,CAEtC,GAA0B96C,SAAtBxG,KAAKwlC,KAAK3F,QAEZ,WADA7/B,MAAK2qD,iBAAiBrhD,EAGxB,IAAI8jB,GAAQyS,EAAQ9tB,EAAI/R,KAAKwlC,KAAK3F,QAAQ9tB,EACtCsb,EAAQwS,EAAQ7tB,EAAIhS,KAAKwlC,KAAK3F,QAAQ7tB,CAE1ChS,MAAKsjD,gBACHtjD,KAAKwlC,KAAK/nB,YAAY1L,EAAIqb,EAC1BptB,KAAKwlC,KAAK/nB,YAAYzL,EAAIqb,GAE5BrtB,KAAK6iD,UAITv5C,EAAMD,mBAORnG,EAAQiQ,UAAU+qB,WAAa,SAAU50B,GACvCtJ,KAAK2rD,eAAeriD,IAItBpG,EAAQiQ,UAAUw4C,eAAiB,SAASriD,GAC1CtJ,KAAKwlC,KAAKzG,UAAW,CACrB,IAAIoR,GAAYnwC,KAAKwlC,KAAK2K,SACtBA,IAAaA,EAAUxqC,QACzBwqC,EAAU9nC,QAAQ,SAAUuD,GAE1BA,EAAEk6C,KAAKqF,OAASv/C,EAAEu/C,OAClBv/C,EAAEk6C,KAAKsF,OAASx/C,EAAEw/C,SAEpBprD,KAAK8kD,QAAS,EACd9kD,KAAK4P,SAGL5P,KAAK6iD,UAEmB,GAAtB7iD,KAAK2jD,cACP3jD,KAAK4tB,KAAK,WAAWo9B,aAGrBhrD,KAAK4tB,KAAK,WAAWo9B,QAAQhrD,KAAK42B,eAAeomB,QAGnD1zC,EAAMD,kBAORnG,EAAQiQ,UAAUi2C,OAAS,SAAU9/C,GACnC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OACrClsB,MAAKikD,gBAAkBpkB,EACvB7/B,KAAK4rD,WAAW/rB,IASlB38B,EAAQiQ,UAAUk2C,aAAe,SAAU//C,GACzC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OACrClsB,MAAK6rD,iBAAiBhsB,IAQxB38B,EAAQiQ,UAAUgrB,QAAU,SAAU70B,GACpC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OACrClsB,MAAKikD,gBAAkBpkB,EACvB7/B,KAAK8rD,cAAcjsB,IAQrB38B,EAAQiQ,UAAU44C,WAAa,SAAUziD,GACvC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OACrClsB,MAAKgsD,iBAAiBnsB,IAQxB38B,EAAQiQ,UAAUmrB,SAAW,SAAUh1B,GACrC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OAErClsB,MAAKwlC,KAAKglB,SAAU,EACd,SAAWxqD,MAAKkpD,QACpBlpD,KAAKkpD,MAAMjsC,MAAQ,EAIrB,IAAIA,GAAQjd,KAAKkpD,MAAMjsC,MAAQ3T,EAAM2T,KACrCjd,MAAKisD,MAAMhvC,EAAO4iB,IAUpB38B,EAAQiQ,UAAU84C,MAAQ,SAAShvC,EAAO4iB,GACxC,GAA+B,GAA3B7/B,KAAK0hD,UAAU/jB,SAAkB,CACnC,GAAIuuB,GAAWlsD,KAAKyqD,WACR,MAARxtC,IACFA,EAAQ,MAENA,EAAQ,KACVA,EAAQ,GAGV,IAAIkvC,GAAsB,IACR3lD,UAAdxG,KAAKwlC,MACmB,GAAtBxlC,KAAKwlC,KAAKzG,WACZotB,EAAsBnsD,KAAKosD,YAAYpsD,KAAKwlC,KAAK3F,SAIrD,IAAIpiB,GAAczd,KAAK6qD,kBAEnBwB,EAAYpvC,EAAQivC,EACpBI,GAAM,EAAID,GAAaxsB,EAAQ9tB,EAAI0L,EAAY1L,EAAIs6C,EACnDE,GAAM,EAAIF,GAAaxsB,EAAQ7tB,EAAIyL,EAAYzL,EAAIq6C,CASvD,IAPArsD,KAAKkkD,YAAcnyC,EAAM/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACxCC,EAAMhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAE3DhS,KAAKgd,UAAUC,GACfjd,KAAKsjD,gBAAgBgJ,EAAIC,GACzBvsD,KAAKwsD,wBAEsB,MAAvBL,EAA6B,CAC/B,GAAIM,GAAuBzsD,KAAK0sD,YAAYP,EAC5CnsD,MAAKwlC,KAAK3F,QAAQ9tB,EAAI06C,EAAqB16C,EAC3C/R,KAAKwlC,KAAK3F,QAAQ7tB,EAAIy6C,EAAqBz6C,EAY7C,MATAhS,MAAK6iD,UAEU5lC,EAAXivC,EACFlsD,KAAK4tB,KAAK,QAASsN,UAAU,MAG7Bl7B,KAAK4tB,KAAK,QAASsN,UAAU,MAGxBje,IAYX/Z,EAAQiQ,UAAUirB,cAAgB,SAAS90B,GAEzC,GAAImlB,GAAQ,CAYZ,IAXInlB,EAAMolB,WACRD,EAAQnlB,EAAMolB,WAAW,IAChBplB,EAAMqlB,SAGfF,GAASnlB,EAAMqlB,OAAO,GAMpBF,EAAO,CAET,GAAIxR,GAAQjd,KAAKyqD,YACbvqB,EAAOzR,EAAQ,EACP,GAARA,IACFyR,GAAe,EAAIA,GAErBjjB,GAAU,EAAIijB,CAGd,IAAIL,GAAU7/B,KAAKuqD,aAAax4C,EAAGzI,EAAMw2B,MAAO9tB,EAAG1I,EAAMy2B,OAGzD//B,MAAKisD,MAAMhvC,EAAO4iB,GAIpBv2B,EAAMD,kBASRnG,EAAQiQ,UAAUo2C,kBAAoB,SAAUjgD,GAC9C,GAAIu2B,GAAU7/B,KAAKuqD,aAAax4C,EAAGzI,EAAMw2B,MAAO9tB,EAAG1I,EAAMy2B,OAGrD//B,MAAK2sD,UACP3sD,KAAK4sD,gBAAgB/sB,EAKvB,IAAI1rB,GAAKnU,KACL6sD,EAAY,WACd14C,EAAG24C,gBAAgBjtB,GAarB,IAXI7/B,KAAK+sD,YACPt6B,cAAczyB,KAAK+sD,YAEhB/sD,KAAKwlC,KAAKzG,WACb/+B,K
AAK+sD,WAAazzC,WAAWuzC,EAAW7sD,KAAK0hD,UAAUt7B,QAAQ5N,QAOrC,GAAxBxY,KAAK0hD,UAAUp1C,MAAe,CAEhC,IAAK,GAAI0gD,KAAUhtD,MAAK4hD,SAAS9D,MAC3B99C,KAAK4hD,SAAS9D,MAAMh4C,eAAeknD,KACrChtD,KAAK4hD,SAAS9D,MAAMkP,GAAQ1gD,OAAQ,QAC7BtM,MAAK4hD,SAAS9D,MAAMkP,GAK/B,IAAIjqC,GAAM/iB,KAAK4qD,WAAW/qB,EACf,OAAP9c,IACFA,EAAM/iB,KAAKitD,WAAWptB,IAEb,MAAP9c,GACF/iB,KAAKktD,aAAanqC,EAIpB,KAAK,GAAIojC,KAAUnmD,MAAK4hD,SAAS5E,MAC3Bh9C,KAAK4hD,SAAS5E,MAAMl3C,eAAeqgD,KACjCpjC,YAAexf,IAAQwf,EAAI1iB,IAAM8lD,GAAUpjC,YAAe3f,IAAe,MAAP2f,KACpE/iB,KAAKmtD,YAAYntD,KAAK4hD,SAAS5E,MAAMmJ,UAC9BnmD,MAAK4hD,SAAS5E,MAAMmJ,GAIjCnmD,MAAKyhB,WAYTve,EAAQiQ,UAAU25C,gBAAkB,SAAUjtB,GAC5C,GAOIx/B,GAPA0iB,GACFtb,KAAQzH,KAAKurD,qBAAqB1rB,EAAQ9tB,GAC1CpK,IAAQ3H,KAAKyrD,qBAAqB5rB,EAAQ7tB,GAC1CqV,MAAQrnB,KAAKurD,qBAAqB1rB,EAAQ9tB,GAC1CuR,OAAQtjB,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAIxCo7C,EAAgBptD,KAAK2sD,SACrBU,GAAkB,CAEtB,IAAqB7mD,QAAjBxG,KAAK2sD,SAAuB,CAE9B,GAAI3P,GAAQh9C,KAAKg9C,MACbsQ,IACJ,KAAKjtD,IAAM28C,GACT,GAAIA,EAAMl3C,eAAezF,GAAK,CAC5B,GAAIylD,GAAO9I,EAAM38C,EACbylD,GAAKyH,kBAAkBxqC,IACDvc,SAApBs/C,EAAK0H,YACPF,EAAiBtlD,KAAK3H,GAM1BitD,EAAiB3nD,OAAS,IAG5B3F,KAAK2sD,SAAW3sD,KAAKg9C,MAAMsQ,EAAiBA,EAAiB3nD,OAAS,IAEtE0nD,GAAkB,GAItB,GAAsB7mD,SAAlBxG,KAAK2sD,UAA6C,GAAnBU,EAA0B,CAE3D,GAAIvP,GAAQ99C,KAAK89C,MACb2P,IACJ,KAAKptD,IAAMy9C,GACT,GAAIA,EAAMh4C,eAAezF,GAAK,CAC5B,GAAIqtD,GAAO5P,EAAMz9C,EACbqtD,GAAKC,WAAkCnnD,SAApBknD,EAAKF,YACxBE,EAAKH,kBAAkBxqC,IACzB0qC,EAAiBzlD,KAAK3H,GAKxBotD,EAAiB9nD,OAAS,IAC5B3F,KAAK2sD,SAAW3sD,KAAK89C,MAAM2P,EAAiBA,EAAiB9nD,OAAS,KAI1E,GAAI3F,KAAK2sD,UAEP,GAAI3sD,KAAK2sD,UAAYS,EAAe,CAClC,GAAIj5C,GAAKnU,IACJmU,GAAGy5C,QACNz5C,EAAGy5C,MAAQ,GAAIpqD,GAAM2Q,EAAGmL,MAAOnL,EAAGutC,UAAUt7B,UAM9CjS,EAAGy5C,MAAMC,YAAYhuB,EAAQ9tB,EAAI,EAAG8tB,EAAQ7tB,EAAI,GAChDmC,EAAGy5C,MAAME,QAAQ35C,EAAGw4C,SAASa,YAC7Br5C,EAAGy5C,MAAMjmB,YAIP3nC,MAAK4tD,OACP5tD,KAAK4tD,MAAMlmB,QAYjBxkC,EAAQiQ,UAAUy5C,gBAAkB,SAAU/sB,GACvC7/B,KAAK2sD,UAAa3sD,KAAK4qD,WAAW/qB,KACrC7/B,KAAK2sD,SAAWnmD,OACZxG,KAAK4tD,OACP5tD,KAAK4tD,MAAMlmB,SAajBxkC,EAAQiQ,UAAUwR,QAAU,SAASpS,EAAOC,GAC1C,GAAIu7C,IAAY,EACZC,EAAWhuD,KAAKsf,MAAMC,OAAOhN,MAC7B07C,EAAYjuD,KAAKsf,MAAMC,OAAO/M,MAC9BD,IAASvS,KAAK0hD,UAAUnvC,OAASC,GAAUxS,KAAK0hD,UAAUlvC,QAAUxS,KAAKsf,MAAMrS,MAAMsF,OAASA,GAASvS,KAAKsf,MAAMrS,MAAMuF,QAAUA,GACpIxS,KAAKsf,MAAMrS,MAAMsF,MAAQA,EACzBvS,KAAKsf,MAAMrS,MAAMuF,OAASA,EAE1BxS,KAAKsf,MAAMC,OAAOtS,MAAMsF,MAAQ,OAChCvS,KAAKsf,MAAMC,OAAOtS,MAAMuF,OAAS,OAEjCxS,KAAKsf,MAAMC,OAAOhN,MAAQvS,KAAKsf,MAAMC,OAAOC,YAAcxf,KAAK2hD,WAC/D3hD,KAAKsf,MAAMC,OAAO/M,OAASxS,KAAKsf,MAAMC,OAAOsF,aAAe7kB,KAAK2hD,WAEjE3hD,KAAK0hD,UAAUnvC,MAAQA,EACvBvS,KAAK0hD,UAAUlvC,OAASA,EAExBu7C,GAAY,IAMR/tD,KAAKsf,MAAMC,OAAOhN,OAASvS,KAAKsf,MAAMC,OAAOC,YAAcxf,KAAK2hD,aAClE3hD,KAAKsf,MAAMC,OAAOhN,MAAQvS,KAAKsf,MAAMC,OAAOC,YAAcxf,KAAK2hD,WAC/DoM,GAAY,GAEV/tD,KAAKsf,MAAMC,OAAO/M,QAAUxS,KAAKsf,MAAMC,OAAOsF,aAAe7kB,KAAK2hD,aACpE3hD,KAAKsf,MAAMC,OAAO/M,OAASxS,KAAKsf,MAAMC,OAAOsF,aAAe7kB,KAAK2hD,WACjEoM,GAAY,IAIC,GAAbA,GACF/tD,KAAK4tB,KAAK,UAAWrb,MAAMvS,KAAKsf,MAAMC,OAAOhN,MAAQvS,KAAK2hD,WAAWnvC,OAAOxS,KAAKsf,MAAMC,OAAO/M,OAASxS,KAAK2hD,WAAYqM,SAAUA,EAAWhuD,KAAK2hD,WAAYsM,UAAWA,EAAYjuD,KAAK2hD,cAS9Lz+C,EAAQiQ,UAAUs0C,UAAY,SAASzK,GACrC,GAAIkR,GAAeluD,KAAKokD,SAExB,IAAIpH,YAAiBn8C,IAAWm8C,YAAiBl8C,GAC/Cd,KAAKokD,UAAYpH,MAEd,IAAI/2C,MAAMC,QAAQ82C,GACrBh9C,KAAKokD,UAAY,GAAIvjD,GACrBb,KAAKokD,UAAUnxC,IAAI+pC,OAEhB,CAAA,GAAKA,EAIR,KAAM,IAAI32C,WAAU,4BAHpBrG,MAAKokD,UAAY,GAAIvjD,GAgBvB,GAVIqtD,GAEFvtD,EAAK0H,QAAQrI,KAAKskD,eAAgB,SAAUh8C,EAAUgB,GACpD4kD,EAAax6C,IAAIpK,EAAOhB,KAK5BtI,KAAKg9C,SAEDh9C,KAAKokD,UAAW,CAElB,GAAIjwC,GAAKn
U,IACTW,GAAK0H,QAAQrI,KAAKskD,eAAgB,SAAUh8C,EAAUgB,GACpD6K,EAAGiwC,UAAU7wC,GAAGjK,EAAOhB,IAIzB,IAAI6M,GAAMnV,KAAKokD,UAAUvuC,QACzB7V,MAAKukD,UAAUpvC,GAEjBnV,KAAKmuD,oBAQPjrD,EAAQiQ,UAAUoxC,UAAY,SAASpvC,GAErC,IAAK,GADD9U,GACKmF,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IAAK,CAC9CnF,EAAK8U,EAAI3P,EACT,IAAIkN,GAAO1S,KAAKokD,UAAUlvC,IAAI7U,GAC1BylD,EAAO,GAAIviD,GAAKmP,EAAM1S,KAAK2iD,OAAQ3iD,KAAKm0B,OAAQn0B,KAAK0hD,UAEzD,IADA1hD,KAAKg9C,MAAM38C,GAAMylD,IACG,GAAfA,EAAKqF,QAAkC,GAAfrF,EAAKsF,QAAgC,OAAXtF,EAAK/zC,GAAyB,OAAX+zC,EAAK9zC,GAAa,CAC1F,GAAIyZ,GAAS,EAAStW,EAAIxP,OAAS,GAC/ByoD,EAAQ,EAAIlpD,KAAKymB,GAAKzmB,KAAKE,QACZ,IAAf0gD,EAAKqF,SAAkBrF,EAAK/zC,EAAI0Z,EAASvmB,KAAKqZ,IAAI6vC,IACnC,GAAftI,EAAKsF,SAAkBtF,EAAK9zC,EAAIyZ,EAASvmB,KAAKkZ,IAAIgwC,IAExDpuD,KAAK8kD,QAAS,EAGhB9kD,KAAKgnD,uBAC4C,GAA7ChnD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAwC,GAArB1O,KAAK08C,eAC5D18C,KAAK4nD,eACL5nD,KAAKglD,4BAEPhlD,KAAKquD,0BACLruD,KAAKsuD,kBACLtuD,KAAKuuD,kBAAkBvuD,KAAKg9C,OAC5Bh9C,KAAKwuD,gBAQPtrD,EAAQiQ,UAAUqxC,aAAe,SAASrvC,EAAIs5C,GAE5C,IAAK,GADDzR,GAAQh9C,KAAKg9C,MACRx3C,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IAAK,CAC9C,GAAInF,GAAK8U,EAAI3P,GACTsgD,EAAO9I,EAAM38C,GACbqS,EAAO+7C,EAAYjpD,EACnBsgD,GAEFA,EAAK4I,cAAch8C,EAAM1S,KAAK0hD,YAI9BoE,EAAO,GAAIviD,GAAKorD,WAAY3uD,KAAK2iD,OAAQ3iD,KAAKm0B,OAAQn0B,KAAK0hD,WAC3D1E,EAAM38C,GAAMylD,GAGhB9lD,KAAK8kD,QAAS,EACmC,GAA7C9kD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAwC,GAArB1O,KAAK08C,eAC5D18C,KAAK4nD,eACL5nD,KAAKglD,4BAEPhlD,KAAKgnD,uBACLhnD,KAAKuuD,kBAAkBvR,IAQzB95C,EAAQiQ,UAAUsxC,aAAe,SAAStvC,GAExC,IAAK,GADD6nC,GAAQh9C,KAAKg9C,MACRx3C,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IAAK,CAC9C,GAAInF,GAAK8U,EAAI3P,SACNw3C,GAAM38C,GAEfL,KAAKgnD,uBAC4C,GAA7ChnD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAwC,GAArB1O,KAAK08C,eAC5D18C,KAAK4nD,eACL5nD,KAAKglD,4BAEPhlD,KAAKquD,0BACLruD,KAAKsuD,kBACLtuD,KAAKmuD,mBACLnuD,KAAKuuD,kBAAkBvR,IASzB95C,EAAQiQ,UAAUu0C,UAAY,SAAS5J,GACrC,GAAI8Q,GAAe5uD,KAAKqkD,SAExB,IAAIvG,YAAiBj9C,IAAWi9C,YAAiBh9C,GAC/Cd,KAAKqkD,UAAYvG,MAEd,IAAI73C,MAAMC,QAAQ43C,GACrB99C,KAAKqkD,UAAY,GAAIxjD,GACrBb,KAAKqkD,UAAUpxC,IAAI6qC,OAEhB,CAAA,GAAKA,EAIR,KAAM,IAAIz3C,WAAU,4BAHpBrG,MAAKqkD,UAAY,GAAIxjD,GAgBvB,GAVI+tD,GAEFjuD,EAAK0H,QAAQrI,KAAK0kD,eAAgB,SAAUp8C,EAAUgB,GACpDslD,EAAal7C,IAAIpK,EAAOhB,KAK5BtI,KAAK89C,SAED99C,KAAKqkD,UAAW,CAElB,GAAIlwC,GAAKnU,IACTW,GAAK0H,QAAQrI,KAAK0kD,eAAgB,SAAUp8C,EAAUgB,GACpD6K,EAAGkwC,UAAU9wC,GAAGjK,EAAOhB,IAIzB,IAAI6M,GAAMnV,KAAKqkD,UAAUxuC,QACzB7V,MAAK2kD,UAAUxvC,GAGjBnV,KAAKsuD,mBAQPprD,EAAQiQ,UAAUwxC,UAAY,SAAUxvC,GAItC,IAAK,GAHD2oC,GAAQ99C,KAAK89C,MACbuG,EAAYrkD,KAAKqkD,UAEZ7+C,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IAAK,CAC9C,GAAInF,GAAK8U,EAAI3P,GAETqpD,EAAU/Q,EAAMz9C,EAChBwuD,IACFA,EAAQC,YAGV,IAAIp8C,GAAO2xC,EAAUnvC,IAAI7U,GAAK0uD,iBAAoB,GAClDjR,GAAMz9C,GAAM,GAAI+C,GAAKsP,EAAM1S,KAAMA,KAAK0hD,WAExC1hD,KAAK8kD,QAAS,EACd9kD,KAAKuuD,kBAAkBzQ,GACvB99C,KAAKgvD,qBACLhvD,KAAKquD,0BAC4C,GAA7CruD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAwC,GAArB1O,KAAK08C,eAC5D18C,KAAK4nD,eACL5nD,KAAKglD,6BAST9hD,EAAQiQ,UAAUyxC,aAAe,SAAUzvC,GAGzC,IAAK,GAFD2oC,GAAQ99C,KAAK89C,MACbuG,EAAYrkD,KAAKqkD,UACZ7+C,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IAAK,CAC9C,GAAInF,GAAK8U,EAAI3P,GAETkN,EAAO2xC,EAAUnvC,IAAI7U,GACrBqtD,EAAO5P,EAAMz9C,EACbqtD,IAEFA,EAAKoB,aACLpB,EAAKgB,cAAch8C,EAAM1S,KAAK0hD,WAC9BgM,EAAK5Q,YAIL4Q,EAAO,GAAItqD,GAAKsP,EAAM1S,KAAMA,KAAK0hD,WACjC1hD,KAAK89C,MAAMz9C,GAAMqtD,GAIrB1tD,KAAKgvD,qBAC4C,GAA7ChvD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAwC,GAArB1O,KAAK08C,eAC5D18C,KAAK4nD,eACL5nD,KAAKglD,4BAEPhlD,KAAK8kD,QAAS,EACd9kD,KAAKuuD,kBAAkBzQ,IAQzB56C,EAAQiQ,UAAU0xC,aAAe,SAAU1vC,GAEzC,IAAK,GADD2oC,
GAAQ99C,KAAK89C,MACRt4C,EAAI,EAAGC,EAAM0P,EAAIxP,OAAYF,EAAJD,EAASA,IAAK,CAC9C,GAAInF,GAAK8U,EAAI3P,GACTkoD,EAAO5P,EAAMz9C,EACbqtD,KACc,MAAZA,EAAKuB,WACAjvD,MAAKkvD,QAAiB,QAAS,MAAExB,EAAKuB,IAAI5uD,IAEnDqtD,EAAKoB,mBACEhR,GAAMz9C,IAIjBL,KAAK8kD,QAAS,EACd9kD,KAAKuuD,kBAAkBzQ,GAC0B,GAA7C99C,KAAK0hD,UAAUjB,mBAAmB/xC,SAAwC,GAArB1O,KAAK08C,eAC5D18C,KAAK4nD,eACL5nD,KAAKglD,4BAEPhlD,KAAKquD,2BAOPnrD,EAAQiQ,UAAUm7C,gBAAkB,WAClC,GAAIjuD,GACA28C,EAAQh9C,KAAKg9C,MACbc,EAAQ99C,KAAK89C,KACjB,KAAKz9C,IAAM28C,GACLA,EAAMl3C,eAAezF,KACvB28C,EAAM38C,GAAIy9C,SACVd,EAAM38C,GAAI8uD,gBAId,KAAK9uD,IAAMy9C,GACT,GAAIA,EAAMh4C,eAAezF,GAAK,CAC5B,GAAIqtD,GAAO5P,EAAMz9C,EACjBqtD,GAAKtkC,KAAO,KACZskC,EAAKrkC,GAAK,KACVqkC,EAAK5Q,YAaX55C,EAAQiQ,UAAUo7C,kBAAoB,SAASxrC,GAC7C,GAAI1iB,GAGA6b,EAAW1V,OACX2V,EAAW3V,MACf,KAAKnG,IAAM0iB,GACT,GAAIA,EAAIjd,eAAezF,GAAK,CAC1B,GAAIgH,GAAQ0b,EAAI1iB,GAAIuU,UACNpO,UAAVa,IACF6U,EAAyB1V,SAAb0V,EAA0B7U,EAAQnC,KAAK4G,IAAIzE,EAAO6U,GAC9DC,EAAyB3V,SAAb2V,EAA0B9U,EAAQnC,KAAKwH,IAAIrF,EAAO8U,IAMpE,GAAiB3V,SAAb0V,GAAuC1V,SAAb2V,EAC5B,IAAK9b,IAAM0iB,GACLA,EAAIjd,eAAezF,IACrB0iB,EAAI1iB,GAAI+uD,cAAclzC,EAAUC,IAUxCjZ,EAAQiQ,UAAUsO,OAAS,WACzBzhB,KAAK2kB,QAAQ3kB,KAAK0hD,UAAUnvC,MAAOvS,KAAK0hD,UAAUlvC,QAClDxS,KAAK6iD,WAQP3/C,EAAQiQ,UAAU0vC,QAAU,SAAS3pB,GACnC,GAAInS,GAAM/mB,KAAKsf,MAAMC,OAAOyH,WAAW,KAEvCD,GAAIkiC,aAAajpD,KAAK2hD,WAAY,EAAG,EAAG3hD,KAAK2hD,WAAY,EAAG,EAG5D,IAAI0N,GAAIrvD,KAAKsf,MAAMC,OAAOhN,MAASvS,KAAK2hD,WACpCh2C,EAAI3L,KAAKsf,MAAMC,OAAO/M,OAAUxS,KAAK2hD,UACzC56B,GAAIE,UAAU,EAAG,EAAGooC,EAAG1jD,GAGvBob,EAAIuoC,OACJvoC,EAAIwoC,UAAUvvD,KAAKyd,YAAY1L,EAAG/R,KAAKyd,YAAYzL,GACnD+U,EAAI9J,MAAMjd,KAAKid,MAAOjd,KAAKid,OAE3Bjd,KAAK+jD,eACHhyC,EAAK/R,KAAKurD,qBAAqB,GAC/Bv5C,EAAKhS,KAAKyrD,qBAAqB,IAEjCzrD,KAAKgkD,mBACHjyC,EAAK/R,KAAKurD,qBAAqBvrD,KAAKsf,MAAMC,OAAOC,YAAcxf,KAAK2hD,YACpE3vC,EAAKhS,KAAKyrD,qBAAqBzrD,KAAKsf,MAAMC,OAAOsF,aAAe7kB,KAAK2hD,aAGvD,GAAVzoB,IACJl5B,KAAKwvD,gBAAgB,sBAAuBzoC,IAClB,GAAtB/mB,KAAKwlC,KAAKzG,UAA4Cv4B,SAAvBxG,KAAKwlC,KAAKzG,UAA4D,GAAlC/+B,KAAK0hD,UAAUF,kBACpFxhD,KAAKwvD,gBAAgB,aAAczoC,KAIb,GAAtB/mB,KAAKwlC,KAAKzG,UAA4Cv4B,SAAvBxG,KAAKwlC,KAAKzG,UAA4D,GAAlC/+B,KAAK0hD,UAAUD,kBACpFzhD,KAAKwvD,gBAAgB,aAAazoC,GAAI,GAGxB,GAAVmS,GAC2B,GAA3Bl5B,KAAK6hD,oBACP7hD,KAAKwvD,gBAAgB,oBAAqBzoC,GAQ9CA,EAAI0oC,UAEU,GAAVv2B,GACFnS,EAAIE,UAAU,EAAG,EAAGooC,EAAG1jD,IAU3BzI,EAAQiQ,UAAUmwC,gBAAkB,SAASoM,EAASC,GAC3BnpD,SAArBxG,KAAKyd,cACPzd,KAAKyd,aACH1L,EAAG,EACHC,EAAG,IAISxL,SAAZkpD,IACF1vD,KAAKyd,YAAY1L,EAAI29C,GAEPlpD,SAAZmpD,IACF3vD,KAAKyd,YAAYzL,EAAI29C,GAGvB3vD,KAAK4tB,KAAK,gBAQZ1qB,EAAQiQ,UAAU03C,gBAAkB,WAClC,OACE94C,EAAG/R,KAAKyd,YAAY1L,EACpBC,EAAGhS,KAAKyd,YAAYzL,IASxB9O,EAAQiQ,UAAU6J,UAAY,SAASC,GACrCjd,KAAKid,MAAQA,GAQf/Z,EAAQiQ,UAAUs3C,UAAY,WAC5B,MAAOzqD,MAAKid,OAUd/Z,EAAQiQ,UAAUo4C,qBAAuB,SAASx5C,GAChD,OAAQA,EAAI/R,KAAKyd,YAAY1L,GAAK/R,KAAKid,OAUzC/Z,EAAQiQ,UAAUq4C,qBAAuB,SAASz5C,GAChD,MAAOA,GAAI/R,KAAKid,MAAQjd,KAAKyd,YAAY1L,GAU3C7O,EAAQiQ,UAAUs4C,qBAAuB,SAASz5C,GAChD,OAAQA,EAAIhS,KAAKyd,YAAYzL,GAAKhS,KAAKid,OAUzC/Z,EAAQiQ,UAAUu4C,qBAAuB,SAAS15C,GAChD,MAAOA,GAAIhS,KAAKid,MAAQjd,KAAKyd,YAAYzL,GAU3C9O,EAAQiQ,UAAUu5C,YAAc,SAAUnnC,GACxC,OAAQxT,EAAG/R,KAAKwrD,qBAAqBjmC,EAAIxT,GAAIC,EAAGhS,KAAK0rD,qBAAqBnmC,EAAIvT,KAShF9O,EAAQiQ,UAAUi5C,YAAc,SAAU7mC,GACxC,OAAQxT,EAAG/R,KAAKurD,qBAAqBhmC,EAAIxT,GAAIC,EAAGhS,KAAKyrD,qBAAqBlmC,EAAIvT,KAUhF9O,EAAQiQ,UAAUy8C,WAAa,SAAS7oC,EAAI8oC,GACvBrpD,SAAfqpD,IACFA,GAAa,EAIf,IAAI7S,GAAQh9C,KAAKg9C,MACb3J,IAEJ,KAAK,GAAIhzC,KAAM28C,GACTA,EAAMl3C,eAAezF,KACvB28C,EAAM38C,GAAIyvD,eAAe9vD,KAAKid,MAAMjd,KAAK+jD,cAAc/jD,KAAKgkD,mBACxDhH,EAAM38C,GAAIyqD,aACZzX,E
AASrrC,KAAK3H,IAGV28C,EAAM38C,GAAI0vD,UAAYF,IACxB7S,EAAM38C,GAAI0rC,KAAKhlB,GAOvB,KAAK,GAAInb,GAAI,EAAGokD,EAAO3c,EAAS1tC,OAAYqqD,EAAJpkD,EAAUA,KAC5CoxC,EAAM3J,EAASznC,IAAImkD,UAAYF,IACjC7S,EAAM3J,EAASznC,IAAImgC,KAAKhlB,IAW9B7jB,EAAQiQ,UAAU88C,WAAa,SAASlpC,GACtC,GAAI+2B,GAAQ99C,KAAK89C,KACjB,KAAK,GAAIz9C,KAAMy9C,GACb,GAAIA,EAAMh4C,eAAezF,GAAK,CAC5B,GAAIqtD,GAAO5P,EAAMz9C,EACjBqtD,GAAK1qB,SAAShjC,KAAKid,OACfywC,EAAKC,WACP7P,EAAMz9C,GAAI0rC,KAAKhlB,KAYvB7jB,EAAQiQ,UAAU+8C,kBAAoB,SAASnpC,GAC7C,GAAI+2B,GAAQ99C,KAAK89C,KACjB,KAAK,GAAIz9C,KAAMy9C,GACTA,EAAMh4C,eAAezF,IACvBy9C,EAAMz9C,GAAI6vD,kBAAkBnpC,IASlC7jB,EAAQiQ,UAAU00C,WAAa,WACgB,GAAzC7nD,KAAK0hD,UAAUb,wBACjB7gD,KAAKmwD,qBAKP,KADA,GAAIn5C,GAAQ,EACLhX,KAAK8kD,QAAU9tC,EAAQhX,KAAK0hD,UAAUN,yBAC3CphD,KAAKowD,eACLp5C,GAG0C,IAAxChX,KAAK0hD,UAAUL,uBACjBrhD,KAAKilD,WAAWz+C,QAAW,GAAO,GAGS,GAAzCxG,KAAK0hD,UAAUb,wBACjB7gD,KAAKqwD,uBAUTntD,EAAQiQ,UAAUg9C,oBAAsB,WACtC,GAAInT,GAAQh9C,KAAKg9C,KACjB,KAAK,GAAI38C,KAAM28C,GACTA,EAAMl3C,eAAezF,IACJ,MAAf28C,EAAM38C,GAAI0R,GAA4B,MAAfirC,EAAM38C,GAAI2R,IACnCgrC,EAAM38C,GAAIiwD,UAAUv+C,EAAIirC,EAAM38C,GAAI8qD,OAClCnO,EAAM38C,GAAIiwD,UAAUt+C,EAAIgrC,EAAM38C,GAAI+qD,OAClCpO,EAAM38C,GAAI8qD,QAAS,EACnBnO,EAAM38C,GAAI+qD,QAAS,IAW3BloD,EAAQiQ,UAAUk9C,oBAAsB,WACtC,GAAIrT,GAAQh9C,KAAKg9C,KACjB,KAAK,GAAI38C,KAAM28C,GACTA,EAAMl3C,eAAezF,IACM,MAAzB28C,EAAM38C,GAAIiwD,UAAUv+C,IACtBirC,EAAM38C,GAAI8qD,OAASnO,EAAM38C,GAAIiwD,UAAUv+C,EACvCirC,EAAM38C,GAAI+qD,OAASpO,EAAM38C,GAAIiwD,UAAUt+C,IAa/C9O,EAAQiQ,UAAUo9C,UAAY,SAASC,GACrC,GAAIxT,GAAQh9C,KAAKg9C,KACjB,KAAK,GAAI38C,KAAM28C,GACb,GAAIA,EAAMl3C,eAAezF,IAAO28C,EAAM38C,GAAIowD,SAASD,GACjD,OAAO,CAGX,QAAO,GAUTttD,EAAQiQ,UAAUu9C,mBAAqB,WACrC,GAEIvK,GAFA3zB,EAAWxyB,KAAKy8C,wBAChBO,EAAQh9C,KAAKg9C,MAEb2T,GAAe,CAEnB,IAAI3wD,KAAK0hD,UAAUT,YAAc,EAC/B,IAAKkF,IAAUnJ,GACTA,EAAMl3C,eAAeqgD,KACvBnJ,EAAMmJ,GAAQyK,oBAAoBp+B,EAAUxyB,KAAK0hD,UAAUT,aAC3D0P,GAAe,OAKnB,KAAKxK,IAAUnJ,GACTA,EAAMl3C,eAAeqgD,KACvBnJ,EAAMmJ,GAAQ0K,aAAar+B,GAC3Bm+B,GAAe,EAKrB,IAAoB,GAAhBA,EAAsB,CACxB,GAAIG,GAAgB9wD,KAAK0hD,UAAUR,YAAch8C,KAAKwH,IAAI1M,KAAKid,MAAM,IACrE,OAAI6zC,GAAgB,GAAI9wD,KAAK0hD,UAAUT,aAC9B,EAGAjhD,KAAKuwD,UAAUO,GAG1B,OAAO,GAIT5tD,EAAQiQ,UAAU49C,oBAAsB,WACtC,GAAI/T,GAAQh9C,KAAKg9C,KACjB,KAAK,GAAImJ,KAAUnJ,GACbA,EAAMl3C,eAAeqgD,IACvBnJ,EAAMmJ,GAAQ6K,kBAKpB9tD,EAAQiQ,UAAU89C,mBAAqB,WACrCjxD,KAAKkxD,sBAAsB,uBACgB,GAAvClxD,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,SAC7E/gD,KAAKmxD,mBAAmB,wBAS5BjuD,EAAQiQ,UAAUi9C,aAAe,WAC/B,IAAKpwD,KAAKujD,kBACW,GAAfvjD,KAAK8kD,OAAgB,CACvB,GAAIsM,IAAmB,EACnBC,GAAsB,CAE1BrxD,MAAKkxD,sBAAsB,8BAC3B,IAAII,GAAatxD,KAAKkxD,sBAAsB,qBACD,IAAvClxD,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,UAC7EsQ,EAAsBrxD,KAAKmxD,mBAAmB,sBAIhD,KAAK,GAAI3rD,GAAI,EAAGA,EAAI8rD,EAAW3rD,OAAQH,IAAM4rD,EAAmBE,EAAW,IAAMF,CAGjFpxD,MAAK8kD,OAASsM,GAAoBC,EAEf,GAAfrxD,KAAK8kD,OACP9kD,KAAKixD,qBAI4B,GAA7BjxD,KAAKyjD,uBACPzjD,KAAK4tB,KAAK,sBACV5tB,KAAKyjD,sBAAuB,GAIhCzjD,KAAKohD,4BAYXl+C,EAAQiQ,UAAUo+C,eAAiB,WAEjCvxD,KAAK+kD,MAAQv+C,OAGbxG,KAAKwxD,mBAEL,IAAIC,GAAYntD,KAAK64B,KACrBn9B,MAAKowD,cACL,IAAI7T,GAAcj4C,KAAK64B,MAAQs0B,GAG1BzxD,KAAKq8C,eAAiBr8C,KAAKs8C,WAAa,EAAIC,GAAsC,GAAvBv8C,KAAKw8C,iBAA2C,GAAfx8C,KAAK8kD,SACpG9kD,KAAKowD,eAGkB,GAAnBpwD,KAAKs8C,aACPt8C,KAAKw8C,gBAAiB,GAI1B,IAAIkV,GAAkBptD,KAAK64B,KAC3Bn9B,MAAK6iD,UACL7iD,KAAKs8C,WAAah4C,KAAK64B,MAAQu0B,EAG/B1xD,KAAK4P,SAGe,mBAAXrG,UACTA,OAAOooD,sBAAwBpoD,OAAOooD,uBAAyBpoD,OAAOqoD,0BACvCroD,OAAOsoD,6BAA+BtoD,OAAOuoD,yBAM9E5uD,EAAQiQ,UAAUvD,MAAQ,WACxB,GAAmB,GAAf5P,KAAK8kD,QAAqC,GAAnB9kD,KAAK8iD,YAAsC,GAAnB9iD,KAAK+iD,YAAyC,G
AAtB/iD,KAAKgjD,cACzEhjD,KAAK+kD,QAEN/kD,KAAK+kD,MADqB,GAAxB/kD,KAAKwlD,gBACMj8C,OAAO+P,WAAWtZ,KAAKuxD,eAAez8B,KAAK90B,MAAOA,KAAKq8C,gBAGvD9yC,OAAOooD,sBAAsB3xD,KAAKuxD,eAAez8B,KAAK90B,YAOvE,IAFAA,KAAK6iD,UAED7iD,KAAKohD,wBAA0B,EAAG,CAKpC,GAAIjtC,GAAKnU,KACL8T,GACFi+C,WAAY59C,EAAGitC,wBAEjBphD,MAAKohD,wBAA0B,EAC/BphD,KAAKyjD,sBAAuB,EAC5BnqC,WAAW,WACTnF,EAAGyZ,KAAK,aAAc9Z,IACrB,OAGH9T,MAAKohD,wBAA0B,GAWrCl+C,EAAQiQ,UAAUq+C,kBAAoB,WACpC,GAAuB,GAAnBxxD,KAAK8iD,YAAsC,GAAnB9iD,KAAK+iD,WAAiB,CAChD,GAAItlC,GAAczd,KAAK6qD,iBACvB7qD,MAAKsjD,gBAAgB7lC,EAAY1L,EAAE/R,KAAK8iD,WAAYrlC,EAAYzL,EAAEhS,KAAK+iD,YAEzE,GAA0B,GAAtB/iD,KAAKgjD,cAAoB,CAC3B,GAAI92B,IACFna,EAAG/R,KAAKsf,MAAMC,OAAOC,YAAc,EACnCxN,EAAGhS,KAAKsf,MAAMC,OAAOsF,aAAe,EAEtC7kB,MAAKisD,MAAMjsD,KAAKid,OAAO,EAAIjd,KAAKgjD,eAAgB92B,KAQpDhpB,EAAQiQ,UAAU6+C,aAAe,WACF,GAAzBhyD,KAAKujD,iBACPvjD,KAAKujD,kBAAmB,GAGxBvjD,KAAKujD,kBAAmB,EACxBvjD,KAAK4P,UAWT1M,EAAQiQ,UAAUu1C,uBAAyB,SAASlC,GAIlD,GAHqBhgD,SAAjBggD,IACFA,GAAe,GAE0B,GAAvCxmD,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,QAAiB,CAC9F/gD,KAAKgvD,oBAEL,KAAK,GAAI7I,KAAUnmD,MAAKkvD,QAAiB,QAAS,MAC5ClvD,KAAKkvD,QAAiB,QAAS,MAAEppD,eAAeqgD,IACwB3/C,SAAtExG,KAAK89C,MAAM99C,KAAKkvD,QAAiB,QAAS,MAAE/I,GAAQ8L,qBAC/CjyD,MAAKkvD,QAAiB,QAAS,MAAE/I,OAK3C,CAEHnmD,KAAKkvD,QAAiB,QAAS,QAC/B,KAAK,GAAIlC,KAAUhtD,MAAK89C,MAClB99C,KAAK89C,MAAMh4C,eAAeknD,KAC5BhtD,KAAK89C,MAAMkP,GAAQiC,IAAM,MAM/BjvD,KAAKquD,0BACA7H,IACHxmD,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAWT1M,EAAQiQ,UAAU67C,mBAAqB,WACrC,GAA2C,GAAvChvD,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,QAC7E,IAAK,GAAIiM,KAAUhtD,MAAK89C,MACtB,GAAI99C,KAAK89C,MAAMh4C,eAAeknD,GAAS,CACrC,GAAIU,GAAO1tD,KAAK89C,MAAMkP,EACtB,IAAgB,MAAZU,EAAKuB,IAAa,CACpB,GAAI9I,GAAS,UAAUnyC,OAAO05C,EAAKrtD,GACnCL,MAAKkvD,QAAiB,QAAS,MAAE/I,GAAU,GAAI5iD,IACtClD,GAAG8lD,EACFlJ,KAAK,EACLG,MAAM,SACNC,MAAM,GACN6U,mBAAmB,SACblyD,KAAK0hD,WACrBgM,EAAKuB,IAAMjvD,KAAKkvD,QAAiB,QAAS,MAAE/I,GAC5CuH,EAAKuB,IAAIgD,aAAevE,EAAKrtD,GAC7BqtD,EAAKyE,wBAYfjvD,EAAQiQ,UAAUgpC,wBAA0B,WAC1C,IAAK,GAAIiW,KAAS/M,GACZA,EAAYv/C,eAAessD,KAC7BlvD,EAAQiQ,UAAUi/C,GAAS/M,EAAY+M,KAQ7ClvD,EAAQiQ,UAAUk/C,cAAgB,WAChC15B,QAAQhF,IAAI,mEACZ3zB,KAAKsyD,kBAMPpvD,EAAQiQ,UAAUm/C,eAAiB,WACjC,GAAIC,KACJ,KAAK,GAAIpM,KAAUnmD,MAAKg9C,MACtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,GAClBqM,GAAkBxyD,KAAKg9C,MAAMmO,OAC7BsH,GAAkBzyD,KAAKg9C,MAAMoO,QAC7BprD,KAAKokD,UAAUxxC,MAAMuzC,GAAQp0C,GAAK7M,KAAKwoB,MAAMo4B,EAAK/zC,IAAM/R,KAAKokD,UAAUxxC,MAAMuzC,GAAQn0C,GAAK9M,KAAKwoB,MAAMo4B,EAAK9zC,KAC5GugD,EAAUvqD,MAAM3H,GAAG8lD,EAAOp0C,EAAE7M,KAAKwoB,MAAMo4B,EAAK/zC,GAAGC,EAAE9M,KAAKwoB,MAAMo4B,EAAK9zC,GAAGwgD,eAAeA,EAAeC,eAAeA,IAIvHzyD,KAAKokD,UAAUvvC,OAAO09C,IAMxBrvD,EAAQiQ,UAAUu/C,aAAe,SAASv9C,GACxC,GAAIo9C,KACJ,IAAY/rD,SAAR2O,GACF,GAA0B,GAAtBlP,MAAMC,QAAQiP,IAChB,IAAK,GAAI3P,GAAI,EAAGA,EAAI2P,EAAIxP,OAAQH,IAC9B,GAA2BgB,SAAvBxG,KAAKg9C,MAAM7nC,EAAI3P,IAAmB,CACpC,GAAIsgD,GAAO9lD,KAAKg9C,MAAM7nC,EAAI3P,GAC1B+sD,GAAUp9C,EAAI3P,KAAOuM,EAAG7M,KAAKwoB,MAAMo4B,EAAK/zC,GAAIC,EAAG9M,KAAKwoB,MAAMo4B,EAAK9zC,SAKnE,IAAwBxL,SAApBxG,KAAKg9C,MAAM7nC,GAAoB,CACjC,GAAI2wC,GAAO9lD,KAAKg9C,MAAM7nC,EACtBo9C,GAAUp9C,IAAQpD,EAAG7M,KAAKwoB,MAAMo4B,EAAK/zC,GAAIC,EAAG9M,KAAKwoB,MAAMo4B,EAAK9zC,SAKhE,KAAK,GAAIm0C,KAAUnmD,MAAKg9C,MACtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EACtBoM,GAAUpM,IAAWp0C,EAAG7M,KAAKwoB,MAAMo4B,EAAK/zC,GAAIC,EAAG9M,KAAKwoB,MAAMo4B,EAAK9zC,IAIrE,MAAOugD,IAWTrvD,EAAQiQ,UAAUw/C,YAAc,SAAUxM,EAAQ13C,GAChD,GAAIzO,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrB3/C,SAAZiI,IACFA,KAEF,IAAImkD,IAAgB7gD,EAA
G/R,KAAKg9C,MAAMmJ,GAAQp0C,EAAGC,EAAGhS,KAAKg9C,MAAMmJ,GAAQn0C,EACnEvD,GAAQmV,SAAWgvC,EACnBnkD,EAAQokD,aAAe1M,EAEvBnmD,KAAK6nB,OAAOpZ,OAGZkqB,SAAQhF,IAAI,iCAWhBzwB,EAAQiQ,UAAU0U,OAAS,SAAUpZ,GACnC,MAAgBjI,UAAZiI,OACFA,OAGwBjI,SAAtBiI,EAAQkb,SAAoClb,EAAQkb,QAAa5X,EAAG,EAAGC,EAAG,IACpDxL,SAAtBiI,EAAQkb,OAAO5X,IAA6BtD,EAAQkb,OAAO5X,EAAK,GAC1CvL,SAAtBiI,EAAQkb,OAAO3X,IAA6BvD,EAAQkb,OAAO3X,EAAK,GAC1CxL,SAAtBiI,EAAQwO,QAAoCxO,EAAQwO,MAAYjd,KAAKyqD,aAC/CjkD,SAAtBiI,EAAQmV,WAAoCnV,EAAQmV,SAAY5jB,KAAK6qD,mBAC/CrkD,SAAtBiI,EAAQs4C,YAAoCt4C,EAAQs4C,WAAaj3C,SAAS,IAC1ErB,EAAQs4C,aAAc,IAAsBt4C,EAAQs4C,WAAaj3C,SAAS,IAC1ErB,EAAQs4C,aAAc,IAAsBt4C,EAAQs4C,cACrBvgD,SAA/BiI,EAAQs4C,UAAUj3C,WAA0BrB,EAAQs4C,UAAUj3C,SAAW,KACpCtJ,SAArCiI,EAAQs4C,UAAU+L,iBAAgCrkD,EAAQs4C,UAAU+L,eAAiB,qBAEzF9yD,MAAK+yD,YAAYtkD,KAcnBvL,EAAQiQ,UAAU4/C,YAAc,SAAUtkD,GACxC,GAAgBjI,SAAZiI,EAEF,YADAA,KAKFzO,MAAKsrD,cACiB,GAAlB78C,EAAQukD,SACVhzD,KAAKwiD,eAAiB/zC,EAAQokD,aAC9B7yD,KAAKyiD,mBAAqBh0C,EAAQkb,QAIb,GAAnB3pB,KAAKmiD,YACPniD,KAAKizD,kBAAkB,GAGzBjzD,KAAKoiD,YAAcpiD,KAAKyqD,YACxBzqD,KAAKsiD,kBAAoBtiD,KAAK6qD,kBAC9B7qD,KAAKqiD,YAAc5zC,EAAQwO,MAI3Bjd,KAAKgd,UAAUhd,KAAKqiD,YACpB,IAAI6Q,GAAalzD,KAAKosD,aAAar6C,EAAG,GAAM/R,KAAKsf,MAAMC,OAAOC,YAAaxN,EAAG,GAAMhS,KAAKsf,MAAMC,OAAOsF,eAClGsuC,GACFphD,EAAGmhD,EAAWnhD,EAAItD,EAAQmV,SAAS7R,EACnCC,EAAGkhD,EAAWlhD,EAAIvD,EAAQmV,SAAS5R,EAErChS,MAAKuiD,mBACHxwC,EAAG/R,KAAKsiD,kBAAkBvwC,EAAIohD,EAAmBphD,EAAI/R,KAAKqiD,YAAc5zC,EAAQkb,OAAO5X,EACvFC,EAAGhS,KAAKsiD,kBAAkBtwC,EAAImhD,EAAmBnhD,EAAIhS,KAAKqiD,YAAc5zC,EAAQkb,OAAO3X,GAIvD,GAA9BvD,EAAQs4C,UAAUj3C,SACO,MAAvB9P,KAAKwiD,gBACPxiD,KAAKozD,eAAiBpzD,KAAK6iD,QAC3B7iD,KAAK6iD,QAAU7iD,KAAKqzD,gBAGpBrzD,KAAKgd,UAAUhd,KAAKqiD,aACpBriD,KAAKsjD,gBAAgBtjD,KAAKuiD,kBAAkBxwC,EAAG/R,KAAKuiD,kBAAkBvwC,GACtEhS,KAAK6iD,YAIP7iD,KAAKiiD,eAAiB,GAAKjiD,KAAKo8C,kBAAoB3tC,EAAQs4C,UAAUj3C,SAAW,OAAU,EAAI9P,KAAKo8C,kBACpGp8C,KAAKkiD,wBAA0BzzC,EAAQs4C,UAAU+L,eACjD9yD,KAAKozD,eAAiBpzD,KAAK6iD,QAC3B7iD,KAAK6iD,QAAU7iD,KAAKizD,kBACpBjzD,KAAK6iD,UACL7iD,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAQT1M,EAAQiQ,UAAUkgD,cAAgB,WAChC,GAAIT,IAAgB7gD,EAAG/R,KAAKg9C,MAAMh9C,KAAKwiD,gBAAgBzwC,EAAGC,EAAGhS,KAAKg9C,MAAMh9C,KAAKwiD,gBAAgBxwC,GACzFkhD,EAAalzD,KAAKosD,aAAar6C,EAAG,GAAM/R,KAAKsf,MAAMC,OAAOC,YAAaxN,EAAG,GAAMhS,KAAKsf,MAAMC,OAAOsF,eAClGsuC,GACFphD,EAAGmhD,EAAWnhD,EAAI6gD,EAAa7gD,EAC/BC,EAAGkhD,EAAWlhD,EAAI4gD,EAAa5gD,GAE7BswC,EAAoBtiD,KAAK6qD,kBACzBtI,GACFxwC,EAAGuwC,EAAkBvwC,EAAIohD,EAAmBphD,EAAI/R,KAAKid,MAAQjd,KAAKyiD,mBAAmB1wC,EACrFC,EAAGswC,EAAkBtwC,EAAImhD,EAAmBnhD,EAAIhS,KAAKid,MAAQjd,KAAKyiD,mBAAmBzwC,EAGvFhS,MAAKsjD,gBAAgBf,EAAkBxwC,EAAEwwC,EAAkBvwC,GAC3DhS,KAAKozD,kBAGPlwD,EAAQiQ,UAAUm4C,YAAc,WACH,MAAvBtrD,KAAKwiD,iBACPxiD,KAAK6iD,QAAU7iD,KAAKozD,eACpBpzD,KAAKwiD,eAAiB,KACtBxiD,KAAKyiD,mBAAqB,OAS9Bv/C,EAAQiQ,UAAU8/C,kBAAoB,SAAU9Q,GAC9CniD,KAAKmiD,WAAaA,GAAcniD,KAAKmiD,WAAaniD,KAAKiiD,eACvDjiD,KAAKmiD,YAAcniD,KAAKiiD,cAExB,IAAIxwB,GAAW9wB,EAAKqP,gBAAgBhQ,KAAKkiD,yBAAyBliD,KAAKmiD,WAEvEniD,MAAKgd,UAAUhd,KAAKoiD,aAAepiD,KAAKqiD,YAAcriD,KAAKoiD,aAAe3wB,GAC1EzxB,KAAKsjD,gBACHtjD,KAAKsiD,kBAAkBvwC,GAAK/R,KAAKuiD,kBAAkBxwC,EAAI/R,KAAKsiD,kBAAkBvwC,GAAK0f,EACnFzxB,KAAKsiD,kBAAkBtwC,GAAKhS,KAAKuiD,kBAAkBvwC,EAAIhS,KAAKsiD,kBAAkBtwC,GAAKyf,GAGrFzxB,KAAKozD,iBACLpzD,KAAK8kD,QAAS,EAGV9kD,KAAKmiD,YAAc,IACrBniD,KAAKmiD,WAAa,EAEhBniD,KAAK6iD,QADoB,MAAvB7iD,KAAKwiD,eACQxiD,KAAKqzD,cAGLrzD,KAAKozD,eAEtBpzD,KAAK4tB,KAAK,uBAId1qB,EAAQiQ,UAAUigD,eAAiB,aAQnClwD,EAAQiQ,UAAUs2C,SAAW,WAC3B,OAAQzpD,KAAKsoD,WAAatoD,KAAKsoD,UAAUgL,QAQ3CpwD,EAAQiQ,UAAU6vB,SAAW,WAC3B,MAAOhjC,MAAKgd,aAQd9Z,EAAQiQ,UAAUogD,SAAW,WAC
3B,MAAOvzD,MAAKyqD,aAQdvnD,EAAQiQ,UAAUqgD,qBAAuB,WACvC,MAAOxzD,MAAKosD,aAAar6C,EAAG,GAAM/R,KAAKsf,MAAMC,OAAOC,YAAaxN,EAAG,GAAMhS,KAAKsf,MAAMC,OAAOsF,gBAI9F3hB,EAAQiQ,UAAUsgD,eAAiB,SAAStN,GAC1C,MAA2B3/C,UAAvBxG,KAAKg9C,MAAMmJ,GACNnmD,KAAKg9C,MAAMmJ,GAAQC,YAD5B,QAKFvmD,EAAOD,QAAUsD,GAKb,SAASrD,EAAQD,EAASM,GAoB9B,QAASkD,GAAMurD,EAAYxrD,EAASuwD,GAClC,IAAKvwD,EACH,KAAM,qBAER,IAAI+K,IAAU,QAAQ,WAClBwzC,EAAY/gD,EAAKsN,sBAAsBC,EAAOwlD,EAClD1zD,MAAKyO,QAAUizC,EAAU5D,MACzB99C,KAAKw+C,QAAUkD,EAAUlD,QACzBx+C,KAAKyO,QAAsB,aAAIilD,EAA+B,aAG9D1zD,KAAKmD,QAAUA,EAGfnD,KAAKK,GAASmG,OACdxG,KAAK2zD,OAASntD,OACdxG,KAAK4zD,KAASptD,OACdxG,KAAKglC,MAASx+B,OACdxG,KAAK6zD,cAAgB7zD,KAAKyO,QAAQ8D,MAAQvS,KAAKyO,QAAQsvC,yBACvD/9C,KAAKqH,MAASb,OACdxG,KAAKqzC,UAAW,EAChBrzC,KAAKsM,OAAQ,EACbtM,KAAK8zD,iBAAmBnsD,IAAI,EAAEF,KAAK,EAAE8K,MAAM,EAAEC,OAAO,EAAEuhD,MAAM,GAC5D/zD,KAAKg0D,YAAa,EAElBh0D,KAAKopB,KAAO,KACZppB,KAAKqpB,GAAK,KACVrpB,KAAKivD,IAAM,KAEXjvD,KAAKi0D,WAAa,KAClBj0D,KAAKk0D,SAAW,KAIhBl0D,KAAKm0D,kBACLn0D,KAAKo0D,gBAELp0D,KAAK2tD,WAAY,EAEjB3tD,KAAKq0D,YAAc,EACnBr0D,KAAKs0D,aAAc,EAEnBt0D,KAAK0uD,cAAcC,GAEnB3uD,KAAKu0D,qBAAsB,EAC3Bv0D,KAAKw0D,cAAgBprC,KAAK,KAAMC,GAAG,KAAMorC,cACzCz0D,KAAK00D,cAAgB,KAhEvB,GAAI/zD,GAAOT,EAAoB,GAC3BqD,EAAOrD,EAAoB,GAuE/BkD,GAAK+P,UAAUu7C,cAAgB,SAASC,GACtC,GAAKA,EAAL,CAIA,GAAIzgD,IAAU,QAAQ,WAAW,WAAW,YAAY,WAAW,kBAAkB,kBAAkB,QACrG,2BAA2B,aAAa,mBAAmB,OAAO,eAAe,iBAoCnF,QAlCAvN,EAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASkgD,GAEvBnoD,SAApBmoD,EAAWvlC,OAA+BppB,KAAK2zD,OAAShF,EAAWvlC,MACjD5iB,SAAlBmoD,EAAWtlC,KAA+BrpB,KAAK4zD,KAAOjF,EAAWtlC,IAE/C7iB,SAAlBmoD,EAAWtuD,KAA+BL,KAAKK,GAAKsuD,EAAWtuD,IAC1CmG,SAArBmoD,EAAWlmC,QAA+BzoB,KAAKyoB,MAAQkmC,EAAWlmC,MAAOzoB,KAAKg0D,YAAa,GAEtExtD,SAArBmoD,EAAW3pB,QAA6BhlC,KAAKglC,MAAQ2pB,EAAW3pB,OAC3Cx+B,SAArBmoD,EAAWtnD,QAA6BrH,KAAKqH,MAAQsnD,EAAWtnD,OAC1Cb,SAAtBmoD,EAAWhpD,SAA6B3F,KAAKw+C,QAAQK,aAAe8P,EAAWhpD,QAE1Da,SAArBmoD,EAAWxjD,QACbnL,KAAKyO,QAAQ6vC,cAAe,EACxB39C,EAAKwD,SAASwqD,EAAWxjD,QAC3BnL,KAAKyO,QAAQtD,MAAMA,MAAQwjD,EAAWxjD,MACtCnL,KAAKyO,QAAQtD,MAAMkB,UAAYsiD,EAAWxjD,QAGX3E,SAA3BmoD,EAAWxjD,MAAMA,QAA0BnL,KAAKyO,QAAQtD,MAAMA,MAAQwjD,EAAWxjD,MAAMA,OACxD3E,SAA/BmoD,EAAWxjD,MAAMkB,YAA0BrM,KAAKyO,QAAQtD,MAAMkB,UAAYsiD,EAAWxjD,MAAMkB,WAChE7F,SAA3BmoD,EAAWxjD,MAAMmB,QAA0BtM,KAAKyO,QAAQtD,MAAMmB,MAAQqiD,EAAWxjD,MAAMmB,SAK/FtM,KAAK88C,UAEL98C,KAAKq0D,WAAar0D,KAAKq0D,YAAoC7tD,SAArBmoD,EAAWp8C,MACjDvS,KAAKs0D,YAAct0D,KAAKs0D,aAAsC9tD,SAAtBmoD,EAAWhpD,OAEnD3F,KAAK6zD,cAAgB7zD,KAAKyO,QAAQ8D,MAAOvS,KAAKyO,QAAQsvC,yBAG9C/9C,KAAKyO,QAAQxB,OACnB,IAAK,OAAiBjN,KAAK+rC,KAAO/rC,KAAK20D,SAAW,MAClD,KAAK,QAAiB30D,KAAK+rC,KAAO/rC,KAAK40D,UAAY,MACnD,KAAK,eAAiB50D,KAAK+rC,KAAO/rC,KAAK60D,gBAAkB,MACzD,KAAK,YAAiB70D,KAAK+rC,KAAO/rC,KAAK80D,aAAe,MACtD,SAAsB90D,KAAK+rC,KAAO/rC,KAAK20D,aAQ3CvxD,EAAK+P,UAAU2pC,QAAU,WACvB98C,KAAK8uD,aAEL9uD,KAAKopB,KAAOppB,KAAKmD,QAAQ65C,MAAMh9C,KAAK2zD,SAAW,KAC/C3zD,KAAKqpB,GAAKrpB,KAAKmD,QAAQ65C,MAAMh9C,KAAK4zD,OAAS,KAC3C5zD,KAAK2tD,UAAa3tD,KAAKopB,MAAQppB,KAAKqpB,GAEhCrpB,KAAK2tD,WACP3tD,KAAKopB,KAAK2rC,WAAW/0D,MACrBA,KAAKqpB,GAAG0rC,WAAW/0D,QAGfA,KAAKopB,MACPppB,KAAKopB,KAAK4rC,WAAWh1D,MAEnBA,KAAKqpB,IACPrpB,KAAKqpB,GAAG2rC,WAAWh1D,QAQzBoD,EAAK+P,UAAU27C,WAAa,WACtB9uD,KAAKopB,OACPppB,KAAKopB,KAAK4rC,WAAWh1D,MACrBA,KAAKopB,KAAO,MAEVppB,KAAKqpB,KACPrpB,KAAKqpB,GAAG2rC,WAAWh1D,MACnBA,KAAKqpB,GAAK,MAGZrpB,KAAK2tD,WAAY,GAQnBvqD,EAAK+P,UAAUq6C,SAAW,WACxB,MAA6B,kBAAfxtD,MAAKglC,MAAuBhlC,KAAKglC,QAAUhlC,KAAKglC;EAQhE5hC,EAAK+P,UAAUyB,SAAW,WACxB,MAAO5U,MAAKqH,OASdjE,EAAK+P,UAAUi8C,cAAgB,SAAStjD,EAAKY,GAC3C,IAAK1M,KAAKq0D,YAA6B7tD,SAAfxG,KAAKqH,MAAqB,CAChD,GAAI4V,
IAASjd,KAAKyO,QAAQ0Y,SAAWnnB,KAAKyO,QAAQyY,WAAaxa,EAAMZ,EACrE9L,MAAKyO,QAAQ8D,OAAQvS,KAAKqH,MAAQyE,GAAOmR,EAAQjd,KAAKyO,QAAQyY,SAC9DlnB,KAAK6zD,cAAgB7zD,KAAKyO,QAAQ8D,MAAOvS,KAAKyO,QAAQsvC,2BAU1D36C,EAAK+P,UAAU44B,KAAO,WACpB,KAAM,uCAQR3oC,EAAK+P,UAAUo6C,kBAAoB,SAASxqC,GAC1C,GAAI/iB,KAAK2tD,UAAW,CAClB,GAAIv+B,GAAU,GACV6lC,EAAQj1D,KAAKopB,KAAKrX,EAClBmjD,EAAQl1D,KAAKopB,KAAKpX,EAClBmjD,EAAMn1D,KAAKqpB,GAAGtX,EACdqjD,EAAMp1D,KAAKqpB,GAAGrX,EACdqjD,EAAOtyC,EAAItb,KACX6tD,EAAOvyC,EAAIpb,IAEXujB,EAAOlrB,KAAKu1D,mBAAmBN,EAAOC,EAAOC,EAAKC,EAAKC,EAAMC,EAEjE,OAAelmC,GAAPlE,EAGR,OAAO,GAIX9nB,EAAK+P,UAAUqiD,UAAY,WACzB,GAAIC,GAAWz1D,KAAKyO,QAAQtD,KAgB5B,OAfiC,MAA7BnL,KAAKyO,QAAQ6vC,aACfmX,GACEppD,UAAWrM,KAAKqpB,GAAG5a,QAAQtD,MAAMkB,UAAUD,OAC3CE,MAAOtM,KAAKqpB,GAAG5a,QAAQtD,MAAMmB,MAAMF,OACnCjB,MAAOnL,KAAKqpB,GAAG5a,QAAQtD,MAAMiB,SAGK,QAA7BpM,KAAKyO,QAAQ6vC,cAAuD,GAA7Bt+C,KAAKyO,QAAQ6vC,gBAC3DmX,GACEppD,UAAWrM,KAAKopB,KAAK3a,QAAQtD,MAAMkB,UAAUD,OAC7CE,MAAOtM,KAAKopB,KAAK3a,QAAQtD,MAAMmB,MAAMF,OACrCjB,MAAOnL,KAAKopB,KAAK3a,QAAQtD,MAAMiB,SAId,GAAjBpM,KAAKqzC,SAA4BoiB,EAASppD,UACvB,GAAdrM,KAAKsM,MAAuBmpD,EAASnpD,MACTmpD,EAAStqD,OAWhD/H,EAAK+P,UAAUwhD,UAAY,SAAS5tC,GAKlC,GAHAA,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIO,UAActnB,KAAK01D,gBAEnB11D,KAAKopB,MAAQppB,KAAKqpB,GAAI,CAExB,GAGInX,GAHA+8C,EAAMjvD,KAAK21D,MAAM5uC,EAIrB,IAAI/mB,KAAKyoB,MAAO,CACd,GAAyC,GAArCzoB,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EAAa,CAC5D,GAAI2G,GAAY,IAAK,IAAK51D,KAAKopB,KAAKrX,EAAIk9C,EAAIl9C,GAAK,IAAK/R,KAAKqpB,GAAGtX,EAAIk9C,EAAIl9C,IAClE8jD,EAAY,IAAK,IAAK71D,KAAKopB,KAAKpX,EAAIi9C,EAAIj9C,GAAK,IAAKhS,KAAKqpB,GAAGrX,EAAIi9C,EAAIj9C,GACtEE,IAASH,EAAE6jD,EAAW5jD,EAAE6jD,OAGxB3jD,GAAQlS,KAAK81D,aAAa,GAE5B91D,MAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,QAG3C,CACH,GAAID,GAAGC,EACHyZ,EAASzrB,KAAKw+C,QAAQK,aAAe,EACrCiH,EAAO9lD,KAAKopB,IACX08B,GAAKvzC,OACRuzC,EAAKkQ,OAAOjvC,GAEV++B,EAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAI+zC,EAAKvzC,MAAQ,EAC1BP,EAAI8zC,EAAK9zC,EAAIyZ,IAGb1Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAI8zC,EAAKtzC,OAAS,GAE7BxS,KAAKi2D,QAAQlvC,EAAKhV,EAAGC,EAAGyZ,GACxBvZ,EAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1CzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,KAUhD5O,EAAK+P,UAAUuiD,cAAgB,WAC7B,MAAqB,IAAjB11D,KAAKqzC,SACCnuC,KAAKwH,IAAIxH,KAAK4G,IAAI9L,KAAK6zD,cAAe7zD,KAAKyO,QAAQ0Y,UAAW,GAAInnB,KAAKm2D,iBAG7D,GAAdn2D,KAAKsM,MACApH,KAAKwH,IAAIxH,KAAK4G,IAAI9L,KAAKyO,QAAQuvC,WAAYh+C,KAAKyO,QAAQ0Y,UAAW,GAAInnB,KAAKm2D,iBAG5EjxD,KAAKwH,IAAI1M,KAAKyO,QAAQ8D,MAAO,GAAIvS,KAAKm2D,kBAKnD/yD,EAAK+P,UAAUijD,mBAAqB,WAClC,GAAyC,GAArCp2D,KAAKyO,QAAQqyC,aAAaC,SAAwD,GAArC/gD,KAAKyO,QAAQqyC,aAAapyC,QACzE,MAAO1O,MAAKivD,GAET,IAAyC,GAArCjvD,KAAKyO,QAAQqyC,aAAapyC,QACjC,OAAQqD,EAAE,EAAEC,EAAE,EAGd,IAAIqkD,GAAO,KACPC,EAAO,KACP3P,EAAS3mD,KAAKyO,QAAQqyC,aAAaE,UACnCl6C,EAAO9G,KAAKyO,QAAQqyC,aAAah6C,KAEjC8X,EAAK1Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACpC8M,EAAK3Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EA2JxC,OA1JY,YAARlL,GAA8B,iBAARA,EACpB5B,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACjEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,GAEvB7e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,GAGzB7e,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAK
opB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,GAEvB7e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,IAGtB,YAAR/X,IACFuvD,EAAY1P,EAAS9nC,EAAdD,EAAmB5e,KAAKopB,KAAKrX,EAAIskD,IAGnCnxD,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KACtEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,GAEvB5e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,GAGzB5e,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GACxBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,GAEvB5e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAC7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,IAGtB,YAAR9X,IACFwvD,EAAY3P,EAAS/nC,EAAdC,EAAmB7e,KAAKopB,KAAKpX,EAAIskD,IAI7B,iBAARxvD,EACH5B,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACrEqkD,EAAOr2D,KAAKopB,KAAKrX,EAEfukD,EADEt2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACjBhS,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,EAG3B7e,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,GAG7B3Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KAExEqkD,EADEr2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,EACjB/R,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAG3B5e,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAEpC03C,EAAOt2D,KAAKopB,KAAKpX,GAGJ,cAARlL,GAELuvD,EADEr2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,EACjB/R,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAG3B5e,KAAKqpB,GAAGtX,GAAK,EAAI40C,GAAU/nC,EAEpC03C,EAAOt2D,KAAKopB,KAAKpX,GAEF,YAARlL,GACPuvD,EAAOr2D,KAAKopB,KAAKrX,EAEfukD,EADEt2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACjBhS,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,EAG3B7e,KAAKqpB,GAAGrX,GAAK,EAAI20C,GAAU9nC,GAIhC3Z,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,GACjEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,GAE/Br2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,GAGjCr2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,GAE/Br2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS9nC,EAC9By3C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS9nC,EAC9Bw3C,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,EAAOr2D,KAAKqpB,GAAGtX,EAAIskD,IAInCnxD,KAAK2lB,IAAI7qB,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAAK7M,KAAK2lB,IAAI7qB,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KACtEhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EACpBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,GAE/
Bt2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,GAGjCt2D,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,IACzBhS,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAExBskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,GAE/Bt2D,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,IAE7BskD,EAAOr2D,KAAKopB,KAAKrX,EAAI40C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKopB,KAAKpX,EAAI20C,EAAS/nC,EAC9B03C,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,EAAOt2D,KAAKqpB,GAAGrX,EAAIskD,MAOtCvkD,EAAGskD,EAAMrkD,EAAGskD,IASxBlzD,EAAK+P,UAAUwiD,MAAQ,SAAU5uC,GAI/B,GAFAA,EAAIa,YACJb,EAAIc,OAAO7nB,KAAKopB,KAAKrX,EAAG/R,KAAKopB,KAAKpX,GACO,GAArChS,KAAKyO,QAAQqyC,aAAapyC,QAAiB,CAC7C,GAAyC,GAArC1O,KAAKyO,QAAQqyC,aAAaC,QAAkB,CAC9C,GAAIkO,GAAMjvD,KAAKo2D,oBACf,OAAa,OAATnH,EAAIl9C,GACNgV,EAAIe,OAAO9nB,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GAC9B+U,EAAIlH,SACG,OAKPkH,EAAIwvC,iBAAiBtH,EAAIl9C,EAAEk9C,EAAIj9C,EAAEhS,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GACpD+U,EAAIlH,SACGovC,GAMT,MAFAloC,GAAIwvC,iBAAiBv2D,KAAKivD,IAAIl9C,EAAE/R,KAAKivD,IAAIj9C,EAAEhS,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GAC9D+U,EAAIlH,SACG7f,KAAKivD,IAMd,MAFAloC,GAAIe,OAAO9nB,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,GAC9B+U,EAAIlH,SACG,MAYXzc,EAAK+P,UAAU8iD,QAAU,SAAUlvC,EAAKhV,EAAGC,EAAGyZ,GAE5C1E,EAAIa,YACJb,EAAI2E,IAAI3Z,EAAGC,EAAGyZ,EAAQ,EAAG,EAAIvmB,KAAKymB,IAAI,GACtC5E,EAAIlH,UAWNzc,EAAK+P,UAAU4iD,OAAS,SAAUhvC,EAAKwC,EAAMxX,EAAGC,GAC9C,GAAIuX,EAAM,CACRxC,EAAIQ,MAASvnB,KAAKopB,KAAKiqB,UAAYrzC,KAAKqpB,GAAGgqB,SAAY,QAAU,IACjErzC,KAAKyO,QAAQ8uC,SAAW,MAAQv9C,KAAKyO,QAAQ+uC,QAC7C,IAAIuW,EAEJ,IAAuB,GAAnB/zD,KAAKg0D,WAAoB,CAC3B,GAAIxtB,GAAQpiC,OAAOmlB,GAAMxhB,MAAM,MAC3ByuD,EAAYhwB,EAAM7gC,OAClB43C,EAAWr5C,OAAOlE,KAAKyO,QAAQ8uC,SACnCwW,GAAQ/hD,GAAK,EAAIwkD,GAAa,EAAIjZ,CAGlC,KAAK,GADDhrC,GAAQwU,EAAI0vC,YAAYjwB,EAAM,IAAIj0B,MAC7B/M,EAAI,EAAOgxD,EAAJhxD,EAAeA,IAAK,CAClC,GAAI8hB,GAAYP,EAAI0vC,YAAYjwB,EAAMhhC,IAAI+M,KAC1CA,GAAQ+U,EAAY/U,EAAQ+U,EAAY/U,EAE1C,GAAIC,GAASxS,KAAKyO,QAAQ8uC,SAAWiZ,EACjC/uD,EAAOsK,EAAIQ,EAAQ,EACnB5K,EAAMqK,EAAIQ,EAAS,CAGvBxS,MAAK8zD,iBAAmBnsD,IAAIA,EAAIF,KAAKA,EAAK8K,MAAMA,EAAMC,OAAOA,EAAOuhD,MAAMA,GAG/E,GAAIA,GAAQ/zD,KAAK8zD,gBAAgBC,KAEjChtC,GAAIuoC,OAE+B,cAA/BtvD,KAAKyO,QAAQwvC,iBAChBl3B,EAAIwoC,UAAUx9C,EAAGgiD,GACjB/zD,KAAK02D,yBAAyB3vC,GAC9BhV,EAAI,EACJgiD,EAAQ,GAIT/zD,KAAK22D,eAAe5vC,GACpB/mB,KAAK42D,eAAe7vC,EAAIhV,EAAEgiD,EAAOvtB,EAAOgwB,EAAWjZ,GAEnDx2B,EAAI0oC,YASLrsD,EAAK+P,UAAUujD,yBAA2B,SAAS3vC,GAClD,GAAIlI,GAAK7e,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,EAC3B4M,EAAK5e,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,EAC3B8kD,EAAiB3xD,KAAK4xD,MAAMj4C,EAAID,IAGf,GAAjBi4C,GAA4B,EAALj4C,GAAYi4C,EAAiB,GAAU,EAALj4C,KAC5Di4C,GAAkC3xD,KAAKymB,IAGxC5E,EAAIgwC,OAAOF,IASZzzD,EAAK+P,UAAUwjD,eAAiB,SAAS5vC,GACxC,GAA8BvgB,SAA1BxG,KAAKyO,QAAQgvC,UAAoD,OAA1Bz9C,KAAKyO,QAAQgvC,UAA+C,SAA1Bz9C,KAAKyO,QAAQgvC,SAAqB,CAC9G12B,EAAIiB,UAAYhoB,KAAKyO,QAAQgvC,QAE7B,IAAIuZ,GAAa,CAEoB,gBAA/Bh3D,KAAKyO,QAAQwvC,eACfl3B,EAAIkwC,SAAuC,IAA7Bj3D,KAAK8zD,gBAAgBvhD,MAA4C,IAA9BvS,KAAK8zD,gBAAgBthD,OAAcxS,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,QAE/F,cAA/BxS,KAAKyO,QAAQwvC,eACpBl3B,EAAIkwC,SAAuC,IAA7Bj3D,KAAK8zD,gBAAgBvhD,QAAevS,KAAK8zD,gBAAgBthD,OAASwkD,GAAah3D,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,QAExG,cAA/BxS,KAAKyO,QAAQwvC,eACpBl3B,EAAIkwC,SAAuC,IAA7Bj3D,KAAK8zD,gBAAgBvhD,MAAaykD,EAAYh3D,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,QAG7GuU,EAAIkwC,SAASj3D,KAAK8zD,gBAAgBrsD,K
AAMzH,KAAK8zD,gBAAgBnsD,IAAK3H,KAAK8zD,gBAAgBvhD,MAAOvS,KAAK8zD,gBAAgBthD,UAezHpP,EAAK+P,UAAUyjD,eAAiB,SAAS7vC,EAAKhV,EAAGgiD,EAAOvtB,EAAOgwB,EAAWjZ,GAMxE,GAJDx2B,EAAIiB,UAAYhoB,KAAKyO,QAAQ6uC,WAAa,QAC1Cv2B,EAAIuB,UAAY,SAGoB,cAA/BtoB,KAAKyO,QAAQwvC,eAAgC,CAC/C,GAAI+Y,GAAa,CACkB,eAA/Bh3D,KAAKyO,QAAQwvC,gBACfl3B,EAAIwB,aAAe,aACnBwrC,GAAS,EAAIiD,GAEyB,cAA/Bh3D,KAAKyO,QAAQwvC,gBACpBl3B,EAAIwB,aAAe,UACnBwrC,GAAS,EAAIiD,GAGbjwC,EAAIwB,aAAe,aAIrBxB,GAAIwB,aAAe,QAIjBvoB,MAAKyO,QAAQivC,gBAAkB,IACjC32B,EAAIO,UAActnB,KAAKyO,QAAQivC,gBAC/B32B,EAAIY,YAAc3nB,KAAKyO,QAAQkvC,gBAC/B52B,EAAImwC,SAAc,QAErB,KAAK,GAAI1xD,GAAI,EAAOgxD,EAAJhxD,EAAeA,IACzBxF,KAAKyO,QAAQivC,gBAAkB,GAChC32B,EAAIowC,WAAW3wB,EAAMhhC,GAAIuM,EAAGgiD,GAEhChtC,EAAIyB,SAASge,EAAMhhC,GAAIuM,EAAGgiD,GAC1BA,GAASxW,GAaXn6C,EAAK+P,UAAU2hD,cAAgB,SAAS/tC,GAEtCA,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIO,UAAYtnB,KAAK01D,eAErB,IAAIzG,GAAM,IAEV,IAAwBzoD,SAApBugB,EAAIqwC,YAA2B,CACjCrwC,EAAIuoC,MAEJ,IAAI+H,IAAW,EAEbA,GAD+B7wD,SAA7BxG,KAAKyO,QAAQ0vC,KAAKx4C,QAAkDa,SAA1BxG,KAAKyO,QAAQ0vC,KAAKC,KACnDp+C,KAAKyO,QAAQ0vC,KAAKx4C,OAAO3F,KAAKyO,QAAQ0vC,KAAKC,MAG3C,EAAE,GAIfr3B,EAAIqwC,YAAYC,GAChBtwC,EAAIuwC,eAAiB,EAGrBrI,EAAMjvD,KAAK21D,MAAM5uC,GAGjBA,EAAIqwC,aAAa,IACjBrwC,EAAIuwC,eAAiB,EACrBvwC,EAAI0oC,cAIJ1oC,GAAIa,YACJb,EAAIwwC,QAAU,QACsB/wD,SAAhCxG,KAAKyO,QAAQ0vC,KAAKE,UAEpBt3B,EAAIywC,WAAWx3D,KAAKopB,KAAKrX,EAAE/R,KAAKopB,KAAKpX,EAAEhS,KAAKqpB,GAAGtX,EAAE/R,KAAKqpB,GAAGrX,GACpDhS,KAAKyO,QAAQ0vC,KAAKx4C,OAAO3F,KAAKyO,QAAQ0vC,KAAKC,IAAIp+C,KAAKyO,QAAQ0vC,KAAKE,UAAUr+C,KAAKyO,QAAQ0vC,KAAKC,MAE9D53C,SAA7BxG,KAAKyO,QAAQ0vC,KAAKx4C,QAAkDa,SAA1BxG,KAAKyO,QAAQ0vC,KAAKC,IAEnEr3B,EAAIywC,WAAWx3D,KAAKopB,KAAKrX,EAAE/R,KAAKopB,KAAKpX,EAAEhS,KAAKqpB,GAAGtX,EAAE/R,KAAKqpB,GAAGrX,GACpDhS,KAAKyO,QAAQ0vC,KAAKx4C,OAAO3F,KAAKyO,QAAQ0vC,KAAKC,OAIhDr3B,EAAIc,OAAO7nB,KAAKopB,KAAKrX,EAAG/R,KAAKopB,KAAKpX,GAClC+U,EAAIe,OAAO9nB,KAAKqpB,GAAGtX,EAAG/R,KAAKqpB,GAAGrX,IAEhC+U,EAAIlH,QAIN,IAAI7f,KAAKyoB,MAAO,CACd,GAAIvW,EACJ,IAAyC,GAArClS,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EAAa,CAC5D,GAAI2G,GAAY,IAAK,IAAK51D,KAAKopB,KAAKrX,EAAIk9C,EAAIl9C,GAAK,IAAK/R,KAAKqpB,GAAGtX,EAAIk9C,EAAIl9C,IAClE8jD,EAAY,IAAK,IAAK71D,KAAKopB,KAAKpX,EAAIi9C,EAAIj9C,GAAK,IAAKhS,KAAKqpB,GAAGrX,EAAIi9C,EAAIj9C,GACtEE,IAASH,EAAE6jD,EAAW5jD,EAAE6jD,OAGxB3jD,GAAQlS,KAAK81D,aAAa,GAE5B91D,MAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,KAUhD5O,EAAK+P,UAAU2iD,aAAe,SAAU2B,GACtC,OACE1lD,GAAI,EAAI0lD,GAAcz3D,KAAKopB,KAAKrX,EAAI0lD,EAAaz3D,KAAKqpB,GAAGtX,EACzDC,GAAI,EAAIylD,GAAcz3D,KAAKopB,KAAKpX,EAAIylD,EAAaz3D,KAAKqpB,GAAGrX,IAa7D5O,EAAK+P,UAAU+iD,eAAiB,SAAUnkD,EAAGC,EAAGyZ,EAAQgsC,GACtD,GAAIrJ,GAA6B,GAApBqJ,EAAa,EAAE,GAASvyD,KAAKymB,EAC1C,QACE5Z,EAAGA,EAAI0Z,EAASvmB,KAAKqZ,IAAI6vC,GACzBp8C,EAAGA,EAAIyZ,EAASvmB,KAAKkZ,IAAIgwC,KAW7BhrD,EAAK+P,UAAU0hD,iBAAmB,SAAS9tC,GACzC,GAAI7U,EAMJ,IAJA6U,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIiB,UAAYjB,EAAIY,YACpBZ,EAAIO,UAAYtnB,KAAK01D,gBAEjB11D,KAAKopB,MAAQppB,KAAKqpB,GAAI,CAExB,GAAI4lC,GAAMjvD,KAAK21D,MAAM5uC,GAEjBqnC,EAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,GACrEpM,GAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,gBAE1D,IAAyC,GAArCl+C,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EAAa,CAC5D,GAAI2G,GAAY,IAAK,IAAK51D,KAAKopB,KAAKrX,EAAIk9C,EAAIl9C,GAAK,IAAK/R,KAAKqpB,GAAGtX,EAAIk9C,EAAIl9C,IAClE8jD,EAAY,IAAK,IAAK71D,KAAKopB,KAAKpX,EAAIi9C,EAAIj9C,GAAK,IAAKhS,KAAKqpB,GAAGrX,EAAIi9C,EAAIj9C,GACtEE,IAASH,EAAE6jD,EAAW5jD,EAAE6jD,OAGxB3jD,GAAQlS,KAAK81D,aAAa,GAG5B/uC,GAAI2wC,MAAMxlD,EAAMH,EAAGG,EAAMF,EAAGo8C,EAAOzoD,GACnCohB,EAAInH
,OACJmH,EAAIlH,SAGA7f,KAAKyoB,OACPzoB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,OAG3C,CAEH,GAAID,GAAGC,EACHyZ,EAAS,IAAOvmB,KAAKwH,IAAI,IAAI1M,KAAKw+C,QAAQK,cAC1CiH,EAAO9lD,KAAKopB,IACX08B,GAAKvzC,OACRuzC,EAAKkQ,OAAOjvC,GAEV++B,EAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAiB,GAAb+zC,EAAKvzC,MAClBP,EAAI8zC,EAAK9zC,EAAIyZ,IAGb1Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAkB,GAAd8zC,EAAKtzC,QAEpBxS,KAAKi2D,QAAQlvC,EAAKhV,EAAGC,EAAGyZ,EAGxB,IAAI2iC,GAAQ,GAAMlpD,KAAKymB,GACnBhmB,GAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,gBAC1DhsC,GAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1C1E,EAAI2wC,MAAMxlD,EAAMH,EAAGG,EAAMF,EAAGo8C,EAAOzoD,GACnCohB,EAAInH,OACJmH,EAAIlH,SAGA7f,KAAKyoB,QACPvW,EAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1CzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,MAKlD5O,EAAK+P,UAAUwkD,eAAiB,SAAS7pD,GACvC,GAAImhD,GAAMjvD,KAAKo2D,qBAEXrkD,EAAI7M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAG9N,KAAKopB,KAAKrX,EAAK,EAAEjE,GAAG,EAAIA,GAAImhD,EAAIl9C,EAAI7M,KAAK4uB,IAAIhmB,EAAE,GAAG9N,KAAKqpB,GAAGtX,EAC9EC,EAAI9M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAG9N,KAAKopB,KAAKpX,EAAK,EAAElE,GAAG,EAAIA,GAAImhD,EAAIj9C,EAAI9M,KAAK4uB,IAAIhmB,EAAE,GAAG9N,KAAKqpB,GAAGrX,CAElF,QAAQD,EAAEA,EAAEC,EAAEA,IAWhB5O,EAAK+P,UAAUykD,oBAAsB,SAASxuC,EAAKrC,GACjD,GAIIxB,GAAI6oC,EAAMyJ,EAAkBC,EAAiBC,EAJ7C/oD,EAAgB,GAChBC,EAAY,EACZC,EAAM,EACNC,EAAO,EAEP6oD,EAAY,GACZlS,EAAO9lD,KAAKqpB,EAKhB,KAJY,GAARD,IACF08B,EAAO9lD,KAAKopB,MAGAja,GAAPD,GAA2BF,EAAZC,GAA2B,CAC/C,GAAIG,GAAwB,IAAdF,EAAMC,EAOpB,IALAoW,EAAMvlB,KAAK23D,eAAevoD,GAC1Bg/C,EAAQlpD,KAAK4xD,MAAOhR,EAAK9zC,EAAIuT,EAAIvT,EAAK8zC,EAAK/zC,EAAIwT,EAAIxT,GACnD8lD,EAAmB/R,EAAK+R,iBAAiB9wC,EAAIqnC,GAC7C0J,EAAkB5yD,KAAKyqB,KAAKzqB,KAAK4uB,IAAIvO,EAAIxT,EAAE+zC,EAAK/zC,EAAE,GAAK7M,KAAK4uB,IAAIvO,EAAIvT,EAAE8zC,EAAK9zC,EAAE,IAC7E+lD,EAAaF,EAAmBC,EAC5B5yD,KAAK2lB,IAAIktC,GAAcC,EACzB,KAEoB,GAAbD,EACK,GAAR3uC,EACFla,EAAME,EAGND,EAAOC,EAIG,GAARga,EACFja,EAAOC,EAGPF,EAAME,EAIVH,IAIF,MAFAsW,GAAIzX,EAAIsB,EAEDmW,GAUTniB,EAAK+P,UAAUyhD,WAAa,SAAS7tC,GAEnCA,EAAIY,YAAc3nB,KAAKw1D,YACvBzuC,EAAIiB,UAAYjB,EAAIY,YACpBZ,EAAIO,UAAYtnB,KAAK01D,eAGrB,IAAItH,GAAOzoD,EAAQsyD,CAGnB,IAAIj4D,KAAKopB,MAAQppB,KAAKqpB,GAAI,CAKxB,GAHArpB,KAAK21D,MAAM5uC,GAG8B,GAArC/mB,KAAKyO,QAAQqyC,aAAapyC,QAAiB,CAC7C,GAAIugD,GAAMjvD,KAAKo2D,oBACf6B,GAAWj4D,KAAK43D,qBAAoB,EAAO7wC,EAC3C,IAAImxC,GAAWl4D,KAAK23D,eAAezyD,KAAKwH,IAAI,EAAKurD,EAASnqD,EAAI,IAC9DsgD,GAAQlpD,KAAK4xD,MAAOmB,EAASjmD,EAAIkmD,EAASlmD,EAAKimD,EAASlmD,EAAImmD,EAASnmD,OAElE,CACHq8C,EAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EACrE,IAAI6M,GAAM5e,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EAC5B8M,EAAM7e,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAC5BmmD,EAAoBjzD,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAC7Cu5C,EAAep4D,KAAKqpB,GAAGwuC,iBAAiB9wC,EAAKqnC,GAC7CiK,GAAiBF,EAAoBC,GAAgBD,CAEzDF,MACAA,EAASlmD,GAAK,EAAIsmD,GAAiBr4D,KAAKopB,KAAKrX,EAAIsmD,EAAgBr4D,KAAKqpB,GAAGtX,EACzEkmD,EAASjmD,GAAK,EAAIqmD,GAAiBr4D,KAAKopB,KAAKpX,EAAIqmD,EAAgBr4D,KAAKqpB,GAAGrX,EAU3E,GANArM,GAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,iBACtDn3B,EAAI2wC,MAAMO,EAASlmD,EAAEkmD,EAASjmD,EAAGo8C,EAAOzoD,GACxCohB,EAAInH,OACJmH,EAAIlH,SAGA7f,KAAKyoB,MAAO,CACd,GAAIvW,EAEFA,GADuC,GAArClS,KAAKyO,QAAQqyC,aAAapyC,SAA0B,MAAPugD,EACvCjvD,KAAK23D,eAAe,IAGpB33D,KAAK81D,aAAa,IAE5B91D,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,QAG3C,CAEH,GACID,GAAGC,EAAG0lD,EADN5R,EAAO9lD,KAAKopB,KAEZqC,EAAS,IAAOvmB,KAAKwH,IAAI,IAAI1M,KAAKw+C,QAAQK,aACzCiH,GAAKvzC,OACRuzC,EAAKkQ,OAAOjvC,GAEV++B,EAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI
+zC,EAAK/zC,EAAiB,GAAb+zC,EAAKvzC,MAClBP,EAAI8zC,EAAK9zC,EAAIyZ,EACbisC,GACE3lD,EAAGA,EACHC,EAAG8zC,EAAK9zC,EACRo8C,MAAO,GAAMlpD,KAAKymB,MAIpB5Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAkB,GAAd8zC,EAAKtzC,OAClBklD,GACE3lD,EAAG+zC,EAAK/zC,EACRC,EAAGA,EACHo8C,MAAO,GAAMlpD,KAAKymB,KAGtB5E,EAAIa,YAEJb,EAAI2E,IAAI3Z,EAAGC,EAAGyZ,EAAQ,EAAG,EAAIvmB,KAAKymB,IAAI,GACtC5E,EAAIlH,QAGJ,IAAIla,IAAU,GAAK,EAAI3F,KAAKyO,QAAQ8D,OAASvS,KAAKyO,QAAQyvC,gBAC1Dn3B,GAAI2wC,MAAMA,EAAM3lD,EAAG2lD,EAAM1lD,EAAG0lD,EAAMtJ,MAAOzoD,GACzCohB,EAAInH,OACJmH,EAAIlH,SAGA7f,KAAKyoB,QACPvW,EAAQlS,KAAKk2D,eAAenkD,EAAGC,EAAGyZ,EAAQ,IAC1CzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOvW,EAAMH,EAAGG,EAAMF,MAiBlD5O,EAAK+P,UAAUoiD,mBAAqB,SAAU+C,EAAGC,EAAIC,EAAGC,EAAIC,EAAGC,GAC7D,GAAInvD,GAAc,CAClB,IAAIxJ,KAAKopB,MAAQppB,KAAKqpB,GACpB,GAAyC,GAArCrpB,KAAKyO,QAAQqyC,aAAapyC,QAAiB,CAC7C,GAAI2nD,GAAMC,CACV,IAAyC,GAArCt2D,KAAKyO,QAAQqyC,aAAapyC,SAAwD,GAArC1O,KAAKyO,QAAQqyC,aAAaC,QACzEsV,EAAOr2D,KAAKivD,IAAIl9C,EAChBukD,EAAOt2D,KAAKivD,IAAIj9C,MAEb,CACH,GAAIi9C,GAAMjvD,KAAKo2D,oBACfC,GAAOpH,EAAIl9C,EACXukD,EAAOrH,EAAIj9C,EAEb,GACI2T,GACAngB,EAAEsI,EAAEiE,EAAEC,EAAG4mD,EAAOC,EAFhBC,EAAc,GAGlB,KAAKtzD,EAAI,EAAO,GAAJA,EAAQA,IAClBsI,EAAI,GAAItI,EACRuM,EAAI7M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAGwqD,EAAM,EAAExqD,GAAG,EAAIA,GAAIuoD,EAAOnxD,KAAK4uB,IAAIhmB,EAAE,GAAG0qD,EAC5DxmD,EAAI9M,KAAK4uB,IAAI,EAAEhmB,EAAE,GAAGyqD,EAAM,EAAEzqD,GAAG,EAAIA,GAAIwoD,EAAOpxD,KAAK4uB,IAAIhmB,EAAE,GAAG2qD,EACxDjzD,EAAI,IACNmgB,EAAW3lB,KAAK+4D,mBAAmBH,EAAMC,EAAM9mD,EAAEC,EAAG0mD,EAAGC,GACvDG,EAAyBA,EAAXnzC,EAAyBA,EAAWmzC,GAEpDF,EAAQ7mD,EAAG8mD,EAAQ7mD,CAErBxI,GAAcsvD,MAGdtvD,GAAcxJ,KAAK+4D,mBAAmBT,EAAGC,EAAGC,EAAGC,EAAGC,EAAGC,OAGpD,CACH,GAAI5mD,GAAGC,EAAG4M,EAAIC,EACV4M,EAAS,IAAOzrB,KAAKw+C,QAAQK,aAC7BiH,EAAO9lD,KAAKopB,IACZ08B,GAAKvzC,MAAQuzC,EAAKtzC,QACpBT,EAAI+zC,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,MACxBP,EAAI8zC,EAAK9zC,EAAIyZ,IAGb1Z,EAAI+zC,EAAK/zC,EAAI0Z,EACbzZ,EAAI8zC,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,QAE1BoM,EAAK7M,EAAI2mD,EACT75C,EAAK7M,EAAI2mD,EACTnvD,EAActE,KAAK2lB,IAAI3lB,KAAKyqB,KAAK/Q,EAAGA,EAAKC,EAAGA,GAAM4M,GAGpD,MAAIzrB,MAAK8zD,gBAAgBrsD,KAAOixD,GAC9B14D,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,MAAQmmD,GACzD14D,KAAK8zD,gBAAgBnsD,IAAMgxD,GAC3B34D,KAAK8zD,gBAAgBnsD,IAAM3H,KAAK8zD,gBAAgBthD,OAASmmD,EAClD,EAGAnvD,GAIXpG,EAAK+P,UAAU4lD,mBAAqB,SAAST,EAAGC,EAAGC,EAAGC,EAAGC,EAAGC,GAC1D,GAAIK,GAAKR,EAAGF,EACVW,EAAKR,EAAGF,EACRW,EAAYF,EAAGA,EAAKC,EAAGA,EACvBE,IAAOT,EAAKJ,GAAMU,GAAML,EAAKJ,GAAMU,GAAMC,CAEvCC,GAAI,EACNA,EAAI,EAEO,EAAJA,IACPA,EAAI,EAGN,IAAIpnD,GAAIumD,EAAKa,EAAIH,EACfhnD,EAAIumD,EAAKY,EAAIF,EACbr6C,EAAK7M,EAAI2mD,EACT75C,EAAK7M,EAAI2mD,CAQX,OAAOzzD,MAAKyqB,KAAK/Q,EAAGA,EAAKC,EAAGA,IAQ9Bzb,EAAK+P,UAAU6vB,SAAW,SAAS/lB,GACjCjd,KAAKm2D,gBAAkB,EAAIl5C,GAI7B7Z,EAAK+P,UAAUg+B,OAAS,WACtBnxC,KAAKqzC,UAAW,GAGlBjwC,EAAK+P,UAAU+9B,SAAW,WACxBlxC,KAAKqzC,UAAW,GAGlBjwC,EAAK+P,UAAUg/C,mBAAqB,WACjB,OAAbnyD,KAAKivD,KAA8B,OAAdjvD,KAAKopB,MAA6B,OAAZppB,KAAKqpB,IAClDrpB,KAAKivD,IAAIl9C,EAAI,IAAO/R,KAAKopB,KAAKrX,EAAI/R,KAAKqpB,GAAGtX,GAC1C/R,KAAKivD,IAAIj9C,EAAI,IAAOhS,KAAKopB,KAAKpX,EAAIhS,KAAKqpB,GAAGrX,KAG1ChS,KAAKivD,IAAIl9C,EAAI,EACb/R,KAAKivD,IAAIj9C,EAAI,IASjB5O,EAAK+P,UAAU+8C,kBAAoB,SAASnpC,GAC1C,GAAgC,GAA5B/mB,KAAKu0D,oBAA6B,CACpC,GAA+B,OAA3Bv0D,KAAKw0D,aAAaprC,MAA0C,OAAzBppB,KAAKw0D,aAAanrC,GAAa,CACpE,GAAI+vC,GAAa,cAAcplD,OAAOhU,KAAKK,IACvCg5D,EAAW,YAAYrlD,OAAOhU,KAAKK,IACnCqhD,GACY1E,OAAO/qC,MAAM,GAAIwZ,OAAO,EAAGzL,YAAY,EAAG69B,oBAAqB,GAC/DW,SAASO,QAAQ,GACjBI,YAAac,sBAAuB,EAAGD,aAAcztC,MAAM,EAAGC,OAAQ,EAAGiZ,OAAO,IAEhGzrB,MAAKw0D,aAAaprC,KAAO,GAAI7lB,IAC1BlD,GAAG+4D,EA
CFhc,MAAM,MACJjyC,OAAOgB,WAAW,UAAWC,OAAO,UAAWC,WAAYF,WAAW,mBAClEu1C,GACV1hD,KAAKw0D,aAAanrC,GAAK,GAAI9lB,IACxBlD,GAAGg5D,EACFjc,MAAM,MACNjyC,OAAOgB,WAAW,UAAWC,OAAO,UAAWC,WAAYF,WAAW,mBAChEu1C,GAGZ1hD,KAAKw0D,aAAaC,aACqB,GAAnCz0D,KAAKw0D,aAAaprC,KAAKiqB,WACzBrzC,KAAKw0D,aAAaC,UAAUrrC,KAAOppB,KAAKs5D,2BAA2BvyC,GACnE/mB,KAAKw0D,aAAaprC,KAAKrX,EAAI/R,KAAKw0D,aAAaC,UAAUrrC,KAAKrX,EAC5D/R,KAAKw0D,aAAaprC,KAAKpX,EAAIhS,KAAKw0D,aAAaC,UAAUrrC,KAAKpX,GAEzB,GAAjChS,KAAKw0D,aAAanrC,GAAGgqB,WACvBrzC,KAAKw0D,aAAaC,UAAUprC,GAAKrpB,KAAKu5D,yBAAyBxyC,GAC/D/mB,KAAKw0D,aAAanrC,GAAGtX,EAAI/R,KAAKw0D,aAAaC,UAAUprC,GAAGtX,EACxD/R,KAAKw0D,aAAanrC,GAAGrX,EAAIhS,KAAKw0D,aAAaC,UAAUprC,GAAGrX,GAG1DhS,KAAKw0D,aAAaprC,KAAK2iB,KAAKhlB,GAC5B/mB,KAAKw0D,aAAanrC,GAAG0iB,KAAKhlB,OAG1B/mB,MAAKw0D,cAAgBprC,KAAK,KAAMC,GAAG,KAAMorC,eAQ7CrxD,EAAK+P,UAAUqmD,oBAAsB,WACnCx5D,KAAKi0D,WAAaj0D,KAAKopB,KACvBppB,KAAKk0D,SAAWl0D,KAAKqpB,GACrBrpB,KAAKu0D,qBAAsB,GAO7BnxD,EAAK+P,UAAUsmD,qBAAuB,WACpCz5D,KAAK2zD,OAAS3zD,KAAKopB,KAAK/oB,GACxBL,KAAK4zD,KAAO5zD,KAAKqpB,GAAGhpB,GAChBL,KAAK2zD,QAAU3zD,KAAKi0D,WAAW5zD,GACjCL,KAAKi0D,WAAWe,WAAWh1D,MAEpBA,KAAK4zD,MAAQ5zD,KAAKk0D,SAAS7zD,IAClCL,KAAKk0D,SAASc,WAAWh1D,MAG3BA,KAAKi0D,WAAa,KAClBj0D,KAAKk0D,SAAW,KAChBl0D,KAAKu0D,qBAAsB,GAW7BnxD,EAAK+P,UAAUumD,wBAA0B,SAAS3nD,EAAEC,GAClD,GAAIyiD,GAAYz0D,KAAKw0D,aAAaC,UAC9BkF,EAAez0D,KAAKyqB,KAAKzqB,KAAK4uB,IAAI/hB,EAAI0iD,EAAUrrC,KAAKrX,EAAE,GAAK7M,KAAK4uB,IAAI9hB,EAAIyiD,EAAUrrC,KAAKpX,EAAE,IAC1F4nD,EAAe10D,KAAKyqB,KAAKzqB,KAAK4uB,IAAI/hB,EAAI0iD,EAAUprC,GAAGtX,EAAI,GAAK7M,KAAK4uB,IAAI9hB,EAAIyiD,EAAUprC,GAAGrX,EAAI,GAE9F,OAAmB,IAAf2nD,GACF35D,KAAK00D,cAAgB10D,KAAKopB,KAC1BppB,KAAKopB,KAAOppB,KAAKw0D,aAAaprC,KACvBppB,KAAKw0D,aAAaprC,MAEL,GAAbwwC,GACP55D,KAAK00D,cAAgB10D,KAAKqpB,GAC1BrpB,KAAKqpB,GAAKrpB,KAAKw0D,aAAanrC,GACrBrpB,KAAKw0D,aAAanrC,IAGlB,MASXjmB,EAAK+P,UAAU0mD,qBAAuB,WACG,GAAnC75D,KAAKw0D,aAAaprC,KAAKiqB,UACzBrzC,KAAKopB,KAAOppB,KAAK00D,cACjB10D,KAAK00D,cAAgB,KACrB10D,KAAKw0D,aAAaprC,KAAK8nB,YAEiB,GAAjClxC,KAAKw0D,aAAanrC,GAAGgqB,WAC5BrzC,KAAKqpB,GAAKrpB,KAAK00D,cACf10D,KAAK00D,cAAgB,KACrB10D,KAAKw0D,aAAanrC,GAAG6nB,aAUzB9tC,EAAK+P,UAAUmmD,2BAA6B,SAASvyC,GAEnD,GAAI+yC,EACJ,IAAyC,GAArC95D,KAAKyO,QAAQqyC,aAAapyC,QAC5BorD,EAAqB95D,KAAK43D,qBAAoB,EAAM7wC,OAEjD,CACH,GAAIqnC,GAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,GACrE6M,EAAM5e,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EAC5B8M,EAAM7e,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAC5BmmD,EAAoBjzD,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAE7Ck7C,EAAiB/5D,KAAKopB,KAAKyuC,iBAAiB9wC,EAAKqnC,EAAQlpD,KAAKymB,IAC9DquC,GAAmB7B,EAAoB4B,GAAkB5B,CAC7D2B,MACAA,EAAmB/nD,EAAI,EAAoB/R,KAAKopB,KAAKrX,GAAK,EAAIioD,GAAmBh6D,KAAKqpB,GAAGtX,EACzF+nD,EAAmB9nD,EAAI,EAAoBhS,KAAKopB,KAAKpX,GAAK,EAAIgoD,GAAmBh6D,KAAKqpB,GAAGrX,EAG3F,MAAO8nD,IAST12D,EAAK+P,UAAUomD,yBAA2B,SAASxyC,GAEjD,GAAuBkzC,EACvB,IAAyC,GAArCj6D,KAAKyO,QAAQqyC,aAAapyC,QAC5BurD,EAAmBj6D,KAAK43D,qBAAoB,EAAO7wC,OAEhD,CACH,GAAIqnC,GAAQlpD,KAAK4xD,MAAO92D,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAAKhS,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,GACrE6M,EAAM5e,KAAKqpB,GAAGtX,EAAI/R,KAAKopB,KAAKrX,EAC5B8M,EAAM7e,KAAKqpB,GAAGrX,EAAIhS,KAAKopB,KAAKpX,EAC5BmmD,EAAoBjzD,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAC7Cu5C,EAAep4D,KAAKqpB,GAAGwuC,iBAAiB9wC,EAAKqnC,GAC7CiK,GAAiBF,EAAoBC,GAAgBD,CAEzD8B,MACAA,EAAiBloD,GAAK,EAAIsmD,GAAiBr4D,KAAKopB,KAAKrX,EAAIsmD,EAAgBr4D,KAAKqpB,GAAGtX,EACjFkoD,EAAiBjoD,GAAK,EAAIqmD,GAAiBr4D,KAAKopB,KAAKpX,EAAIqmD,EAAgBr4D,KAAKqpB,GAAGrX,EAGnF,MAAOioD,IAGTp6D,EAAOD,QAAUwD,GAIb,SAASvD,EAAQD,EAASM,GAQ9B,QAASmD,KACPrD,KA
AKyW,QACLzW,KAAKk6D,aAAe,EARXh6D,EAAoB,EAe/BmD,GAAO82D,UACJ/tD,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aACxIC,OAAQ,UAAWD,WAAY,UAAWE,WAAYD,OAAQ,UAAWD,WAAY,WAAYG,OAAQF,OAAQ,UAAWD,WAAY,aAO3I9I,EAAO8P,UAAUsD,MAAQ,WACvBzW,KAAKm0B,UACLn0B,KAAKm0B,OAAOxuB,OAAS,WAEnB,GAAIH,GAAI,CACR,KAAM,GAAI9E,KAAKV,MACTA,KAAK8F,eAAepF,IACtB8E,GAGJ,OAAOA,KAWXnC,EAAO8P,UAAU+B,IAAM,SAAUkzC,GAC/B,GAAIn2C,GAAQjS,KAAKm0B,OAAOi0B,EACxB,IAAa5hD,QAATyL,EAAoB,CAEtB,GAAI9J,GAAQnI,KAAKk6D,aAAe72D,EAAO82D,QAAQx0D,MAC/C3F,MAAKk6D,eACLjoD,KACAA,EAAM9G,MAAQ9H,EAAO82D,QAAQhyD,GAC7BnI,KAAKm0B,OAAOi0B,GAAan2C,EAG3B,MAAOA,IAUT5O,EAAO8P,UAAUF,IAAM,SAAUm1C,EAAWn7C,GAE1C,MADAjN,MAAKm0B,OAAOi0B,GAAan7C,EAClBA,GAGTpN,EAAOD,QAAUyD,GAKb,SAASxD,GAMb,QAASyD,KACPtD,KAAK2iD,UACL3iD,KAAKo6D,eACLp6D,KAAKsI,SAAW9B,OAQlBlD,EAAO6P,UAAUyvC,kBAAoB,SAASt6C,GAC5CtI,KAAKsI,SAAWA,GASlBhF,EAAO6P,UAAUknD,KAAO,SAASC,EAAKC,GACpC,GAAIC,GAAMx6D,KAAK2iD,OAAO2X,EACtB,IAAY9zD,SAARg0D,EAAmB,CAErB,GAAIrmD,GAAKnU,IACTw6D,GAAM,GAAIC,OACVD,EAAIE,OAAS,WAEO,GAAd16D,KAAKuS,QACPhB,SAASojB,KAAKljB,YAAYzR,MAC1BA,KAAKuS,MAAQvS,KAAKowB,YAClBpwB,KAAKwS,OAASxS,KAAKswB,aACnB/e,SAASojB,KAAKxjB,YAAYnR,OAGxBmU,EAAG7L,WACL6L,EAAGwuC,OAAO2X,GAAOE,EACjBrmD,EAAG7L,SAAStI,QAIhBw6D,EAAIG,QAAU,WACMn0D,SAAd+zD,GACF5hC,QAAQiiC,MAAM,wBAAyBN,SAChCt6D,MAAK4lD,IACRzxC,EAAG7L,UACL6L,EAAG7L,SAAStI,OAGPmU,EAAGimD,YAAYE,MAAS,GAC/B3hC,QAAQiiC,MAAM,8BAA+BL,SACtCv6D,MAAK4lD,IACRzxC,EAAG7L,UACL6L,EAAG7L,SAAStI,QAIdA,KAAK4lD,IAAM2U,EACXpmD,EAAGimD,YAAYE,IAAO,IAI1BE,EAAI5U,IAAM0U,EAGZ,MAAOE,IAGT36D,EAAOD,QAAU0D,GAKb,SAASzD,EAAQD,EAASM,GA6B9B,QAASqD,GAAKorD,EAAYkM,EAAWC,EAAWpH,GAC9C,GAAIhS,GAAY/gD,EAAKsN,uBAAuB,SAASylD,EACrD1zD,MAAKyO,QAAUizC,EAAU1E,MAEzBh9C,KAAKqzC,UAAW,EAChBrzC,KAAKsM,OAAQ,EAEbtM,KAAK89C,SACL99C,KAAKmvD,gBACLnvD,KAAK+6D,iBAEL/6D,KAAKg7D,kBAAoB,EAGzBh7D,KAAKK,GAAKmG,OACVxG,KAAKwyD,gBAAiB,EACtBxyD,KAAKyyD,gBAAiB,EACtBzyD,KAAKmrD,QAAS,EACdnrD,KAAKorD,QAAS,EACdprD,KAAKi7D,qBAAsB,EAC3Bj7D,KAAKk7D,kBAAsB,EAC3Bl7D,KAAKm7D,gBAAkBzH,EAAiB1W,MAAMvxB,OAC9CzrB,KAAKo7D,aAAc,EACnBp7D,KAAK49C,MAAQ,GACb59C,KAAKq7D,kBAAmB,EACxBr7D,KAAKs7D,qBAAsB,EAC3Bt7D,KAAK8zD,iBAAmBnsD,IAAI,EAAGF,KAAK,EAAG8K,MAAM,EAAGC,OAAO,EAAGuhD,MAAM,GAChE/zD,KAAKomD,aAAez+C,IAAI,EAAGF,KAAK,EAAG4f,MAAM,EAAG/D,OAAO,GAEnDtjB,KAAK66D,UAAYA,EACjB76D,KAAK86D,UAAYA,EAGjB96D,KAAKu7D,GAAK,EACVv7D,KAAKw7D,GAAK,EACVx7D,KAAKy7D,GAAK,EACVz7D,KAAK07D,GAAK,EACV17D,KAAK+R,EAAI,KACT/R,KAAKgS,EAAI,KAGThS,KAAK27D,eAAiBF,GAAG,EAAEC,GAAG,EAAE3pD,EAAE,EAAEC,EAAE,GAEtChS,KAAK++C,QAAU2U,EAAiBlV,QAAQO,QACxC/+C,KAAKswD,WAAav+C,EAAE,KAAKC,EAAE,MAE3BhS,KAAK0uD,cAAcC,EAAYjN,GAG/B1hD,KAAK47D,eACL57D,KAAK67D,mBAAqB,EAC1B77D,KAAK87D,eAAiB,EACtB97D,KAAK+7D,uBAA0BrI,EAAiBvU,WAAWa,YAAYztC,MACvEvS,KAAKg8D,wBAA0BtI,EAAiBvU,WAAWa,YAAYxtC,OACvExS,KAAKi8D,wBAA0BvI,EAAiBvU,WAAWa,YAAYv0B,OACvEzrB,KAAKigD,sBAAwByT,EAAiBvU,WAAWc,sBACzDjgD,KAAKk8D,gBAAkB,EAGvBl8D,KAAKm2D,gBAAkB,EACvBn2D,KAAKm8D,aAAe,EACpBn8D,KAAK+jD,eAAiBh
yC,EAAK,KAAMC,EAAK,MACtChS,KAAKgkD,mBAAqBjyC,EAAM,IAAKC,EAAM,KAC3ChS,KAAKiyD,aAAe,KA1FtB,GAAItxD,GAAOT,EAAoB,EAiG/BqD,GAAK4P,UAAU69C,eAAiB,WAC9BhxD,KAAK+R,EAAI/R,KAAK27D,cAAc5pD,EAC5B/R,KAAKgS,EAAIhS,KAAK27D,cAAc3pD,EAC5BhS,KAAKy7D,GAAKz7D,KAAK27D,cAAcF,GAC7Bz7D,KAAK07D,GAAK17D,KAAK27D,cAAcD,IAO/Bn4D,EAAK4P,UAAUyoD,aAAe,WAE5B57D,KAAKo8D,eAAiB51D,OACtBxG,KAAKq8D,YAAc,EACnBr8D,KAAKs8D,kBACLt8D,KAAKu8D,kBACLv8D,KAAKw8D,oBAOPj5D,EAAK4P,UAAU4hD,WAAa,SAASrH,GACH,IAA5B1tD,KAAK89C,MAAMn3C,QAAQ+mD,IACrB1tD,KAAK89C,MAAM91C,KAAK0lD,GAEqB,IAAnC1tD,KAAKmvD,aAAaxoD,QAAQ+mD,IAC5B1tD,KAAKmvD,aAAannD,KAAK0lD,GAEzB1tD,KAAK67D,mBAAqB77D,KAAKmvD,aAAaxpD,QAO9CpC,EAAK4P,UAAU6hD,WAAa,SAAStH,GACnC,GAAIvlD,GAAQnI,KAAK89C,MAAMn3C,QAAQ+mD,EAClB,KAATvlD,GACFnI,KAAK89C,MAAM11C,OAAOD,EAAO,GAE3BA,EAAQnI,KAAKmvD,aAAaxoD,QAAQ+mD,GACrB,IAATvlD,GACFnI,KAAKmvD,aAAa/mD,OAAOD,EAAO,GAElCnI,KAAK67D,mBAAqB77D,KAAKmvD,aAAaxpD,QAS9CpC,EAAK4P,UAAUu7C,cAAgB,SAASC,EAAYjN,GAClD,GAAKiN,EAAL,CAIA,GAAIzgD,IAAU,cAAc,sBAAsB,QAAQ,QAAQ,cAAc,SAAS,YACvF,WAAW,WAAW,WAAW,kBAAkB,kBAAkB,QAAQ,OAkB/E,IAhBAvN,EAAKwF,oBAAoB+H,EAAQlO,KAAKyO,QAASkgD,GAGzBnoD,SAAlBmoD,EAAWtuD,KAA0BL,KAAKK,GAAKsuD,EAAWtuD,IACrCmG,SAArBmoD,EAAWlmC,QAA0BzoB,KAAKyoB,MAAQkmC,EAAWlmC,MAAOzoB,KAAKy8D,cAAgB9N,EAAWlmC,OAC/EjiB,SAArBmoD,EAAW3pB,QAA0BhlC,KAAKglC,MAAQ2pB,EAAW3pB,OAC5Cx+B,SAAjBmoD,EAAW58C,IAA0B/R,KAAK+R,EAAI48C,EAAW58C,GACxCvL,SAAjBmoD,EAAW38C,IAA0BhS,KAAKgS,EAAI28C,EAAW38C,GACpCxL,SAArBmoD,EAAWtnD,QAA0BrH,KAAKqH,MAAQsnD,EAAWtnD,OACxCb,SAArBmoD,EAAW/Q,QAA0B59C,KAAK49C,MAAQ+Q,EAAW/Q,MAAO59C,KAAKq7D,kBAAmB,GAGzD70D,SAAnCmoD,EAAWsM,sBAAoCj7D,KAAKi7D,oBAAsBtM,EAAWsM,qBAClDz0D,SAAnCmoD,EAAWuM,mBAAoCl7D,KAAKk7D,iBAAsBvM,EAAWuM,kBAClD10D,SAAnCmoD,EAAW+N,kBAAoC18D,KAAK08D,gBAAsB/N,EAAW+N,iBAEzEl2D,SAAZxG,KAAKK,GACP,KAAM,sBAIR,IAAkC,gBAAvBL,MAAKyO,QAAQwD,OAAqD,gBAAvBjS,MAAKyO,QAAQwD,OAA4C,IAAtBjS,KAAKyO,QAAQwD,MAAc,CAClH,GAAI0qD,GAAW38D,KAAK86D,UAAU5lD,IAAIlV,KAAKyO,QAAQwD,MAC/CtR,GAAK8F,WAAWzG,KAAKyO,QAASkuD,GAE9B38D,KAAKyO,QAAQtD,MAAQxK,EAAKuK,WAAWlL,KAAKyO,QAAQtD,OAMpD,GAF0B3E,SAAtBmoD,EAAWljC,SAA+BzrB,KAAKm7D,gBAAkBn7D,KAAKyO,QAAQgd,QACzDjlB,SAArBmoD,EAAWxjD,QAA+BnL,KAAKyO,QAAQtD,MAAQxK,EAAKuK,WAAWyjD,EAAWxjD,QACnE3E,SAAvBxG,KAAKyO,QAAQ4uC,OAA4C,IAArBr9C,KAAKyO,QAAQ4uC,MAAY,CAC/D,IAAIr9C,KAAK66D,UAIP,KAAM,uBAHN76D,MAAK48D,SAAW58D,KAAK66D,UAAUR,KAAKr6D,KAAKyO,QAAQ4uC,MAAOr9C,KAAKyO,QAAQouD,aAgCzE,OAzBkCr2D,SAA9BmoD,EAAW6D,gBACbxyD,KAAKmrD,QAAUwD,EAAW6D,eAC1BxyD,KAAKwyD,eAAiB7D,EAAW6D,gBAEThsD,SAAjBmoD,EAAW58C,GAA0C,GAAvB/R,KAAKwyD,iBAC1CxyD,KAAKmrD,QAAS,GAIkB3kD,SAA9BmoD,EAAW8D,gBACbzyD,KAAKorD,QAAUuD,EAAW8D,eAC1BzyD,KAAKyyD,eAAiB9D,EAAW8D,gBAETjsD,SAAjBmoD,EAAW38C,GAA0C,GAAvBhS,KAAKyyD,iBAC1CzyD,KAAKorD,QAAS,GAGhBprD,KAAKo7D,YAAcp7D,KAAKo7D,aAAsC50D,SAAtBmoD,EAAWljC,QAExB,UAAvBzrB,KAAKyO,QAAQ2uC,OAA4C,kBAAvBp9C,KAAKyO,QAAQ2uC,SACjDp9C,KAAKyO,QAAQyuC,UAAYwE,EAAU1E,MAAM91B,SACzClnB,KAAKyO,QAAQ0uC,UAAYuE,EAAU1E,MAAM71B,UAInCnnB,KAAKyO,QAAQ2uC,OACnB,IAAK,WAAiBp9C,KAAK+rC,KAAO/rC,KAAK88D,cAAe98D,KAAKg2D,OAASh2D,KAAK+8D,eAAiB,MAC1F,KAAK,MAAiB/8D,KAAK+rC,KAAO/rC,KAAKg9D,SAAUh9D,KAAKg2D,OAASh2D,KAAKi9D,UAAY,MAChF,KAAK,SAAiBj9D,KAAK+rC,KAAO/rC,KAAKk9D,YAAal9D,KAAKg2D,OAASh2D,KAAKm9D,aAAe,MACtF,KAAK,UAAiBn9D,KAAK+rC,KAAO/rC,KAAKo9D,aAAcp9D,KAAKg2D,OAASh2D,KAAKq9D,cAAgB,MAExF,KAAK,QAAiBr9D,KAAK+rC,KAAO/rC,KAAKs9D,WAAYt9D,KAAKg2D,OAASh2D,KAAKu9D,YAAc,MACpF,KAAK,gBAAiBv9D,KAAK+rC,KAAO/rC,KAAKw9D,mBAAoBx9D,KAAKg2D,OAASh2D,KAAKy9D,oBAAsB,MACpG,KAAK,OAAiBz9D,KAAK+rC,KAAO/rC,KAAK09D,UAAW19D,KAAKg2D,OAASh2D,KAAK29D,WAAa,MAClF,KAAK,MAAiB39D,KAAK+rC,KAAO/rC,KAAK49D,SAAU59D,KAAKg2D,OAASh2D,KAAK6
9D,YAAc,MAClF,KAAK,SAAiB79D,KAAK+rC,KAAO/rC,KAAK89D,YAAa99D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MACrF,KAAK,WAAiB79D,KAAK+rC,KAAO/rC,KAAK+9D,cAAe/9D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MACvF,KAAK,eAAiB79D,KAAK+rC,KAAO/rC,KAAKg+D,kBAAmBh+D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MAC3F,KAAK,OAAiB79D,KAAK+rC,KAAO/rC,KAAKi+D,UAAWj+D,KAAKg2D,OAASh2D,KAAK69D,YAAc,MACnF,SAAsB79D,KAAK+rC,KAAO/rC,KAAKo9D,aAAcp9D,KAAKg2D,OAASh2D,KAAKq9D,eAG1Er9D,KAAKk+D,WAOP36D,EAAK4P,UAAUg+B,OAAS,WACtBnxC,KAAKqzC,UAAW,EAChBrzC,KAAKk+D,UAMP36D,EAAK4P,UAAU+9B,SAAW,WACxBlxC,KAAKqzC,UAAW,EAChBrzC,KAAKk+D,UAOP36D,EAAK4P,UAAUgrD,eAAiB,WAC9Bn+D,KAAKk+D,UAOP36D,EAAK4P,UAAU+qD,OAAS,WACtBl+D,KAAKuS,MAAQ/L,OACbxG,KAAKwS,OAAShM,QAQhBjD,EAAK4P,UAAUq6C,SAAW,WACxB,MAA6B,kBAAfxtD,MAAKglC,MAAuBhlC,KAAKglC,QAAUhlC,KAAKglC,OAShEzhC,EAAK4P,UAAU0kD,iBAAmB,SAAU9wC,EAAKqnC,GAC/C,GAAIpuC,GAAc,CAMlB,QAJKhgB,KAAKuS,OACRvS,KAAKg2D,OAAOjvC,GAGN/mB,KAAKyO,QAAQ2uC,OACnB,IAAK,SACL,IAAK,MACH,MAAOp9C,MAAKyO,QAAQgd,OAAQzL,CAE9B,KAAK,UACH,GAAIza,GAAIvF,KAAKuS,MAAQ,EACjBnM,EAAIpG,KAAKwS,OAAS,EAClB68C,EAAKnqD,KAAKkZ,IAAIgwC,GAAS7oD,EACvBoG,EAAKzG,KAAKqZ,IAAI6vC,GAAShoD,CAC3B,OAAOb,GAAIa,EAAIlB,KAAKyqB,KAAK0/B,EAAIA,EAAI1jD,EAAIA,EAMvC,KAAK,MACL,IAAK,QACL,IAAK,OACL,QACE,MAAI3L,MAAKuS,MACArN,KAAK4G,IACR5G,KAAK2lB,IAAI7qB,KAAKuS,MAAQ,EAAIrN,KAAKqZ,IAAI6vC,IACnClpD,KAAK2lB,IAAI7qB,KAAKwS,OAAS,EAAItN,KAAKkZ,IAAIgwC,KAAWpuC,EAI5C,IAYfzc,EAAK4P,UAAUirD,UAAY,SAAS7C,EAAIC,GACtCx7D,KAAKu7D,GAAKA,EACVv7D,KAAKw7D,GAAKA,GASZj4D,EAAK4P,UAAUkrD,UAAY,SAAS9C,EAAIC,GACtCx7D,KAAKu7D,IAAMA,EACXv7D,KAAKw7D,IAAMA,GAMbj4D,EAAK4P,UAAUmrD,WAAa,WAC1Bt+D,KAAK27D,cAAc5pD,EAAI/R,KAAK+R,EAC5B/R,KAAK27D,cAAc3pD,EAAIhS,KAAKgS,EAC5BhS,KAAK27D,cAAcF,GAAKz7D,KAAKy7D,GAC7Bz7D,KAAK27D,cAAcD,GAAK17D,KAAK07D,IAO/Bn4D,EAAK4P,UAAU09C,aAAe,SAASr+B,GAErC,GADAxyB,KAAKs+D,aACAt+D,KAAKmrD,OAORnrD,KAAKu7D,GAAK,EACVv7D,KAAKy7D,GAAK,MARM,CAChB,GAAI78C,GAAO5e,KAAK++C,QAAU/+C,KAAKy7D,GAC3B79C,GAAQ5d,KAAKu7D,GAAK38C,GAAM5e,KAAKyO,QAAQwuC,IACzCj9C,MAAKy7D,IAAM79C,EAAK4U,EAChBxyB,KAAK+R,GAAM/R,KAAKy7D,GAAKjpC,EAOvB,GAAKxyB,KAAKorD,OAORprD,KAAKw7D,GAAK,EACVx7D,KAAK07D,GAAK,MARM,CAChB,GAAI78C,GAAO7e,KAAK++C,QAAU/+C,KAAK07D,GAC3B79C,GAAQ7d,KAAKw7D,GAAK38C,GAAM7e,KAAKyO,QAAQwuC,IACzCj9C,MAAK07D,IAAM79C,EAAK2U,EAChBxyB,KAAKgS,GAAMhS,KAAK07D,GAAKlpC,IAezBjvB,EAAK4P,UAAUy9C,oBAAsB,SAASp+B,EAAUyuB,GAEtD,GADAjhD,KAAKs+D,aACAt+D,KAAKmrD,OAQRnrD,KAAKu7D,GAAK,EACVv7D,KAAKy7D,GAAK,MATM,CAChB,GAAI78C,GAAO5e,KAAK++C,QAAU/+C,KAAKy7D,GAC3B79C,GAAQ5d,KAAKu7D,GAAK38C,GAAM5e,KAAKyO,QAAQwuC,IACzCj9C,MAAKy7D,IAAM79C,EAAK4U,EAChBxyB,KAAKy7D,GAAMv2D,KAAK2lB,IAAI7qB,KAAKy7D,IAAMxa,EAAiBjhD,KAAKy7D,GAAK,EAAKxa,GAAeA,EAAejhD,KAAKy7D,GAClGz7D,KAAK+R,GAAM/R,KAAKy7D,GAAKjpC,EAOvB,GAAKxyB,KAAKorD,OAQRprD,KAAKw7D,GAAK,EACVx7D,KAAK07D,GAAK,MATM,CAChB,GAAI78C,GAAO7e,KAAK++C,QAAU/+C,KAAK07D,GAC3B79C,GAAQ7d,KAAKw7D,GAAK38C,GAAM7e,KAAKyO,QAAQwuC,IACzCj9C,MAAK07D,IAAM79C,EAAK2U,EAChBxyB,KAAK07D,GAAMx2D,KAAK2lB,IAAI7qB,KAAK07D,IAAMza,EAAiBjhD,KAAK07D,GAAK,EAAKza,GAAeA,EAAejhD,KAAK07D,GAClG17D,KAAKgS,GAAMhS,KAAK07D,GAAKlpC,IAYzBjvB,EAAK4P,UAAUorD,QAAU,WACvB,MAAQv+D,MAAKmrD,QAAUnrD,KAAKorD,QAQ9B7nD,EAAK4P,UAAUs9C,SAAW,SAASD,GACjC,GAAIgO,GAAWt5D,KAAKyqB,KAAKzqB,KAAK4uB,IAAI9zB,KAAKy7D,GAAG,GAAKv2D,KAAK4uB,IAAI9zB,KAAK07D,GAAG,GAEhE,OAAQ8C,GAAWhO,GAOrBjtD,EAAK4P,UAAU23C,WAAa,WAC1B,MAAO9qD,MAAKqzC,UAOd9vC,EAAK4P,UAAUyB,SAAW,WACxB,MAAO5U,MAAKqH,OASd9D,EAAK4P,UAAUsrD,YAAc,SAAS1sD,EAAGC,GACvC,GAAI4M,GAAK5e,KAAK+R,EAAIA,EACd8M,EAAK7e,KAAKgS,EAAIA,CAClB,OAAO9M,MAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,IAUlCtb,EAAK4P,UAAUi8C,cAAgB,SAAStjD,EAAKY,GAC3C,IAAK1M,KAAKo7D,aAA8B50D,SAAfxG,KAAKqH,
MAC5B,GAAIqF,GAAOZ,EACT9L,KAAKyO,QAAQgd,QAASzrB,KAAKyO,QAAQyuC,UAAYl9C,KAAKyO,QAAQ0uC,WAAa,MAEtE,CACH,GAAIlgC,IAASjd,KAAKyO,QAAQ0uC,UAAYn9C,KAAKyO,QAAQyuC,YAAcxwC,EAAMZ,EACvE9L,MAAKyO,QAAQgd,QAASzrB,KAAKqH,MAAQyE,GAAOmR,EAAQjd,KAAKyO,QAAQyuC,UAGnEl9C,KAAKm7D,gBAAkBn7D,KAAKyO,QAAQgd,QAQtCloB,EAAK4P,UAAU44B,KAAO,WACpB,KAAM,wCAQRxoC,EAAK4P,UAAU6iD,OAAS,WACtB,KAAM,0CAQRzyD,EAAK4P,UAAUo6C,kBAAoB,SAASxqC,GAC1C,MAAQ/iB,MAAKyH,KAAoBsb,EAAIsE,OAC7BrnB,KAAKyH,KAAOzH,KAAKuS,MAAQwQ,EAAItb,MAC7BzH,KAAK2H,IAAoBob,EAAIO,QAC7BtjB,KAAK2H,IAAM3H,KAAKwS,OAASuQ,EAAIpb,KAGvCpE,EAAK4P,UAAUoqD,aAAe,WAG5B,IAAKv9D,KAAKuS,QAAUvS,KAAKwS,OAAQ,CAC/B,GAAID,GAAOC,CACX,IAAIxS,KAAKqH,MAAO,CACdrH,KAAKyO,QAAQgd,OAAQzrB,KAAKm7D,eAC1B,IAAIl+C,GAAQjd,KAAK48D,SAASpqD,OAASxS,KAAK48D,SAASrqD,KACnC/L,UAAVyW,GACF1K,EAAQvS,KAAKyO,QAAQgd,QAASzrB,KAAK48D,SAASrqD,MAC5CC,EAASxS,KAAKyO,QAAQgd,OAAQxO,GAASjd,KAAK48D,SAASpqD,SAGrDD,EAAQ,EACRC,EAAS,OAIXD,GAAQvS,KAAK48D,SAASrqD,MACtBC,EAASxS,KAAK48D,SAASpqD,MAEzBxS,MAAKuS,MAASA,EACdvS,KAAKwS,OAASA,EAEdxS,KAAKk8D,gBAAkB,EACnBl8D,KAAKuS,MAAQ,GAAKvS,KAAKwS,OAAS,IAClCxS,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA0BjgD,KAAK+7D,uBAClF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAASvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACxFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQA,KAK1ChP,EAAK4P,UAAUurD,qBAAuB,SAAU33C,GAC9C,GAA2B,GAAvB/mB,KAAK48D,SAASrqD,MAAa,CAE7B,GAAIvS,KAAKq8D,YAAc,EAAG,CACxB,GAAI/0C,GAActnB,KAAKq8D,YAAc,EAAK,GAAK,CAC/C/0C,IAAatnB,KAAKm2D,gBAClB7uC,EAAYpiB,KAAK4G,IAAI,GAAM9L,KAAKuS,MAAM+U,GAEtCP,EAAI43C,YAAc,GAClB53C,EAAI63C,UAAU5+D,KAAK48D,SAAU58D,KAAKyH,KAAO6f,EAAWtnB,KAAK2H,IAAM2f,EAAWtnB,KAAKuS,MAAQ,EAAE+U,EAAWtnB,KAAKwS,OAAS,EAAE8U,GAItHP,EAAI43C,YAAc,EAClB53C,EAAI63C,UAAU5+D,KAAK48D,SAAU58D,KAAKyH,KAAMzH,KAAK2H,IAAK3H,KAAKuS,MAAOvS,KAAKwS,UAIvEjP,EAAK4P,UAAU0rD,gBAAkB,SAAU93C,GACzC,GAAIjN,GACA6P,EAAS,CAEb,IAAI3pB,KAAKwS,OAAO,CACdmX,EAAS3pB,KAAKwS,OAAS,CACvB,IAAIshD,GAAkB9zD,KAAK8+D,YAAY/3C,EAEnC+sC,GAAgB0C,WAAa,IAC/B7sC,GAAUmqC,EAAgBthD,OAAS,EACnCmX,GAAU,GAId7P,EAAS9Z,KAAKgS,EAAI2X,EAElB3pB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG+H,EAAQtT,SAG/CjD,EAAK4P,UAAUmqD,WAAa,SAAUv2C,GACpC/mB,KAAKu9D,aAAax2C,GAClB/mB,KAAKyH,KAASzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EACpCvS,KAAK2H,IAAS3H,KAAKgS,EAAIhS,KAAKwS,OAAS,EAErCxS,KAAK0+D,qBAAqB33C,GAE1B/mB,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK6+D,gBAAgB93C,GACrB/mB,KAAKomD,YAAY3+C,KAAOvC,KAAK4G,IAAI9L,KAAKomD,YAAY3+C,KAAMzH,KAAK8zD,gBAAgBrsD,MAC7EzH,KAAKomD,YAAY/+B,MAAQniB,KAAKwH,IAAI1M,KAAKomD,YAAY/+B,MAAOrnB,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,OAC3GvS,KAAKomD,YAAY9iC,OAASpe,KAAKwH,IAAI1M,KAAKomD,YAAY9iC,OAAQtjB,KAAKomD,YAAY9iC,OAAStjB,KAAK8zD,gBAAgBthD,SAG7GjP,EAAK4P,UAAUsqD,qBAAuB,SAAU12C,GAC9C,GAAI/mB,KAAK48D,SAAShX,KAAQ5lD,KAAK48D,SAASrqD,OAAUvS,KAAK48D,SAASpqD,OAe1DxS,KAAK++D,oCACP/+D,KAAKuS,MAAQ,EACbvS,KAAKwS,OAAS,QACPxS,MAAK++D,mCAEd/+D,KAAKu9D,aAAax2C,OAnBlB,KAAK/mB,KAAKuS,MAAO,CACf,GAAIysD,GAAiC,EAAtBh/D,KAAKyO,QAAQgd,MAC5BzrB,MAAKuS,MAAQysD,EACbh/D,KAAKwS,OAASwsD,EAKdh/D,KAAKyO,QAAQgd,QAAuE,GAA7DvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKi8D,wBAC/Fj8D,KAAKk8D,gBAAkBl8D,KAAKyO,QAAQgd,OAAQ,GAAIuzC,EAChDh/D,KAAK++D,mCAAoC,IAc/Cx7D,EAAK4P,UAAUqqD,mBAAqB,SAAUz2C,GAC5C/mB,KAAKy9D,qBAAqB12C,GAE1B/mB,KAAKyH,KAASzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EACpCvS,KAAK2H,IAAS3H,KAAKgS,EAAIhS,KAAKwS,OAAS,
CAErC,IAAIysD,GAAUj/D,KAAKyH,KAAQzH,KAAKuS,MAAQ,EACpC2sD,EAAUl/D,KAAK2H,IAAO3H,KAAKwS,OAAS,EACpCiZ,EAASvmB,KAAK2lB,IAAI7qB,KAAKwS,OAAS,EAEpCxS,MAAKm/D,eAAep4C,EAAKk4C,EAASC,EAASzzC,GAE3C1E,EAAIuoC,OACJvoC,EAAIq4C,OAAOp/D,KAAK+R,EAAG/R,KAAKgS,EAAGyZ,GAC3B1E,EAAIlH,SACJkH,EAAIs4C,OAEJr/D,KAAK0+D,qBAAqB33C,GAE1BA,EAAI0oC,UAEJzvD,KAAKomD,YAAYz+C,IAAM3H,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAC7CzrB,KAAKomD,YAAY3+C,KAAOzH,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC9CzrB,KAAKomD,YAAY/+B,MAAQrnB,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC/CzrB,KAAKomD,YAAY9iC,OAAStjB,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAEhDzrB,KAAK6+D,gBAAgB93C,GAErB/mB,KAAKomD,YAAY3+C,KAAOvC,KAAK4G,IAAI9L,KAAKomD,YAAY3+C,KAAMzH,KAAK8zD,gBAAgBrsD,MAC7EzH,KAAKomD,YAAY/+B,MAAQniB,KAAKwH,IAAI1M,KAAKomD,YAAY/+B,MAAOrnB,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,OAC3GvS,KAAKomD,YAAY9iC,OAASpe,KAAKwH,IAAI1M,KAAKomD,YAAY9iC,OAAQtjB,KAAKomD,YAAY9iC,OAAStjB,KAAK8zD,gBAAgBthD,SAG7GjP,EAAK4P,UAAU8pD,WAAa,SAAUl2C,GACpC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,EAChC/mB,MAAKuS,MAAQ+sD,EAAS/sD,MAAQ,EAAImH,EAClC1Z,KAAKwS,OAAS8sD,EAAS9sD,OAAS,EAAIkH,EAEpC1Z,KAAKuS,OAAuE,GAA7DrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAK+7D,uBACvF/7D,KAAKwS,QAAuE,GAA7DtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKg8D,wBACvFh8D,KAAKk8D,gBAAkBl8D,KAAKuS,OAAS+sD,EAAS/sD,MAAQ,EAAImH,KAM9DnW,EAAK4P,UAAU6pD,SAAW,SAAUj2C,GAClC/mB,KAAKi9D,WAAWl2C,GAEhB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAI04C,UAAUz/D,KAAKyH,KAAK,EAAEsf,EAAIO,UAAWtnB,KAAK2H,IAAI,EAAEof,EAAIO,UAAWtnB,KAAKuS,MAAM,EAAEwU,EAAIO,UAAWtnB,KAAKwS,OAAO,EAAEuU,EAAIO,UAAWtnB,KAAKyO,QAAQgd,QACzI1E,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAEhJ4a,EAAI04C,UAAUz/D,KAAKyH,KAAMzH,KAAK2H,IAAK3H,KAAKuS,MAAOvS,KAAKwS,OAAQxS,KAAKyO,QAAQgd,QACzE1E,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,IAI5CzO,EAAK4P,UAAU4pD,gBAAkB,SAAUh2C,GACzC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,GAC5B1U,EAAOitD,EAAS/sD,MAAQ,EAAImH,CAChC1Z,MAAKuS,MAAQF,EACbrS,KAAKwS,OAASH,EAGdrS,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAASvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACxFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQF,IAIxC9O,EAAK4P,UAAU2pD,cAAgB,SAAU/1C,GACvC/mB,KAAK+8D,gBAAgBh2C,GACrB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAA
uB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAI24C,SAAS1/D,KAAK+R,EAAI/R,KAAKuS,MAAM,EAAI,EAAEwU,EAAIO,UAAWtnB,KAAKgS,EAAgB,GAAZhS,KAAKwS,OAAa,EAAEuU,EAAIO,UAAWtnB,KAAKuS,MAAQ,EAAEwU,EAAIO,UAAWtnB,KAAKwS,OAAS,EAAEuU,EAAIO,WACpJP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAChJ4a,EAAI24C,SAAS1/D,KAAK+R,EAAI/R,KAAKuS,MAAM,EAAGvS,KAAKgS,EAAgB,GAAZhS,KAAKwS,OAAYxS,KAAKuS,MAAOvS,KAAKwS,QAC/EuU,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS;EAI5CzO,EAAK4P,UAAUgqD,cAAgB,SAAUp2C,GACvC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,GAC5Bi4C,EAAW95D,KAAKwH,IAAI4yD,EAAS/sD,MAAO+sD,EAAS9sD,QAAU,EAAIkH,CAC/D1Z,MAAKyO,QAAQgd,OAASuzC,EAAW,EAEjCh/D,KAAKuS,MAAQysD,EACbh/D,KAAKwS,OAASwsD,EAKdh/D,KAAKyO,QAAQgd,QAAuE,GAA7DvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKi8D,wBAC/Fj8D,KAAKk8D,gBAAkBl8D,KAAKyO,QAAQgd,OAAQ,GAAIuzC,IAIpDz7D,EAAK4P,UAAUgsD,eAAiB,SAAUp4C,EAAKhV,EAAGC,EAAGyZ,GACnD,GAAI8zC,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIq4C,OAAOrtD,EAAGC,EAAGyZ,EAAO,EAAE1E,EAAIO,WAC9BP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAChJ4a,EAAIq4C,OAAOp/D,KAAK+R,EAAG/R,KAAKgS,EAAGyZ,GAC3B1E,EAAInH,OACJmH,EAAIlH,UAGNtc,EAAK4P,UAAU+pD,YAAc,SAAUn2C,GACrC/mB,KAAKm9D,cAAcp2C,GACnB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,EAElCxS,KAAKm/D,eAAep4C,EAAK/mB,KAAK+R,EAAG/R,KAAKgS,EAAGhS,KAAKyO,QAAQgd,QAEtDzrB,KAAKomD,YAAYz+C,IAAM3H,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAC7CzrB,KAAKomD,YAAY3+C,KAAOzH,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC9CzrB,KAAKomD,YAAY/+B,MAAQrnB,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC/CzrB,KAAKomD,YAAY9iC,OAAStjB,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAEhDzrB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,IAG5CzO,EAAK4P,UAAUkqD,eAAiB,SAAUt2C,GACxC,IAAK/mB,KAAKuS,MAAO,CACf,GAAI+sD,GAAWt/D,KAAK8+D,YAAY/3C,EAEhC/mB,MAAKuS,MAAyB,IAAjB+sD,EAAS/sD,MACtBvS,KAAKwS,OAA2B,EAAlB8sD,EAAS9sD,OACnBxS,KAAKuS,MAAQvS,KAAKwS,SACpBxS,KAAKuS,MAAQvS,KAAKwS,OAEpB,IAAImtD,GAAc3/D,KAAKuS,KAGvBvS,MAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,
KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAAUvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACzFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQotD,IAIxCp8D,EAAK4P,UAAUiqD,aAAe,SAAUr2C,GACtC/mB,KAAKq9D,eAAet2C,GACpB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,WAE9E+G,GAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAGtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAI64C,QAAQ5/D,KAAKyH,KAAK,EAAEsf,EAAIO,UAAWtnB,KAAK2H,IAAI,EAAEof,EAAIO,UAAWtnB,KAAKuS,MAAM,EAAEwU,EAAIO,UAAWtnB,KAAKwS,OAAO,EAAEuU,EAAIO,WAC/GP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAEhJ4a,EAAI64C,QAAQ5/D,KAAKyH,KAAMzH,KAAK2H,IAAK3H,KAAKuS,MAAOvS,KAAKwS,QAClDuU,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,OAE1CxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,IAG5CzO,EAAK4P,UAAUyqD,SAAW,SAAU72C,GAClC/mB,KAAK6/D,WAAW94C,EAAK,WAGvBxjB,EAAK4P,UAAU4qD,cAAgB,SAAUh3C,GACvC/mB,KAAK6/D,WAAW94C,EAAK,aAGvBxjB,EAAK4P,UAAU6qD,kBAAoB,SAAUj3C,GAC3C/mB,KAAK6/D,WAAW94C,EAAK,iBAGvBxjB,EAAK4P,UAAU2qD,YAAc,SAAU/2C,GACrC/mB,KAAK6/D,WAAW94C,EAAK,WAGvBxjB,EAAK4P,UAAU8qD,UAAY,SAAUl3C,GACnC/mB,KAAK6/D,WAAW94C,EAAK,SAGvBxjB,EAAK4P,UAAU0qD,aAAe,WAC5B,IAAK79D,KAAKuS,MAAO,CACfvS,KAAKyO,QAAQgd,OAAQzrB,KAAKm7D,eAC1B,IAAI9oD,GAAO,EAAIrS,KAAKyO,QAAQgd,MAC5BzrB,MAAKuS,MAAQF,EACbrS,KAAKwS,OAASH,EAGdrS,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAAsE,GAA7DvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAA+BjgD,KAAKi8D,wBAC9Fj8D,KAAKk8D,gBAAkBl8D,KAAKuS,MAAQF,IAIxC9O,EAAK4P,UAAU0sD,WAAa,SAAU94C,EAAKq2B,GACzCp9C,KAAK69D,aAAa92C,GAElB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,CAElC,IAAI+sD,GAAmB,IACnBv/C,EAAchgB,KAAKyO,QAAQuR,YAC3Bw/C,EAAqBx/D,KAAKyO,QAAQovC,qBAAuB,EAAI79C,KAAKyO,QAAQuR,YAC1E8/C,EAAmB,CAGvB,QAAQ1iB,GACN,IAAK,MAAiB0iB,EAAmB,CAAG,MAC5C,KAAK,SAAiBA,EAAmB,CAAG,MAC5C,KAAK,WAAiBA,EAAmB,CAAG,MAC5C,KAAK,eAAiBA,EAAmB,CAAG,MAC5C,KAAK,OAAiBA,EAAmB,EAG3C/4C,EAAIY,YAAc3nB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,MAAMkB,UAAUD,OAASpM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMF,OAASpM,KAAKyO,QAAQtD,MAAMiB,OAEtIpM,KAAKq8D,YAAc,IACrBt1C,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIq2B,GAAOp9C,KAAK+R,EAAG/R,KAAKgS,EAAGhS,KAAKyO,QAAQgd,OAAQq0C,EAAmB/4C,EAAIO,WACvEP,EAAIlH,UAENkH,EAAIO,WAAatnB,KAAKqzC,SAAWmsB,EAAqBx/C,IAAiBhgB,KAAKq8D,YAAc,EAAKkD,EAAmB,GAClHx4C,EAAIO,WAAatnB,KAAKm2D,gBACtBpvC,EAAIO,UAAYpiB,KAAK4G,IAAI9L,KAAKuS,MAAMwU,EAAIO,WAExCP,EAAIiB,UAAYhoB,KAAKqzC,SAAWrzC,KAAKyO,QAAQtD,
MAAMkB,UAAUF,WAAanM,KAAKsM,MAAQtM,KAAKyO,QAAQtD,MAAMmB,MAAMH,WAAanM,KAAKyO,QAAQtD,MAAMgB,WAChJ4a,EAAIq2B,GAAOp9C,KAAK+R,EAAG/R,KAAKgS,EAAGhS,KAAKyO,QAAQgd,QACxC1E,EAAInH,OACJmH,EAAIlH,SAEJ7f,KAAKomD,YAAYz+C,IAAM3H,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAC7CzrB,KAAKomD,YAAY3+C,KAAOzH,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC9CzrB,KAAKomD,YAAY/+B,MAAQrnB,KAAK+R,EAAI/R,KAAKyO,QAAQgd,OAC/CzrB,KAAKomD,YAAY9iC,OAAStjB,KAAKgS,EAAIhS,KAAKyO,QAAQgd,OAE5CzrB,KAAKyoB,QACPzoB,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,EAAIhS,KAAKwS,OAAS,EAAGhM,OAAW,WAAU,GACpFxG,KAAKomD,YAAY3+C,KAAOvC,KAAK4G,IAAI9L,KAAKomD,YAAY3+C,KAAMzH,KAAK8zD,gBAAgBrsD,MAC7EzH,KAAKomD,YAAY/+B,MAAQniB,KAAKwH,IAAI1M,KAAKomD,YAAY/+B,MAAOrnB,KAAK8zD,gBAAgBrsD,KAAOzH,KAAK8zD,gBAAgBvhD,OAC3GvS,KAAKomD,YAAY9iC,OAASpe,KAAKwH,IAAI1M,KAAKomD,YAAY9iC,OAAQtjB,KAAKomD,YAAY9iC,OAAStjB,KAAK8zD,gBAAgBthD,UAI/GjP,EAAK4P,UAAUwqD,YAAc,SAAU52C,GACrC,IAAK/mB,KAAKuS,MAAO,CACf,GAAImH,GAAS,EACT4lD,EAAWt/D,KAAK8+D,YAAY/3C,EAChC/mB,MAAKuS,MAAQ+sD,EAAS/sD,MAAQ,EAAImH,EAClC1Z,KAAKwS,OAAS8sD,EAAS9sD,OAAS,EAAIkH,EAGpC1Z,KAAKuS,OAAUrN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAK+7D,uBACjF/7D,KAAKwS,QAAUtN,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKg8D,wBACjFh8D,KAAKyO,QAAQgd,QAASvmB,KAAK4G,IAAI9L,KAAKq8D,YAAc,EAAGr8D,KAAKigD,uBAAyBjgD,KAAKi8D,wBACxFj8D,KAAKk8D,gBAAkBl8D,KAAKuS,OAAS+sD,EAAS/sD,MAAQ,EAAImH,KAI9DnW,EAAK4P,UAAUuqD,UAAY,SAAU32C,GACnC/mB,KAAK29D,YAAY52C,GACjB/mB,KAAKyH,KAAOzH,KAAK+R,EAAI/R,KAAKuS,MAAQ,EAClCvS,KAAK2H,IAAM3H,KAAKgS,EAAIhS,KAAKwS,OAAS,EAElCxS,KAAK+1D,OAAOhvC,EAAK/mB,KAAKyoB,MAAOzoB,KAAK+R,EAAG/R,KAAKgS,GAE1ChS,KAAKomD,YAAYz+C,IAAM3H,KAAK2H,IAC5B3H,KAAKomD,YAAY3+C,KAAOzH,KAAKyH,KAC7BzH,KAAKomD,YAAY/+B,MAAQrnB,KAAKyH,KAAOzH,KAAKuS,MAC1CvS,KAAKomD,YAAY9iC,OAAStjB,KAAK2H,IAAM3H,KAAKwS,QAI5CjP,EAAK4P,UAAU4iD,OAAS,SAAUhvC,EAAKwC,EAAMxX,EAAGC,EAAG+8B,EAAOgxB,EAAUC,GAClE,GAAIz2C,GAAQrlB,OAAOlE,KAAKyO,QAAQ8uC,UAAYv9C,KAAKm8D,aAAen8D,KAAKg7D,kBAAmB,CACtFj0C,EAAIQ,MAAQvnB,KAAKqzC,SAAW,QAAU,IAAMrzC,KAAKyO,QAAQ8uC,SAAW,MAAQv9C,KAAKyO,QAAQ+uC,QAEzF,IAAIhX,GAAQjd,EAAKxhB,MAAM,MACnByuD,EAAYhwB,EAAM7gC,OAClB43C,EAAWr5C,OAAOlE,KAAKyO,QAAQ8uC,UAC/BwW,EAAQ/hD,GAAK,EAAIwkD,GAAa,EAAIjZ,CAChB,IAAlByiB,IACFjM,EAAQ/hD,GAAK,EAAIwkD,IAAc,EAAIjZ,GAKrC,KAAK,GADDhrC,GAAQwU,EAAI0vC,YAAYjwB,EAAM,IAAIj0B,MAC7B/M,EAAI,EAAOgxD,EAAJhxD,EAAeA,IAAK,CAClC,GAAI8hB,GAAYP,EAAI0vC,YAAYjwB,EAAMhhC,IAAI+M,KAC1CA,GAAQ+U,EAAY/U,EAAQ+U,EAAY/U,EAE1C,GAAIC,GAASxS,KAAKyO,QAAQ8uC,SAAWiZ,EACjC/uD,EAAOsK,EAAIQ,EAAQ,EACnB5K,EAAMqK,EAAIQ,EAAS,CACP,YAAZutD,IACFp4D,GAAO,GAAM41C,EACb51C,GAAO,EACPosD,GAAS,GAEX/zD,KAAK8zD,iBAAmBnsD,IAAIA,EAAIF,KAAKA,EAAK8K,MAAMA,EAAMC,OAAOA,EAAOuhD,MAAMA,GAG5CvtD,SAA1BxG,KAAKyO,QAAQgvC,UAAoD,OAA1Bz9C,KAAKyO,QAAQgvC,UAA+C,SAA1Bz9C,KAAKyO,QAAQgvC,WACxF12B,EAAIiB,UAAYhoB,KAAKyO,QAAQgvC,SAC7B12B,EAAIkwC,SAASxvD,EAAME,EAAK4K,EAAOC,IAIjCuU,EAAIiB,UAAYhoB,KAAKyO,QAAQ6uC,WAAa,QAC1Cv2B,EAAIuB,UAAYymB,GAAS,SACzBhoB,EAAIwB,aAAew3C,GAAY,SAC3B//D,KAAKyO,QAAQivC,gBAAkB,IACjC32B,EAAIO,UAActnB,KAAKyO,QAAQivC,gBAC/B32B,EAAIY,YAAc3nB,KAAKyO,QAAQkvC,gBAC/B52B,EAAImwC,SAAc,QAEpB,KAAK,GAAI1xD,GAAI,EAAOgxD,EAAJhxD,EAAeA,IAC1BxF,KAAKyO,QAAQivC,iBACd32B,EAAIowC,WAAW3wB,EAAMhhC,GAAIuM,EAAGgiD,GAE9BhtC,EAAIyB,SAASge,EAAMhhC,GAAIuM,EAAGgiD,GAC1BA,GAASxW,IAMfh6C,EAAK4P,UAAU2rD,YAAc,SAAS/3C,GACpC,GAAmBvgB,SAAfxG,KAAKyoB,MAAqB,CAC5B1B,EAAIQ,MAAQvnB,KAAKqzC,SAAW,QAAU,IAAMrzC,KAAKyO,QAAQ8uC,SAAW,MAAQv9C,KAAKyO,QAAQ+uC,QAMzF,KAAK,GAJDhX,GAAQxmC,KAAKyoB,MAAM1gB,MAAM,MACzByK,GAAUtO,OAAOlE,KAAKyO,QAAQ8uC,UAAY,GAAK/W,EAAM7gC,OACrD4M,EAAQ,EAEH/M,EAAI,EAAGq7B,EAAO2F,EAAM7gC,OAAYk
7B,EAAJr7B,EAAUA,IAC7C+M,EAAQrN,KAAKwH,IAAI6F,EAAOwU,EAAI0vC,YAAYjwB,EAAMhhC,IAAI+M,MAGpD,QAAQA,MAASA,EAAOC,OAAUA,EAAQgkD,UAAWhwB,EAAM7gC,QAG3D,OAAQ4M,MAAS,EAAGC,OAAU,EAAGgkD,UAAW,IAUhDjzD,EAAK4P,UAAU48C,OAAS,WACtB,MAAmBvpD,UAAfxG,KAAKuS,MACDvS,KAAK+R,EAAI/R,KAAKuS,MAAOvS,KAAKm2D,iBAAoBn2D,KAAK+jD,cAAchyC,GACjE/R,KAAK+R,EAAI/R,KAAKuS,MAAOvS,KAAKm2D,gBAAoBn2D,KAAKgkD,kBAAkBjyC,GACrE/R,KAAKgS,EAAIhS,KAAKwS,OAAOxS,KAAKm2D,iBAAoBn2D,KAAK+jD,cAAc/xC,GACjEhS,KAAKgS,EAAIhS,KAAKwS,OAAOxS,KAAKm2D,gBAAoBn2D,KAAKgkD,kBAAkBhyC,GAGpE,GAQXzO,EAAK4P,UAAU8sD,OAAS,WACtB,MAAQjgE,MAAK+R,GAAK/R,KAAK+jD,cAAchyC,GAC7B/R,KAAK+R,EAAI/R,KAAKgkD,kBAAkBjyC,GAChC/R,KAAKgS,GAAKhS,KAAK+jD,cAAc/xC,GAC7BhS,KAAKgS,EAAIhS,KAAKgkD,kBAAkBhyC,GAW1CzO,EAAK4P,UAAU28C,eAAiB,SAAS7yC,EAAM8mC,EAAcC,GAC3DhkD,KAAKm2D,gBAAkB,EAAIl5C,EAC3Bjd,KAAKm8D,aAAel/C,EACpBjd,KAAK+jD,cAAgBA,EACrB/jD,KAAKgkD,kBAAoBA,GAS3BzgD,EAAK4P,UAAU6vB,SAAW,SAAS/lB,GACjCjd,KAAKm2D,gBAAkB,EAAIl5C,EAC3Bjd,KAAKm8D,aAAel/C,GAQtB1Z,EAAK4P,UAAU+sD,cAAgB,WAC7BlgE,KAAKy7D,GAAK,EACVz7D,KAAK07D,GAAK,GASZn4D,EAAK4P,UAAUgtD,eAAiB,SAASC,GACvC,GAAIC,GAAergE,KAAKy7D,GAAKz7D,KAAKy7D,GAAK2E,CAEvCpgE,MAAKy7D,GAAKv2D,KAAKyqB,KAAK0wC,EAAargE,KAAKyO,QAAQwuC,MAC9CojB,EAAergE,KAAK07D,GAAK17D,KAAK07D,GAAK0E,EAEnCpgE,KAAK07D,GAAKx2D,KAAKyqB,KAAK0wC,EAAargE,KAAKyO,QAAQwuC,OAGhDp9C,EAAOD,QAAU2D,GAKb,SAAS1D,GAWb,QAAS2D,GAAM+V,EAAWxH,EAAGC,EAAGuX,EAAMtc,GAElCjN,KAAKuZ,UADHA,EACeA,EAGAhI,SAASojB,KAIdnuB,SAAVyG,IACe,gBAAN8E,IACT9E,EAAQ8E,EACRA,EAAIvL,QACqB,gBAAT+iB,IAChBtc,EAAQsc,EACRA,EAAO/iB,QAGPyG,GACEqwC,UAAW,QACXC,SAAU,GACVC,SAAU,UACVryC,OACEiB,OAAQ,OACRD,WAAY,aAMpBnM,KAAK+R,EAAI,EACT/R,KAAKgS,EAAI,EACThS,KAAKgkB,QAAU,EAELxd,SAANuL,GAAyBvL,SAANwL,GACrBhS,KAAK6tD,YAAY97C,EAAGC,GAETxL,SAAT+iB,GACFvpB,KAAK8tD,QAAQvkC,GAIfvpB,KAAKsf,MAAQ/N,SAASM,cAAc,MACpC,IAAIyuD,GAAYtgE,KAAKsf,MAAMrS,KAC3BqzD,GAAU18C,SAAW,WACrB08C,EAAU9oC,WAAa,SACvB8oC,EAAUl0D,OAAS,aAAea,EAAM9B,MAAMiB,OAC9Ck0D,EAAUn1D,MAAQ8B,EAAMqwC,UACxBgjB,EAAU/iB,SAAWtwC,EAAMswC,SAAW,KACtC+iB,EAAUC,WAAatzD,EAAMuwC,SAC7B8iB,EAAUt8C,QAAUhkB,KAAKgkB,QAAU,KACnCs8C,EAAU3gD,gBAAkB1S,EAAM9B,MAAMgB,WACxCm0D,EAAUtwC,aAAe,MACzBswC,EAAUxuC,gBAAkB,MAC5BwuC,EAAUE,mBAAqB,MAC/BF,EAAUrwC,UAAY,wCACtBqwC,EAAUG,WAAa,SACvBzgE,KAAKuZ,UAAU9H,YAAYzR,KAAKsf,OAOlC9b,EAAM2P,UAAU06C,YAAc,SAAS97C,EAAGC,GACxChS,KAAK+R,EAAInH,SAASmH,GAClB/R,KAAKgS,EAAIpH,SAASoH,IAOpBxO,EAAM2P,UAAU26C,QAAU,SAASl+B,GAC7BA,YAAmB+c,UACrB3sC,KAAKsf,MAAM2E,UAAY,GACvBjkB,KAAKsf,MAAM7N,YAAYme,IAGvB5vB,KAAKsf,MAAM2E,UAAY2L,GAQ3BpsB,EAAM2P,UAAUw0B,KAAO,SAAUA,GAK/B,GAJanhC,SAATmhC,IACFA,GAAO,GAGLA,EAAM,CACR,GAAIn1B,GAASxS,KAAKsf,MAAMuF,aACpBtS,EAASvS,KAAKsf,MAAME,YACpBgV,EAAYx0B,KAAKsf,MAAMzV,WAAWgb,aAClC62B,EAAW17C,KAAKsf,MAAMzV,WAAW2V,YAEjC7X,EAAO3H,KAAKgS,EAAIQ,CAChB7K,GAAM6K,EAASxS,KAAKgkB,QAAUwQ,IAChC7sB,EAAM6sB,EAAYhiB,EAASxS,KAAKgkB,SAE9Brc,EAAM3H,KAAKgkB,UACbrc,EAAM3H,KAAKgkB,QAGb,IAAIvc,GAAOzH,KAAK+R,CACZtK,GAAO8K,EAAQvS,KAAKgkB,QAAU03B,IAChCj0C,EAAOi0C,EAAWnpC,EAAQvS,KAAKgkB,SAE7Bvc,EAAOzH,KAAKgkB,UACdvc,EAAOzH,KAAKgkB,SAGdhkB,KAAKsf,MAAMrS,MAAMxF,KAAOA,EAAO,KAC/BzH,KAAKsf,MAAMrS,MAAMtF,IAAMA,EAAM,KAC7B3H,KAAKsf,MAAMrS,MAAMuqB,WAAa,cAG9Bx3B,MAAK0nC,QAOTlkC,EAAM2P,UAAUu0B,KAAO,WACrB1nC,KAAKsf,MAAMrS,MAAMuqB,WAAa,UAGhC33B,EAAOD,QAAU4D,GAKb,SAAS3D,EAAQD,GAarB,QAAS8gE,GAAUhuD,GAEjB,MADAod,GAAMpd,EACCiuD,IAoCT,QAAS1+B,KACP95B,EAAQ,EACR1H,EAAIqvB,EAAI1K,OAAO,GAQjB,QAASiD,KACPlgB,IACA1H,EAAIqvB,EAAI1K,OAAOjd,GAOjB,QAASy4D,KACP,MAAO9wC,GAAI1K,OAAOjd,EAAQ,GAS5B,QAAS04D,GAAepgE,GACtB,MAAOqgE,GAAkB9yD,KAAKvN,GAShC,QAASsgE,GAAOx7D,EAAGa,GAKjB,GAJKb,IACHA,MAGEa,EACF,IAAK,GAAI6P,KAAQ7P,GACXA,EAAE
N,eAAemQ,KACnB1Q,EAAE0Q,GAAQ7P,EAAE6P,GAIlB,OAAO1Q,GAeT,QAASqS,GAASmL,EAAKooB,EAAM9jC,GAG3B,IAFA,GAAI+F,GAAO+9B,EAAKpjC,MAAM,KAClBi5D,EAAIj+C,EACD3V,EAAKzH,QAAQ,CAClB,GAAI+C,GAAM0E,EAAKkE,OACXlE,GAAKzH,QAEFq7D,EAAEt4D,KACLs4D,EAAEt4D,OAEJs4D,EAAIA,EAAEt4D,IAINs4D,EAAEt4D,GAAOrB,GAWf,QAAS45D,GAAQhwC,EAAO60B,GAOtB,IANA,GAAItgD,GAAGC,EACHq0B,EAAU,KAGVonC,GAAUjwC,GACVvxB,EAAOuxB,EACJvxB,EAAKolC,QACVo8B,EAAOl5D,KAAKtI,EAAKolC,QACjBplC,EAAOA,EAAKolC,MAId,IAAIplC,EAAKs9C,MACP,IAAKx3C,EAAI,EAAGC,EAAM/F,EAAKs9C,MAAMr3C,OAAYF,EAAJD,EAASA,IAC5C,GAAIsgD,EAAKzlD,KAAOX,EAAKs9C,MAAMx3C,GAAGnF,GAAI,CAChCy5B,EAAUp6B,EAAKs9C,MAAMx3C,EACrB,OAiBN,IAZKs0B,IAEHA,GACEz5B,GAAIylD,EAAKzlD,IAEP4wB,EAAM60B,OAERhsB,EAAQqnC,KAAOJ,EAAMjnC,EAAQqnC,KAAMlwC,EAAM60B,QAKxCtgD,EAAI07D,EAAOv7D,OAAS,EAAGH,GAAK,EAAGA,IAAK,CACvC,GAAIkF,GAAIw2D,EAAO17D,EAEVkF,GAAEsyC,QACLtyC,EAAEsyC,UAE4B,IAA5BtyC,EAAEsyC,MAAMr2C,QAAQmzB,IAClBpvB,EAAEsyC,MAAMh1C,KAAK8xB,GAKbgsB,EAAKqb,OACPrnC,EAAQqnC,KAAOJ,EAAMjnC,EAAQqnC,KAAMrb,EAAKqb,OAS5C,QAASC,GAAQnwC,EAAOy8B,GAKtB,GAJKz8B,EAAM6sB,QACT7sB,EAAM6sB,UAER7sB,EAAM6sB,MAAM91C,KAAK0lD,GACbz8B,EAAMy8B,KAAM,CACd,GAAIyT,GAAOJ,KAAU9vC,EAAMy8B,KAC3BA,GAAKyT,KAAOJ,EAAMI,EAAMzT,EAAKyT,OAajC,QAASE,GAAWpwC,EAAO7H,EAAMC,EAAIviB,EAAMq6D,GACzC,GAAIzT,IACFtkC,KAAMA,EACNC,GAAIA,EACJviB,KAAMA,EAQR,OALImqB,GAAMy8B,OACRA,EAAKyT,KAAOJ,KAAU9vC,EAAMy8B,OAE9BA,EAAKyT,KAAOJ,EAAMrT,EAAKyT,SAAYA,GAE5BzT,EAOT,QAAS4T,KAKP,IAJAC,EAAYC,EAAUC,KACtBC,EAAQ,GAGI,KAALjhE,GAAiB,KAALA,GAAkB,MAALA,GAAkB,MAALA,GAC3C4nB,GAGF,GAAG,CACD,GAAIs5C,IAAY,CAGhB,IAAS,KAALlhE,EAAU,CAGZ,IADA,GAAI+E,GAAI2C,EAAQ,EACQ,KAAjB2nB,EAAI1K,OAAO5f,IAA8B,KAAjBsqB,EAAI1K,OAAO5f,IACxCA,GAEF,IAAqB,MAAjBsqB,EAAI1K,OAAO5f,IAA+B,IAAjBsqB,EAAI1K,OAAO5f,GAAU,CAEhD,KAAY,IAAL/E,GAAgB,MAALA,GAChB4nB,GAEFs5C,IAAY,GAGhB,GAAS,KAALlhE,GAA6B,KAAjBmgE,IAAsB,CAEpC,KAAY,IAALngE,GAAgB,MAALA,GAChB4nB,GAEFs5C,IAAY,EAEd,GAAS,KAALlhE,GAA6B,KAAjBmgE,IAAsB,CAEpC,KAAY,IAALngE,GAAS,CACd,GAAS,KAALA,GAA6B,KAAjBmgE,IAAsB,CAEpCv4C,IACAA,GACA,OAGAA,IAGJs5C,GAAY,EAId,KAAY,KAALlhE,GAAiB,KAALA,GAAkB,MAALA,GAAkB,MAALA,GAC3C4nB,UAGGs5C,EAGP,IAAS,IAALlhE,EAGF,YADA8gE,EAAYC,EAAUI,UAKxB,IAAIC,GAAKphE,EAAImgE,GACb,IAAIkB,EAAWD,GAKb,MAJAN,GAAYC,EAAUI,UACtBF,EAAQG,EACRx5C,QACAA,IAKF,IAAIy5C,EAAWrhE,GAIb,MAHA8gE,GAAYC,EAAUI,UACtBF,EAAQjhE,MACR4nB,IAMF,IAAIw4C,EAAepgE,IAAW,KAALA,EAAU,CAIjC,IAHAihE,GAASjhE,EACT4nB,IAEOw4C,EAAepgE,IACpBihE,GAASjhE,EACT4nB,GAYF,OAVa,SAATq5C,EACFA,GAAQ,EAEQ,QAATA,EACPA,GAAQ,EAEAh9D,MAAMR,OAAOw9D,MACrBA,EAAQx9D,OAAOw9D,SAEjBH,EAAYC,EAAUO,YAKxB,GAAS,KAALthE,EAAU,CAEZ,IADA4nB,IACY,IAAL5nB,IAAiB,KAALA,GAAkB,KAALA,GAA6B,KAAjBmgE,MAC1Cc,GAASjhE,EACA,KAALA,GACF4nB,IAEFA,GAEF,IAAS,KAAL5nB,EACF,KAAMuhE,GAAe,2BAIvB,OAFA35C,UACAk5C,EAAYC,EAAUO,YAMxB,IADAR,EAAYC,EAAUS,QACV,IAALxhE,GACLihE,GAASjhE,EACT4nB,GAEF,MAAM,IAAI7O,aAAY,yBAA2B0oD,EAAKR,EAAO,IAAM,KAOrE,QAASf,KACP,GAAI1vC,KAwBJ,IAtBAgR,IACAq/B,IAGa,UAATI,IACFzwC,EAAMkxC,QAAS,EACfb,MAIW,SAATI,GAA6B,WAATA,KACtBzwC,EAAMnqB,KAAO46D,EACbJ,KAIEC,GAAaC,EAAUO,aACzB9wC,EAAM5wB,GAAKqhE,EACXJ,KAIW,KAATI,EACF,KAAMM,GAAe,2BAQvB,IANAV,IAGAc,EAAgBnxC,GAGH,KAATywC,EACF,KAAMM,GAAe,2BAKvB,IAHAV,IAGc,KAAVI,EACF,KAAMM,GAAe,uBASvB,OAPAV,WAGOrwC,GAAM60B,WACN70B,GAAMy8B,WACNz8B,GAAMA,MAENA,EAOT,QAASmxC,GAAiBnxC,GACxB,KAAiB,KAAVywC,GAAyB,KAATA,GACrBW,EAAepxC,GACF,KAATywC,GACFJ,IAWN,QAASe,GAAepxC,GAEtB,GAAIqxC,GAAWC,EAActxC,EAC7B,IAAIqxC,EAIF,WAFAE,GAAUvxC,EAAOqxC,EAMnB,IAAInB,GAAOsB,EAAwBxxC,EACnC,KAAIkwC,EAAJ,CAKA,GAAII,GAAaC,EAAUO,WACzB,KAAMC,GAAe,sBAEvB,IAAI3hE,GAAKqhE,CAGT,IAFAJ,IAEa,KAATI,EAAc,CAGhB,GADAJ,IACIC,GAAaC,EAAUO,WACzB,KAAMC,GAAe,sBAEvB/wC,G
AAM5wB,GAAMqhE,EACZJ,QAIAoB,GAAmBzxC,EAAO5wB,IAS9B,QAASkiE,GAAetxC,GACtB,GAAIqxC,GAAW,IAgBf,IAba,YAATZ,IACFY,KACAA,EAASx7D,KAAO,WAChBw6D,IAGIC,GAAaC,EAAUO,aACzBO,EAASjiE,GAAKqhE,EACdJ,MAKS,KAATI,EAAc,CAehB,GAdAJ,IAEKgB,IACHA,MAEFA,EAASx9B,OAAS7T,EAClBqxC,EAASxc,KAAO70B,EAAM60B,KACtBwc,EAAS5U,KAAOz8B,EAAMy8B,KACtB4U,EAASrxC,MAAQA,EAAMA,MAGvBmxC,EAAgBE,GAGH,KAATZ,EACF,KAAMM,GAAe,2BAEvBV,WAGOgB,GAASxc,WACTwc,GAAS5U,WACT4U,GAASrxC,YACTqxC,GAASx9B,OAGX7T,EAAM0xC,YACT1xC,EAAM0xC,cAER1xC,EAAM0xC,UAAU36D,KAAKs6D,GAGvB,MAAOA,GAYT,QAASG,GAAyBxxC,GAEhC,MAAa,QAATywC,GACFJ,IAGArwC,EAAM60B,KAAO8c,IACN,QAES,QAATlB,GACPJ,IAGArwC,EAAMy8B,KAAOkV,IACN,QAES,SAATlB,GACPJ,IAGArwC,EAAMA,MAAQ2xC,IACP,SAGF,KAQT,QAASF,GAAmBzxC,EAAO5wB,GAEjC,GAAIylD,IACFzlD,GAAIA,GAEF8gE,EAAOyB,GACPzB,KACFrb,EAAKqb,KAAOA,GAEdF,EAAQhwC,EAAO60B,GAGf0c,EAAUvxC,EAAO5wB,GAQnB,QAASmiE,GAAUvxC,EAAO7H,GACxB,KAAgB,MAATs4C,GAA0B,MAATA,GAAe,CACrC,GAAIr4C,GACAviB,EAAO46D,CACXJ,IAEA,IAAIgB,GAAWC,EAActxC,EAC7B,IAAIqxC,EACFj5C,EAAKi5C,MAEF,CACH,GAAIf,GAAaC,EAAUO,WACzB,KAAMC,GAAe,kCAEvB34C,GAAKq4C,EACLT,EAAQhwC,GACN5wB,GAAIgpB,IAENi4C,IAIF,GAAIH,GAAOyB,IAGPlV,EAAO2T,EAAWpwC,EAAO7H,EAAMC,EAAIviB,EAAMq6D,EAC7CC,GAAQnwC,EAAOy8B,GAEftkC,EAAOC,GASX,QAASu5C,KAGP,IAFA,GAAIzB,GAAO,KAEK,KAATO,GAAc,CAGnB,IAFAJ,IACAH,KACiB,KAAVO,GAAyB,KAATA,GAAc,CACnC,GAAIH,GAAaC,EAAUO,WACzB,KAAMC,GAAe,0BAEvB,IAAI/rD,GAAOyrD,CAGX,IADAJ,IACa,KAATI,EACF,KAAMM,GAAe,wBAIvB,IAFAV,IAEIC,GAAaC,EAAUO,WACzB,KAAMC,GAAe,2BAEvB,IAAI36D,GAAQq6D,CACZ9pD,GAASupD,EAAMlrD,EAAM5O,GAErBi6D,IACY,KAARI,GACFJ,IAIJ,GAAa,KAATI,EACF,KAAMM,GAAe,qBAEvBV,KAGF,MAAOH,GAQT,QAASa,GAAea,GACtB,MAAO,IAAIrpD,aAAYqpD,EAAU,UAAYX,EAAKR,EAAO,IAAM,WAAav5D,EAAQ,KAStF,QAAS+5D,GAAM34C,EAAMu5C,GACnB,MAAQv5C,GAAK5jB,QAAUm9D,EAAav5C,EAAQA,EAAKje,OAAO,EAAG,IAAM,MASnE,QAASy3D,GAASC,EAAQC,EAAQ/pD,GAC5BjT,MAAMC,QAAQ88D,GAChBA,EAAO36D,QAAQ,SAAU66D,GACnBj9D,MAAMC,QAAQ+8D,GAChBA,EAAO56D,QAAQ,SAAU86D,GACvBjqD,EAAGgqD,EAAOC,KAIZjqD,EAAGgqD,EAAOD,KAKVh9D,MAAMC,QAAQ+8D,GAChBA,EAAO56D,QAAQ,SAAU86D,GACvBjqD,EAAG8pD,EAAQG,KAIbjqD,EAAG8pD,EAAQC,GAWjB,QAAS5b,GAAY30C,GAEnB,GAAI00C,GAAUsZ,EAAShuD,GACnB0wD,GACFpmB,SACAc,SACArvC,WAmBF,IAfI24C,EAAQpK,OACVoK,EAAQpK,MAAM30C,QAAQ,SAAUg7D,GAC9B,GAAIC,IACFjjE,GAAIgjE,EAAQhjE,GACZooB,MAAOrkB,OAAOi/D,EAAQ56C,OAAS46C,EAAQhjE,IAEzC0gE,GAAMuC,EAAWD,EAAQlC,MACrBmC,EAAUjmB,QACZimB,EAAUlmB,MAAQ,SAEpBgmB,EAAUpmB,MAAMh1C,KAAKs7D,KAKrBlc,EAAQtJ,MAAO,CAMjB,GAAIylB,GAAc,SAAUC,GAC1B,GAAIC,IACFr6C,KAAMo6C,EAAQp6C,KACdC,GAAIm6C,EAAQn6C,GAId,OAFA03C,GAAM0C,EAAWD,EAAQrC,MACzBsC,EAAUx2D,MAAyB,MAAhBu2D,EAAQ18D,KAAgB,QAAU,OAC9C28D,EAGTrc,GAAQtJ,MAAMz1C,QAAQ,SAAUm7D,GAC9B,GAAIp6C,GAAMC,CAERD,GADEo6C,EAAQp6C,eAAgB7iB,QACnBi9D,EAAQp6C,KAAK4zB,OAIlB38C,GAAImjE,EAAQp6C,MAKdC,EADEm6C,EAAQn6C,aAAc9iB,QACnBi9D,EAAQn6C,GAAG2zB,OAId38C,GAAImjE,EAAQn6C,IAIZm6C,EAAQp6C,eAAgB7iB,SAAUi9D,EAAQp6C,KAAK00B,OACjD0lB,EAAQp6C,KAAK00B,MAAMz1C,QAAQ,SAAUq7D,GACnC,GAAID,GAAYF,EAAYG,EAC5BN,GAAUtlB,MAAM91C,KAAKy7D,KAIzBV,EAAS35C,EAAMC,EAAI,SAAUD,EAAMC,GACjC,GAAIq6C,GAAUrC,EAAW+B,EAAWh6C,EAAK/oB,GAAIgpB,EAAGhpB,GAAImjE,EAAQ18D,KAAM08D,EAAQrC,MACtEsC,EAAYF,EAAYG,EAC5BN,GAAUtlB,MAAM91C,KAAKy7D,KAGnBD,EAAQn6C,aAAc9iB,SAAUi9D,EAAQn6C,GAAGy0B,OAC7C0lB,EAAQn6C,GAAGy0B,MAAMz1C,QAAQ,SAAUq7D,GACjC,GAAID,GAAYF,EAAYG,EAC5BN,GAAUtlB,MAAM91C,KAAKy7D,OAW7B,MAJIrc,GAAQ+Z,OACViC,EAAU30D,QAAU24C,EAAQ+Z,MAGvBiC,EAnyBT,GAAI5B,IACFC,KAAO,EACPG,UAAY,EACZG,WAAY,EACZE,QAAU,GAIRH,GACF6B,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EACLC,KAAK,EAELC,MAAM,EACNC,MAAM,GAGJr0C,EAAM,GACN3nB,EAAQ,EACR1H,EAAI,GACJihE,EAAQ,GACRH,EAAYC,EAAUC,KAmCtBX,EAAoB,iBA2uBxBl
hE,GAAQ8gE,SAAWA,EACnB9gE,EAAQynD,WAAaA,GAKjB,SAASxnD,EAAQD,GAGrB,QAAS4nD,GAAW4c,EAAW31D,GAC7B,GAAIqvC,MACAd,IACJh9C,MAAKyO,SACHqvC,OACEQ,cAAc,GAEhBtB,OACEqnB,eAAe,EACfn5D,YAAY,IAIA1E,SAAZiI,IACFzO,KAAKyO,QAAQuuC,MAAqB,cAAIvuC,EAAQ41D,eAAgB,EAC9DrkE,KAAKyO,QAAQuuC,MAAkB,WAAOvuC,EAAQvD,YAAgB,EAC9DlL,KAAKyO,QAAQqvC,MAAoB,aAAKrvC,EAAQ6vC,cAAgB,EAKhE,KAAK,GAFDgmB,GAASF,EAAUtmB,MACnBymB,EAASH,EAAUpnB,MACdx3C,EAAI,EAAGA,EAAI8+D,EAAO3+D,OAAQH,IAAK,CACtC,GAAIkoD,MACA8W,EAAQF,EAAO9+D,EACnBkoD,GAAS,GAAI8W,EAAMnkE,GACnBqtD,EAAW,KAAI8W,EAAMC,OACrB/W,EAAS,GAAI8W,EAAM96D,OACnBgkD,EAAiB,WAAI8W,EAAMvpB,WAG3ByS,EAAY,MAAI8W,EAAMr5D,MACtBuiD,EAAmB,aAAsBlnD,SAAlBknD,EAAY,OAAkB,EAAQ1tD,KAAKyO,QAAQ6vC,aAC1ER,EAAM91C,KAAK0lD,GAGb,IAAK,GAAIloD,GAAI,EAAGA,EAAI++D,EAAO5+D,OAAQH,IAAK,CACtC,GAAIsgD,MACA4e,EAAQH,EAAO/+D,EACnBsgD,GAAS,GAAI4e,EAAMrkE,GACnBylD,EAAiB,WAAI4e,EAAMzpB,WAC3B6K,EAAQ,EAAI4e,EAAM3yD,EAClB+zC,EAAQ,EAAI4e,EAAM1yD,EAClB8zC,EAAY,MAAI4e,EAAMj8C,MAEpBq9B,EAAY,MADuB,GAAjC9lD,KAAKyO,QAAQuuC,MAAM9xC,WACLw5D,EAAMv5D,MAGU3E,SAAhBk+D,EAAMv5D,OAAuBgB,WAAWu4D,EAAMv5D,MAAOiB,OAAOs4D,EAAMv5D,OAAS3E,OAE7Fs/C,EAAa,OAAI4e,EAAMryD,KACvByzC,EAAqB,eAAI9lD,KAAKyO,QAAQuuC,MAAMqnB,cAC5Cve,EAAqB,eAAI9lD,KAAKyO,QAAQuuC,MAAMqnB,cAC5CrnB,EAAMh1C,KAAK89C,GAGb,OAAQ9I,MAAMA,EAAOc,MAAMA,GAG7Bl+C,EAAQ4nD,WAAaA,GAIjB,SAAS3nD,EAAQD,EAASM,GAI9BL,EAAOD,QAA6B,mBAAX2J,SAA2BA,OAAe,QAAKrJ,EAAoB,KAKxF,SAASL,EAAQD,EAASM,GAI9B,GAAsB,mBAAXqJ,QAAwB,CACjC,GAAIo7D,GAAczkE,EAAoB,IAClC6D,EAASwF,OAAe,QAAKrJ,EAAoB,GACrDL,GAAOD,QAAU+kE,EAAY5gE,OAG7BlE,GAAOD,QAAU,WACf,KAAMgE,OAAM,+DAOZ,SAAS/D,EAAQD,EAASM,GAmB9B,QAASg2B,MAjBT,GAAInZ,GAAU7c,EAAoB,IAC9B6D,EAAS7D,EAAoB,IAC7BS,EAAOT,EAAoB,GAK3BolD,GAJUplD,EAAoB,GACnBA,EAAoB,GACvBA,EAAoB,IAClBA,EAAoB,IAClBA,EAAoB,KAChCyB,EAAWzB,EAAoB,GAYnC6c,GAAQmZ,EAAK/iB,WASb+iB,EAAK/iB,UAAUuhB,QAAU,SAAUnb,GAmHjC,QAASqrD,GAAat7D,GAChB6K,EAAGs1C,YACLt1C,EAAGyZ,KAAK,aAActkB,GApH1BtJ,KAAK+vB,OAEL/vB,KAAK+vB,IAAIrwB,KAAuB6R,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI5jB,WAAuBoF,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIgV,mBAAuBxzB,SAASM,cAAc,OACvD7R,KAAK+vB,IAAImY,qBAAuB32B,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI8H,gBAAuBtmB,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI80C,cAAuBtzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI+0C,eAAuBvzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI7D,OAAuB3a,SAASM,cAAc,OACvD7R,KAAK+vB,IAAItoB,KAAuB8J,SAASM,cAAc,OACvD7R,KAAK+vB,IAAI1I,MAAuB9V,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIpoB,IAAuB4J,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIzM,OAAuB/R,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIg1C,UAAuBxzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIi1C,aAAuBzzD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIk1C,cAAuB1zD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIm1C,iBAAuB3zD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIo1C,eAAuB5zD,SAASM,cAAc,OACvD7R,KAAK+vB,IAAIq1C,kBAAuB7zD,SAASM,cAAc,OAEvD7R,KAAK+vB,IAAIrwB,KAAKmI,UAA4B,oBAC1C7H,KAAK+vB,IAAI5jB,WAAWtE,UAAsB,sBAC1C7H,KAAK+vB,IAAIgV,mBAAmBl9B,UAAc,+BAC1C7H,KAAK+vB,IAAImY,qBAAqBrgC,UAAY,iCAC1C7H,KAAK+vB,IAAI8H,gBAAgBhwB,UAAiB,kBAC1C7H,KAAK+vB,IAAI80C,cAAch9D,UAAmB,gBAC1C7H,KAAK+vB,IAAI+0C,eAAej9D,UAAkB,iBAC1C7H,KAAK+vB,IAAIpoB,IAAIE,UAA6B,eAC1C7H,KAAK+vB,IAAIzM,OAAOzb,UAA0B,kBAC1C7H,KAAK+vB,IAAItoB,KAAKI,UAA4B,UAC1C7H,KAAK+vB,IAAI7D,OAAOrkB,UAA0B,UAC1C7H,KAAK+vB,IAAI1I,MAAMxf,UAA2B,UAC1C7H,KAAK+vB,IAAIg1C,UAAUl9D,UAAuB,aAC1C7H,KAAK+vB,IAAIi1C,aAAan9D,UAAoB,gBAC1C7H,KAAK+vB,IAAIk1C,cAAcp9D,UAAmB,aAC1C7H,KAAK+vB,IAAIm1C,iBAAiBr9D,UAAgB,gBAC1C7H,KAAK+vB,IAAIo1C,eAAet9D,UAAkB,aAC1C7H,KAAK+vB,IAAIq1C,kBAAkBv9D,UAAe,gBAE1C7H,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI5jB,YACnCnM,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAIgV,oBACnC/kC,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAImY,sBACnCloC,KAAK+vB,IAAIrwB,KAAK
+R,YAAYzR,KAAK+vB,IAAI8H,iBACnC73B,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI80C,eACnC7kE,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAI+0C,gBACnC9kE,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAIpoB,KACnC3H,KAAK+vB,IAAIrwB,KAAK+R,YAAYzR,KAAK+vB,IAAIzM,QAEnCtjB,KAAK+vB,IAAI8H,gBAAgBpmB,YAAYzR,KAAK+vB,IAAI7D,QAC9ClsB,KAAK+vB,IAAI80C,cAAcpzD,YAAYzR,KAAK+vB,IAAItoB,MAC5CzH,KAAK+vB,IAAI+0C,eAAerzD,YAAYzR,KAAK+vB,IAAI1I,OAE7CrnB,KAAK+vB,IAAI8H,gBAAgBpmB,YAAYzR,KAAK+vB,IAAIg1C,WAC9C/kE,KAAK+vB,IAAI8H,gBAAgBpmB,YAAYzR,KAAK+vB,IAAIi1C,cAC9ChlE,KAAK+vB,IAAI80C,cAAcpzD,YAAYzR,KAAK+vB,IAAIk1C,eAC5CjlE,KAAK+vB,IAAI80C,cAAcpzD,YAAYzR,KAAK+vB,IAAIm1C,kBAC5CllE,KAAK+vB,IAAI+0C,eAAerzD,YAAYzR,KAAK+vB,IAAIo1C,gBAC7CnlE,KAAK+vB,IAAI+0C,eAAerzD,YAAYzR,KAAK+vB,IAAIq1C,mBAE7CplE,KAAKuT,GAAG,cAAevT,KAAKyhB,OAAOqT,KAAK90B,MAExC,IAAImU,GAAKnU,IACTA,MAAKuT,GAAG,SAAU,SAAUo7C,GACtBA,GAAkC,GAApBA,EAAWv7C,MAEtBe,EAAGkxD,eACNlxD,EAAGkxD,aAAe/rD,WAAW,WAC3BnF,EAAGkxD,aAAe,KAClBlxD,EAAGsN,UACF,IAKLtN,EAAGsN,WAMPzhB,KAAK8D,OAAS,GAAIC,GAAO/D,KAAK+vB,IAAIrwB,MAAO4lE,YAAa,UACtDtlE,KAAK8D,OAAOoR,IAAI,SAASi0C,KAAK/lB,QAAQ,IACtCpjC,KAAKulE,YAEL,IAAIC,IACF,MAAO,YAAa,QACpB,QACA,MAAO,WAAY,UAAW,SAsDhC,IA/CAA,EAAOn9D,QAAQ,SAAUvB,GACvB,GAAIgC,GAAW,SAAUQ,GACnB6K,EAAGs1C,YACLt1C,EAAGyZ,KAAK9mB,EAAMwC,GAGlB6K,GAAGrQ,OAAOyP,GAAGzM,EAAMgC,GACnBqL,EAAGoxD,UAAUz+D,GAAQgC,IAIvB9I,KAAK8D,OAAOyP,GAAG,eAAgB,SAAUjK,GACnCA,EAAMqnC,SACJx8B,EAAGs1C,YACLt1C,EAAGyZ,KAAK,QAAStkB,IAGrBwrB,KAAK90B,OAOPA,KAAK+vB,IAAIrwB,KAAKiJ,iBAAiB,aAAci8D,GAC7C5kE,KAAK+vB,IAAIrwB,KAAKiJ,iBAAiB,iBAAkBi8D,GAGjD5kE,KAAKgG,OACHtG,QACAyM,cACA0rB,mBACAgtC,iBACAC,kBACA54C,UACAzkB,QACA4f,SACA1f,OACA2b,UACAlX,UACAi7B,UAAW,EACXo+B,aAAc,GAGhBzlE,KAAK0lE,YAAc,GAGdnsD,EAAW,KAAM,IAAI3V,OAAM,wBAChC2V,GAAU9H,YAAYzR,KAAK+vB,IAAIrwB,OA4BjCw2B,EAAK/iB,UAAUD,WAAa,SAAUzE,GACpC,GAAIA,EAAS,CAEX,GAAIP,IAAU,QAAS,SAAU,YAAa,YAAa,aAAc,QAAS,MAAO,cAAe,aAAc,iBAAkB,cACxIvN,GAAKoF,gBAAgBmI,EAAQlO,KAAKyO,QAASA,GAEvC,eAAiBzO,MAAKyO,SACxB9M,EAAS81B,qBAAqBz3B,KAAK20B,KAAM30B,KAAKyO,QAAQsmB,aAGpD,cAAgBtmB,KACdA,EAAQ45C,WACLroD,KAAKsoD,YACRtoD,KAAKsoD,UAAY,GAAIhD,GAAUtlD,KAAK+vB,IAAIrwB,OAItCM,KAAKsoD,YACPtoD,KAAKsoD,UAAUh1C,gBACRtT,MAAKsoD,YAMlBtoD,KAAK2lE,kBASP,GALA3lE,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCA,EAAU1yD,WAAWzE,KAInBA,GAAWA,EAAQgH,MACrB,KAAM,IAAI7R,OAAM,wEAIlB5D,MAAKyhB,UAOPyU,EAAK/iB,UAAUs2C,SAAW,WACxB,OAAQzpD,KAAKsoD,WAAatoD,KAAKsoD,UAAUgL,QAM3Cp9B,EAAK/iB,UAAUG,QAAU,WAEvBtT,KAAKyW,QAGLzW,KAAK0T,MAGL1T,KAAK6lE,kBAGD7lE,KAAK+vB,IAAIrwB,KAAKmK,YAChB7J,KAAK+vB,IAAIrwB,KAAKmK,WAAWsH,YAAYnR,KAAK+vB,IAAIrwB,MAEhDM,KAAK+vB,IAAM,KAGP/vB,KAAKsoD,YACPtoD,KAAKsoD,UAAUh1C,gBACRtT,MAAKsoD,UAId,KAAK,GAAIh/C,KAAStJ,MAAKulE,UACjBvlE,KAAKulE,UAAUz/D,eAAewD,UACzBtJ,MAAKulE,UAAUj8D,EAG1BtJ,MAAKulE,UAAY,KACjBvlE,KAAK8D,OAAS,KAGd9D,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCA,EAAUtyD,YAGZtT,KAAK20B,KAAO,MAQduB,EAAK/iB,UAAUsyB,cAAgB,SAAUtL,GACvC,IAAKn6B,KAAK41B,WACR,KAAM,IAAIhyB,OAAM,yDAGlB5D,MAAK41B,WAAW6P,cAActL,IAOhCjE,EAAK/iB,UAAUuyB,cAAgB,WAC7B,IAAK1lC,KAAK41B,WACR,KAAM,IAAIhyB,OAAM,yDAGlB,OAAO5D,MAAK41B,WAAW8P,iBAQzBxP,EAAK/iB,UAAUi+B,gBAAkB,WAC/B,MAAOpxC,MAAK61B,SAAW71B,KAAK61B,QAAQub,uBAetClb,EAAK/iB,UAAUsD,MAAQ,SAASqvD,KAEzBA,GAAQA,EAAK7jE,QAChBjC,KAAKi2B,SAAS,QAIX6vC,GAAQA,EAAK3xC,SAChBn0B,KAAKg2B,UAAU,QAIZ8vC,GAAQA,EAAKr3D,WAChBzO,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCA,EAAU1yD,WAAW0yD,EAAUvxC,kBAGjCr0B,KAAKkT,WAAWlT,KAAKq0B,kBAazB6B,EAAK/iB,UAAUsjB,IAAM,SAAShoB,GAC5B,GAAIgnB,GAAQz1B,KAAKs2B,eAGjB,IAAoB,OAAhBb,EAAM7lB,OAAgC,OAAd6lB,EAAM5lB,IAAlC,CAIA,GAAI2mB,GAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAC7Ex2B,MAAKy1B,MAAMl
C,SAASkC,EAAM7lB,MAAO6lB,EAAM5lB,IAAK2mB,KAQ9CN,EAAK/iB,UAAUmjB,cAAgB,WAE7B,GAAID,GAAYr2B,KAAK+2B,eAGjBnnB,EAAQymB,EAAUvqB,IAClB+D,EAAMwmB,EAAU3pB,GACpB,IAAa,MAATkD,GAAwB,MAAPC,EAAa,CAChC,GAAI2iB,GAAY3iB,EAAI7I,UAAY4I,EAAM5I,SACtB,IAAZwrB,IAEFA,EAAW,OAEb5iB,EAAQ,GAAItL,MAAKsL,EAAM5I,UAAuB,IAAXwrB,GACnC3iB,EAAM,GAAIvL,MAAKuL,EAAI7I,UAAuB,IAAXwrB,GAGjC,OACE5iB,MAAOA,EACPC,IAAKA,IAuBTqmB,EAAK/iB,UAAUojB,UAAY,SAAS3mB,EAAOC,EAAKpB,GAC9C,GAAI+nB,GAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAC7E,IAAwB,GAApB9wB,UAAUC,OAAa,CACzB,GAAI8vB,GAAQ/vB,UAAU,EACtB1F,MAAKy1B,MAAMlC,SAASkC,EAAM7lB,MAAO6lB,EAAM5lB,IAAK2mB,OAG5Cx2B,MAAKy1B,MAAMlC,SAAS3jB,EAAOC,EAAK2mB,IAcpCN,EAAK/iB,UAAU0U,OAAS,SAASsS,EAAM1rB,GACrC,GAAI+jB,GAAWxyB,KAAKy1B,MAAM5lB,IAAM7P,KAAKy1B,MAAM7lB,MACvC9B,EAAInN,EAAKkG,QAAQszB,EAAM,QAAQnzB,UAE/B4I,EAAQ9B,EAAI0kB,EAAW,EACvB3iB,EAAM/B,EAAI0kB,EAAW,EACrBgE,EAAW/nB,GAA+BjI,SAApBiI,EAAQ+nB,QAAyB/nB,EAAQ+nB,SAAU,CAE7Ex2B,MAAKy1B,MAAMlC,SAAS3jB,EAAOC,EAAK2mB,IAOlCN,EAAK/iB,UAAU4yD,UAAY,WACzB,GAAItwC,GAAQz1B,KAAKy1B,MAAM2J,UACvB,QACExvB,MAAO,GAAItL,MAAKmxB,EAAM7lB,OACtBC,IAAK,GAAIvL,MAAKmxB,EAAM5lB,OAQxBqmB,EAAK/iB,UAAUsO,OAAS,WACtB,GAAIgjB,IAAU,EACVh2B,EAAUzO,KAAKyO,QACfzI,EAAQhG,KAAKgG,MACb+pB,EAAM/vB,KAAK+vB,GAEf,IAAKA,EAAL,CAEApuB,EAASi2B,kBAAkB53B,KAAK20B,KAAM30B,KAAKyO,QAAQsmB,aAGxB,OAAvBtmB,EAAQ8lB,aACV5zB,EAAKiH,aAAamoB,EAAIrwB,KAAM,OAC5BiB,EAAKuH,gBAAgB6nB,EAAIrwB,KAAM,YAG/BiB,EAAKuH,gBAAgB6nB,EAAIrwB,KAAM,OAC/BiB,EAAKiH,aAAamoB,EAAIrwB,KAAM,WAI9BqwB,EAAIrwB,KAAKuN,MAAMunB,UAAY7zB,EAAKmJ,OAAOK,OAAOsE,EAAQ+lB,UAAW,IACjEzE,EAAIrwB,KAAKuN,MAAMwnB,UAAY9zB,EAAKmJ,OAAOK,OAAOsE,EAAQgmB,UAAW,IACjE1E,EAAIrwB,KAAKuN,MAAMsF,MAAQ5R,EAAKmJ,OAAOK,OAAOsE,EAAQ8D,MAAO,IAGzDvM,EAAMoG,OAAO3E,MAAUsoB,EAAI8H,gBAAgBzH,YAAcL,EAAI8H,gBAAgBrY,aAAe,EAC5FxZ,EAAMoG,OAAOib,MAASrhB,EAAMoG,OAAO3E,KACnCzB,EAAMoG,OAAOzE,KAAUooB,EAAI8H,gBAAgBvH,aAAeP,EAAI8H,gBAAgBhT,cAAgB,EAC9F7e,EAAMoG,OAAOkX,OAAStd,EAAMoG,OAAOzE,GACnC,IAAIq+D,GAAkBj2C,EAAIrwB,KAAK4wB,aAAeP,EAAIrwB,KAAKmlB,aACnDohD,EAAkBl2C,EAAIrwB,KAAK0wB,YAAcL,EAAIrwB,KAAK8f,WAIb,KAArCuQ,EAAI8H,gBAAgBhT,eACtB7e,EAAMoG,OAAO3E,KAAOzB,EAAMoG,OAAOzE,IACjC3B,EAAMoG,OAAOib,MAASrhB,EAAMoG,OAAO3E,MAEP,IAA1BsoB,EAAIrwB,KAAKmlB,eACXohD,EAAkBD,GAKpBhgE,EAAMkmB,OAAO1Z,OAASud,EAAI7D,OAAOoE,aACjCtqB,EAAMyB,KAAK+K,OAAWud,EAAItoB,KAAK6oB,aAC/BtqB,EAAMqhB,MAAM7U,OAAUud,EAAI1I,MAAMiJ,aAChCtqB,EAAM2B,IAAI6K,OAAYud,EAAIpoB,IAAIkd,eAAoB7e,EAAMoG,OAAOzE,IAC/D3B,EAAMsd,OAAO9Q,OAASud,EAAIzM,OAAOuB,eAAiB7e,EAAMoG,OAAOkX,MAM/D,IAAI+M,GAAgBnrB,KAAKwH,IAAI1G,EAAMyB,KAAK+K,OAAQxM,EAAMkmB,OAAO1Z,OAAQxM,EAAMqhB,MAAM7U,QAC7E0zD,EAAalgE,EAAM2B,IAAI6K,OAAS6d,EAAgBrqB,EAAMsd,OAAO9Q,OAC/DwzD,EAAmBhgE,EAAMoG,OAAOzE,IAAM3B,EAAMoG,OAAOkX,MACrDyM,GAAIrwB,KAAKuN,MAAMuF,OAAS7R,EAAKmJ,OAAOK,OAAOsE,EAAQ+D,OAAQ0zD,EAAa,MAGxElgE,EAAMtG,KAAK8S,OAASud,EAAIrwB,KAAK4wB,aAC7BtqB,EAAMmG,WAAWqG,OAASxM,EAAMtG,KAAK8S,OAASwzD,CAC9C,IAAI3qC,GAAkBr1B,EAAMtG,KAAK8S,OAASxM,EAAM2B,IAAI6K,OAASxM,EAAMsd,OAAO9Q,OACxEwzD,CACFhgE,GAAM6xB,gBAAgBrlB,OAAU6oB,EAChCr1B,EAAM6+D,cAAcryD,OAAY6oB,EAChCr1B,EAAM8+D,eAAetyD,OAAWxM,EAAM6+D,cAAcryD,OAGpDxM,EAAMtG,KAAK6S,MAAQwd,EAAIrwB,KAAK0wB,YAC5BpqB,EAAMmG,WAAWoG,MAAQvM,EAAMtG,KAAK6S,MAAQ0zD,EAC5CjgE,EAAMyB,KAAK8K,MAAQwd,EAAI80C,cAAcrlD,cAAkBxZ,EAAMoG,OAAO3E,KACpEzB,EAAM6+D,cAActyD,MAAQvM,EAAMyB,KAAK8K,MACvCvM,EAAMqhB,MAAM9U,MAAQwd,EAAI+0C,eAAetlD,cAAgBxZ,EAAMoG,OAAOib,MACpErhB,EAAM8+D,eAAevyD,MAAQvM,EAAMqhB,MAAM9U,KACzC,IAAI4zD,GAAcngE,EAAMtG,KAAK6S,MAAQvM,EAAMyB,KAAK8K,MAAQvM,EAAMqhB,MAAM9U,MAAQ0zD,CAC5EjgE,GAAMkmB,OAAO3Z,MAAiB4zD,EAC9BngE,EAAM6xB,gBAAgBtlB,MAAQ4zD,EAC9BngE,EAAM2B,IAA
I4K,MAAoB4zD,EAC9BngE,EAAMsd,OAAO/Q,MAAiB4zD,EAG9Bp2C,EAAI5jB,WAAWc,MAAMuF,OAAmBxM,EAAMmG,WAAWqG,OAAS,KAClEud,EAAIgV,mBAAmB93B,MAAMuF,OAAWxM,EAAMmG,WAAWqG,OAAS,KAClEud,EAAImY,qBAAqBj7B,MAAMuF,OAASxM,EAAM6xB,gBAAgBrlB,OAAS,KACvEud,EAAI8H,gBAAgB5qB,MAAMuF,OAAcxM,EAAM6xB,gBAAgBrlB,OAAS,KACvEud,EAAI80C,cAAc53D,MAAMuF,OAAgBxM,EAAM6+D,cAAcryD,OAAS,KACrEud,EAAI+0C,eAAe73D,MAAMuF,OAAexM,EAAM8+D,eAAetyD,OAAS,KAEtEud,EAAI5jB,WAAWc,MAAMsF,MAAmBvM,EAAMmG,WAAWoG,MAAQ,KACjEwd,EAAIgV,mBAAmB93B,MAAMsF,MAAWvM,EAAM6xB,gBAAgBtlB,MAAQ,KACtEwd,EAAImY,qBAAqBj7B,MAAMsF,MAASvM,EAAMmG,WAAWoG,MAAQ,KACjEwd,EAAI8H,gBAAgB5qB,MAAMsF,MAAcvM,EAAMkmB,OAAO3Z,MAAQ,KAC7Dwd,EAAIpoB,IAAIsF,MAAMsF,MAA0BvM,EAAM2B,IAAI4K,MAAQ,KAC1Dwd,EAAIzM,OAAOrW,MAAMsF,MAAuBvM,EAAMsd,OAAO/Q,MAAQ,KAG7Dwd,EAAI5jB,WAAWc,MAAMxF,KAAiB,IACtCsoB,EAAI5jB,WAAWc,MAAMtF,IAAiB,IACtCooB,EAAIgV,mBAAmB93B,MAAMxF,KAAUzB,EAAMyB,KAAK8K,MAAQvM,EAAMoG,OAAO3E,KAAQ,KAC/EsoB,EAAIgV,mBAAmB93B,MAAMtF,IAAS,IACtCooB,EAAImY,qBAAqBj7B,MAAMxF,KAAO,IACtCsoB,EAAImY,qBAAqBj7B,MAAMtF,IAAO3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAI8H,gBAAgB5qB,MAAMxF,KAAYzB,EAAMyB,KAAK8K,MAAQ,KACzDwd,EAAI8H,gBAAgB5qB,MAAMtF,IAAY3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAI80C,cAAc53D,MAAMxF,KAAc,IACtCsoB,EAAI80C,cAAc53D,MAAMtF,IAAc3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAI+0C,eAAe73D,MAAMxF,KAAczB,EAAMyB,KAAK8K,MAAQvM,EAAMkmB,OAAO3Z,MAAS,KAChFwd,EAAI+0C,eAAe73D,MAAMtF,IAAa3B,EAAM2B,IAAI6K,OAAS,KACzDud,EAAIpoB,IAAIsF,MAAMxF,KAAwBzB,EAAMyB,KAAK8K,MAAQ,KACzDwd,EAAIpoB,IAAIsF,MAAMtF,IAAwB,IACtCooB,EAAIzM,OAAOrW,MAAMxF,KAAqBzB,EAAMyB,KAAK8K,MAAQ,KACzDwd,EAAIzM,OAAOrW,MAAMtF,IAAsB3B,EAAM2B,IAAI6K,OAASxM,EAAM6xB,gBAAgBrlB,OAAU,KAI1FxS,KAAKomE,kBAGL,IAAIz8C,GAAS3pB,KAAKgG,MAAMqhC,SACG,WAAvB54B,EAAQ8lB,cACV5K,GAAUzkB,KAAKwH,IAAI1M,KAAKgG,MAAM6xB,gBAAgBrlB,OAASxS,KAAKgG,MAAMkmB,OAAO1Z,OACvExS,KAAKgG,MAAMoG,OAAOzE,IAAM3H,KAAKgG,MAAMoG,OAAOkX,OAAQ,IAEtDyM,EAAI7D,OAAOjf,MAAMxF,KAAO,IACxBsoB,EAAI7D,OAAOjf,MAAMtF,IAAOgiB,EAAS,KACjCoG,EAAItoB,KAAKwF,MAAMxF,KAAS,IACxBsoB,EAAItoB,KAAKwF,MAAMtF,IAASgiB,EAAS,KACjCoG,EAAI1I,MAAMpa,MAAMxF,KAAQ,IACxBsoB,EAAI1I,MAAMpa,MAAMtF,IAAQgiB,EAAS,IAGjC,IAAI08C,GAAwC,GAAxBrmE,KAAKgG,MAAMqhC,UAAiB,SAAW,GACvDi/B,EAAmBtmE,KAAKgG,MAAMqhC,WAAarnC,KAAKgG,MAAMy/D,aAAe,SAAW,EAYpF,IAXA11C,EAAIg1C,UAAU93D,MAAMuqB,WAAsB6uC,EAC1Ct2C,EAAIi1C,aAAa/3D,MAAMuqB,WAAmB8uC,EAC1Cv2C,EAAIk1C,cAAch4D,MAAMuqB,WAAkB6uC,EAC1Ct2C,EAAIm1C,iBAAiBj4D,MAAMuqB,WAAe8uC,EAC1Cv2C,EAAIo1C,eAAel4D,MAAMuqB,WAAiB6uC,EAC1Ct2C,EAAIq1C,kBAAkBn4D,MAAMuqB,WAAc8uC,EAG1CtmE,KAAKgC,WAAWqG,QAAQ,SAAUu9D,GAChCnhC,EAAUmhC,EAAUnkD,UAAYgjB,IAE9BA,EAAS,CAEX,GAAI8hC,GAAc,CACdvmE,MAAK0lE,YAAca,GACrBvmE,KAAK0lE,cACL1lE,KAAKyhB,UAGLkX,QAAQhF,IAAI,qCAEd3zB,KAAK0lE,YAAc,EAGrB1lE,KAAK4tB,KAAK,oBAIZsI,EAAK/iB,UAAUqzD,QAAU,WACvB,KAAM,IAAI5iE,OAAM,wDAUlBsyB,EAAK/iB,UAAUiyB,eAAiB,SAASjL,GACvC,IAAKn6B,KAAK21B,YACR,KAAM,IAAI/xB,OAAM,sCAGlB5D,MAAK21B,YAAYyP,eAAejL,IAQlCjE,EAAK/iB,UAAUkyB,eAAiB,WAC9B,IAAKrlC,KAAK21B,YACR,KAAM,IAAI/xB,OAAM,sCAGlB,OAAO5D,MAAK21B,YAAY0P,kBAU1BnP,EAAK/iB,UAAUmiB,QAAU,SAASvjB,GAChC,MAAOpQ,GAAS0zB,OAAOr1B,KAAM+R,EAAG/R,KAAKgG,MAAMkmB,OAAO3Z,QAUpD2jB,EAAK/iB,UAAUqiB,cAAgB,SAASzjB,GACtC,MAAOpQ,GAAS0zB,OAAOr1B,KAAM+R,EAAG/R,KAAKgG,MAAMtG,KAAK6S,QAalD2jB,EAAK/iB,UAAU+hB,UAAY,SAASiF,GAClC,MAAOx4B,GAASszB,SAASj1B,KAAMm6B,EAAMn6B,KAAKgG,MAAMkmB,OAAO3Z,QAczD2jB,EAAK/iB,UAAUiiB,gBAAkB,SAAS+E,GACxC,MAAOx4B,GAASszB,SAASj1B,KAAMm6B,EAAMn6B,KAAKgG,MAAMtG,KAAK6S,QAUvD2jB,EAAK/iB,UAAUwyD,gBAAkB,WACA,GAA3B3lE,KAAKyO,QAAQ6lB,WACft0B,KAAKymE,mBAGLzmE,KAAK6lE,mBAST3vC,EAAK/iB,UAAUszD,iBAAmB,WAChC,GAAItyD,GAAKnU,IAETA,MAAK6lE,kBAEL7lE,KAAK0mE,UAAY,WACf,MAA6B,IAAzBvyD
,GAAOqV,GAAGwvE,SAAW7kF,GAAOqV,GAAG+oE,QAC/Bp+E,GAAOqV,GAAGg5D,SAAWruE,GAAOqV,GAAGi5D,QAG/BtuE,GAAOqV,GAAGyvE,OAAS9kF,GAAOqV,GAAG9R,YAG7BvD,GAAOqV,GAAG0vE,MAAQ/kF,GAAOqV,GAAGkvE,MAkB5B9iF,EAAOzB,GAAOiM,SAASoJ,GAAK44D,EAAS3+D,WAEjCy/D,QAAU,WACN,GAIIt1C,GAASD,EAASD,EAJlBG,EAAev9B,KAAKwyE,cACpBD,EAAOvyE,KAAKyyE,MACZL,EAASpyE,KAAK0yE,QACdhgE,EAAO1S,KAAK4S,MACaq/D,EAAQ,CAIrCv/D,GAAK6qB,aAAeA,EAAe,IAEnCD,EAAUk2C,EAASj2C,EAAe,KAClC7qB,EAAK4qB,QAAUA,EAAU,GAEzBD,EAAUm2C,EAASl2C,EAAU,IAC7B5qB,EAAK2qB,QAAUA,EAAU,GAEzBD,EAAQo2C,EAASn2C,EAAU,IAC3B3qB,EAAK0qB,MAAQA,EAAQ,GAErBm1C,GAAQiB,EAASp2C,EAAQ,IAGzB60C,EAAQuB,EAASkM,GAAYnN,IAC7BA,GAAQiB,EAASmM,GAAY1N,IAI7BG,GAAUoB,EAASjB,EAAO,IAC1BA,GAAQ,GAGRN,GAASuB,EAASpB,EAAS,IAC3BA,GAAU,GAEV1/D,EAAK6/D,KAAOA,EACZ7/D,EAAK0/D,OAASA,EACd1/D,EAAKu/D,MAAQA,GAGjBpnD,IAAM,WAYF,MAXA7qB,MAAKwyE,cAAgBttE,KAAK2lB,IAAI7qB,KAAKwyE,eACnCxyE,KAAKyyE,MAAQvtE,KAAK2lB,IAAI7qB,KAAKyyE,OAC3BzyE,KAAK0yE,QAAUxtE,KAAK2lB,IAAI7qB,KAAK0yE,SAE7B1yE,KAAK4S,MAAM2qB,aAAer4B,KAAK2lB,IAAI7qB,KAAK4S,MAAM2qB,cAC9Cv9B,KAAK4S,MAAM0qB,QAAUp4B,KAAK2lB,IAAI7qB,KAAK4S,MAAM0qB,SACzCt9B,KAAK4S,MAAMyqB,QAAUn4B,KAAK2lB,IAAI7qB,KAAK4S,MAAMyqB,SACzCr9B,KAAK4S,MAAMwqB,MAAQl4B,KAAK2lB,IAAI7qB,KAAK4S,MAAMwqB,OACvCp9B,KAAK4S,MAAMw/D,OAASltE,KAAK2lB,IAAI7qB,KAAK4S,MAAMw/D,QACxCpyE,KAAK4S,MAAMq/D,MAAQ/sE,KAAK2lB,IAAI7qB,KAAK4S,MAAMq/D,OAEhCjyE,MAGXqyE,MAAQ,WACJ,MAAOmB,GAASxzE,KAAKuyE,OAAS,IAGlCvrE,QAAU,WACN,MAAOhH,MAAKwyE,cACG,MAAbxyE,KAAKyyE,MACJzyE,KAAK0yE,QAAU,GAAM,OACK,QAA3ByC,EAAMn1E,KAAK0yE,QAAU,KAG3B4U,SAAW,SAAUuB,GACjB,GAAIjV,GAAS4K,GAAax+E,MAAO6oF,EAAY7oF,KAAK2wE,aAMlD,OAJIkY,KACAjV,EAAS5zE,KAAK2wE,aAAa+U,YAAY1lF,KAAM4zE,IAG1C5zE,KAAK2wE,aAAaiV,WAAWhS,IAGxC3gE,IAAM,SAAU6hE,EAAOjC,GAEnB,GAAIwB,GAAMxwE,GAAOiM,SAASglE,EAAOjC,EAQjC,OANA7yE,MAAKwyE,eAAiB6B,EAAI7B,cAC1BxyE,KAAKyyE,OAAS4B,EAAI5B,MAClBzyE,KAAK0yE,SAAW2B,EAAI3B,QAEpB1yE,KAAK4yE,UAEE5yE,MAGXqrB,SAAW,SAAUypD,EAAOjC,GACxB,GAAIwB,GAAMxwE,GAAOiM,SAASglE,EAAOjC,EAQjC,OANA7yE,MAAKwyE,eAAiB6B,EAAI7B,cAC1BxyE,KAAKyyE,OAAS4B,EAAI5B,MAClBzyE,KAAK0yE,SAAW2B,EAAI3B,QAEpB1yE,KAAK4yE,UAEE5yE,MAGXkV,IAAM,SAAUmgE,GAEZ,MADAA,GAAQD,EAAeC,GAChBr1E,KAAKq1E,EAAM9wC,cAAgB,QAGtCtV,GAAK,SAAUomD,GACX,GAAI9C,GAAMH,CAGV,IAFAiD,EAAQD,EAAeC,GAET,UAAVA,GAA+B,SAAVA,EAGrB,MAFA9C,GAAOvyE,KAAKyyE,MAAQzyE,KAAKwyE,cAAgB,MACzCJ,EAASpyE,KAAK0yE,QAA8B,GAApBgN,GAAYnN,GACnB,UAAV8C,EAAoBjD,EAASA,EAAS,EAI7C,QADAG,EAAOvyE,KAAKyyE,MAAQvtE,KAAKwoB,MAAMiyD,GAAY3/E,KAAK0yE,QAAU,KAClD2C,GACJ,IAAK,OAAQ,MAAO9C,GAAO,EAAIvyE,KAAKwyE,cAAgB,MACpD,KAAK,MAAO,MAAOD,GAAOvyE,KAAKwyE,cAAgB,KAC/C,KAAK,OAAQ,MAAc,IAAPD,EAAYvyE,KAAKwyE,cAAgB,IACrD,KAAK,SAAU,MAAc,IAAPD,EAAY,GAAKvyE,KAAKwyE,cAAgB,GAC5D,KAAK,SAAU,MAAc,IAAPD,EAAY,GAAK,GAAKvyE,KAAKwyE,cAAgB,GAEjE,KAAK,cAAe,MAAOttE,MAAKC,MAAa,GAAPotE,EAAY,GAAK,GAAK,KAAQvyE,KAAKwyE,aACzE,SAAS,KAAM,IAAI5uE,OAAM,gBAAkByxE,KAKvD/wC,KAAOzgC,GAAOqV,GAAGorB,KACjBD,OAASxgC,GAAOqV,GAAGmrB,OAEnBykD,YAAc5Y,EACV,sFAEA,WACI,MAAOlwE,MAAKoH,gBAIpBA,YAAc,WAEV,GAAI6qE,GAAQ/sE,KAAK2lB,IAAI7qB,KAAKiyE,SACtBG,EAASltE,KAAK2lB,IAAI7qB,KAAKoyE,UACvBG,EAAOrtE,KAAK2lB,IAAI7qB,KAAKuyE,QACrBn1C,EAAQl4B,KAAK2lB,IAAI7qB,KAAKo9B,SACtBC,EAAUn4B,KAAK2lB,IAAI7qB,KAAKq9B,WACxBC,EAAUp4B,KAAK2lB,IAAI7qB,KAAKs9B,UAAYt9B,KAAKu9B,eAAiB,IAE9D,OAAKv9B,MAAK+oF,aAMF/oF,KAAK+oF,YAAc,EAAI,IAAM,IACjC,KACC9W,EAAQA,EAAQ,IAAM,KACtBG,EAASA,EAAS,IAAM,KACxBG,EAAOA,EAAO,IAAM,KACnBn1C,GAASC,GAAWC,EAAW,IAAM,KACtCF,EAAQA,EAAQ,IAAM,KACtBC,EAAUA,EAAU,IAAM,KAC1BC,EAAUA,EAAU,IAAM,IAXpB,OAcfqzC,WAAa,WACT,MAAO3wE,MAAK2yE,SAGhBgW,OAAS,WACL,MAAO3oF,MAAKoH,iBAIpBvD,GAAOiM,SAASoJ,GAAG7T,SAAWxB,GAAOiM,SAASoJ,GAAG9R,WAQjD,KAAK5B,KAA
K86E,IACFnR,EAAWmR,GAAwB96E,KACnCo6E,GAAmBp6E,GAAE++B,cAI7B1gC,IAAOiM,SAASoJ,GAAG8vE,eAAiB,WAChC,MAAOhpF,MAAKivB,GAAG,OAEnBprB,GAAOiM,SAASoJ,GAAG6vE,UAAY,WAC3B,MAAO/oF,MAAKivB,GAAG,MAEnBprB,GAAOiM,SAASoJ,GAAG+vE,UAAY,WAC3B,MAAOjpF,MAAKivB,GAAG,MAEnBprB,GAAOiM,SAASoJ,GAAGgwE,QAAU,WACzB,MAAOlpF,MAAKivB,GAAG,MAEnBprB,GAAOiM,SAASoJ,GAAGiwE,OAAS,WACxB,MAAOnpF,MAAKivB,GAAG,MAEnBprB,GAAOiM,SAASoJ,GAAGkwE,QAAU,WACzB,MAAOppF,MAAKivB,GAAG,UAEnBprB,GAAOiM,SAASoJ,GAAGmwE,SAAW,WAC1B,MAAOrpF,MAAKivB,GAAG,MAEnBprB,GAAOiM,SAASoJ,GAAGowE,QAAU,WACzB,MAAOtpF,MAAKivB,GAAG,MASnBprB,GAAOwgC,OAAO,MACVklD,aAAc,uBACd3Y,QAAU,SAAU6C,GAChB,GAAIrtE,GAAIqtE,EAAS,GACbG,EAAuC,IAA7BuB,EAAM1B,EAAS,IAAM,IAAa,KACrC,IAANrtE,EAAW,KACL,IAANA,EAAW,KACL,IAANA,EAAW,KAAO,IACvB,OAAOqtE,GAASG,KA4BpBkE,GACAj4E,EAAOD,QAAUiE,IAEfmrE,EAAgC,SAAUwa,EAAS5pF,EAASC,GAM1D,MALIA,GAAO2xE,QAAU3xE,EAAO2xE,UAAY3xE,EAAO2xE,SAASiY,YAAa,IAEjExJ,GAAYp8E,OAASm8E,IAGlBn8E,IACTtD,KAAKX,EAASM,EAAqBN,EAASC,KAASmvE,IAAkCxoE,IAAc3G,EAAOD,QAAUovE,IACxH6Q,IAAW,MAIhBt/E,KAAKP,QAEqBO,KAAKX,EAAU,WAAa,MAAOI,SAAYE,EAAoB,IAAIL,KAIhG,SAASA,EAAQD,GAErB,GAAI8pF,GAAgCC,EAA8B3a,GAOjE,SAAUtvE,EAAMC,GAGXgqF,KAAmCD,EAAiC,EAAW1a,EAA2E,kBAAnC0a,GAAiDA,EAA+B3xE,MAAMnY,EAAS+pF,GAAiCD,IAAmEljF,SAAlCwoE,IAAgDnvE,EAAOD,QAAUovE,KAU7VhvE,KAAM,WAEN,QAASmlD,GAAS12C,GAChB,GAOIjJ,GAPA6D,EAAiBoF,GAAWA,EAAQpF,iBAAkB,EAEtDkQ,EAAY9K,GAAWA,EAAQ8K,WAAahQ,OAE5CqgF,KACAC,GAAUC,WAAYC,UACtBC,IAIJ,KAAKxkF,EAAI,GAAS,KAALA,EAAUA,IAAMwkF,EAAM5lF,OAAO6lF,aAAazkF,KAAOuyE,KAAK,IAAMvyE,EAAI,IAAK8L,OAAO,EAEzF,KAAK9L,EAAI,GAAS,IAALA,EAASA,IAAMwkF,EAAM5lF,OAAO6lF,aAAazkF,KAAOuyE,KAAKvyE,EAAG8L,OAAO,EAE5E,KAAK9L,EAAI,EAAS,GAALA,EAAUA,IAAMwkF,EAAM,GAAKxkF,IAAMuyE,KAAK,GAAKvyE,EAAG8L,OAAO,EAElE,KAAK9L,EAAI,EAAS,IAALA,EAAWA,IAAMwkF,EAAM,IAAMxkF,IAAMuyE,KAAK,IAAMvyE,EAAG8L,OAAO,EAErE,KAAK9L,EAAI,EAAS,GAALA,EAAUA,IAAMwkF,EAAM,MAAQxkF,IAAMuyE,KAAK,GAAKvyE,EAAG8L,OAAO,EAGrE04E,GAAM,SAAWjS,KAAK,IAAKzmE,OAAO,GAClC04E,EAAM,SAAWjS,KAAK,IAAKzmE,OAAO,GAClC04E,EAAM,SAAWjS,KAAK,IAAKzmE,OAAO,GAClC04E,EAAM,SAAWjS,KAAK,IAAKzmE,OAAO,GAClC04E,EAAM,SAAWjS,KAAK,IAAKzmE,OAAO,GAElC04E,EAAY,MAAMjS,KAAK,GAAIzmE,OAAO,GAClC04E,EAAU,IAAQjS,KAAK,GAAIzmE,OAAO,GAClC04E,EAAa,OAAKjS,KAAK,GAAIzmE,OAAO,GAClC04E,EAAY,MAAMjS,KAAK,GAAIzmE,OAAO,GAElC04E,EAAa,OAAKjS,KAAK,GAAIzmE,OAAO,GAClC04E,EAAa,OAAKjS,KAAK,GAAIzmE,OAAO,GAClC04E,EAAa,OAAKjS,KAAK,GAAIzmE,MAAO9K,QAClCwjF,EAAW,KAAOjS,KAAK,GAAIzmE,OAAO,GAClC04E,EAAiB,WAAKjS,KAAK,EAAGzmE,OAAO,GACrC04E,EAAW,KAAWjS,KAAK,EAAGzmE,OAAO,GACrC04E,EAAY,MAAUjS,KAAK,GAAIzmE,OAAO,GACtC04E,EAAW,KAAWjS,KAAK,GAAIzmE,OAAO,GACtC04E,EAAM,WAAgBjS,KAAK,GAAIzmE,OAAO,GACtC04E,EAAc,QAAQjS,KAAK,GAAIzmE,OAAO,GACtC04E,EAAgB,UAAMjS,KAAK,GAAIzmE,OAAO,GAEtC04E,EAAM,MAAYjS,KAAK,IAAKzmE,OAAO,GACnC04E,EAAM,MAAYjS,KAAK,IAAKzmE,OAAO,GACnC04E,EAAM,MAAYjS,KAAK,IAAKzmE,OAAO,GACnC04E,EAAM,MAAYjS,KAAK,IAAKzmE,OAAO,EAInC,IAAI44E,GAAO,SAAS5gF,GAAQ6gF,EAAY7gF,EAAM,YAC1C8gF,EAAK,SAAS9gF,GAAQ6gF,EAAY7gF,EAAM,UAGxC6gF,EAAc,SAAS7gF,EAAMxC,GAC/B,GAAoCN,SAAhCqjF,EAAO/iF,GAAMwC,EAAM+gF,SAAwB,CAE7C,IAAK,GADDC,GAAQT,EAAO/iF,GAAMwC,EAAM+gF,SACtB7kF,EAAI,EAAGA,EAAI8kF,EAAM3kF,OAAQH,IACTgB,SAAnB8jF,EAAM9kF,GAAG8L,MACXg5E,EAAM9kF,GAAG0T,GAAG5P,GAEa,GAAlBghF,EAAM9kF,GAAG8L,OAAmC,GAAlBhI,EAAMyqC,SACvCu2C,EAAM9kF,GAAG0T,GAAG5P,GAEa,GAAlBghF,EAAM9kF,GAAG8L,OAAoC,GAAlBhI,EAAMyqC,UACxCu2C,EAAM9kF,GAAG0T,GAAG5P,EAIM,IAAlBD,GACFC,EAAMD,kBA4FZ,OAtFAugF,GAAiB90D,KAAO,SAASpsB,EAAKJ,EAAUxB,GAI9C,GAHaN,SAATM,IACFA,EAAO,WAEUN,SAAfwjF,EAAMthF,GACR,KAAM,IAAI9E,OAAM,oBAAsB8E,EAEFlC,UAAlCqjF,EAAO/iF,GAAMkjF,EAAMthF,GAAKqvE,QAC1B8R,EAAO/iF,GAAMkjF,EAAMthF,GAAKqvE,UAE1B8R,EAA
O/iF,GAAMkjF,EAAMthF,GAAKqvE,MAAM/vE,MAAMkR,GAAG5Q,EAAUgJ,MAAM04E,EAAMthF,GAAK4I,SAKpEs4E,EAAiBW,QAAU,SAASjiF,EAAUxB,GAC/BN,SAATM,IACFA,EAAO,UAET,KAAK,GAAI4B,KAAOshF,GACVA,EAAMlkF,eAAe4C,IACvBkhF,EAAiB90D,KAAKpsB,EAAIJ,EAASxB,IAMzC8iF,EAAiBY,OAAS,SAASlhF,GACjC,IAAK,GAAIZ,KAAOshF,GACd,GAAIA,EAAMlkF,eAAe4C,GAAM,CAC7B,GAAsB,GAAlBY,EAAMyqC,UAAwC,GAApBi2C,EAAMthF,GAAK4I,OAAiBhI,EAAM+gF,SAAWL,EAAMthF,GAAKqvE,KACpF,MAAOrvE,EAEJ,IAAsB,GAAlBY,EAAMyqC,UAAyC,GAApBi2C,EAAMthF,GAAK4I,OAAkBhI,EAAM+gF,SAAWL,EAAMthF,GAAKqvE,KAC3F,MAAOrvE,EAEJ,IAAIY,EAAM+gF,SAAWL,EAAMthF,GAAKqvE,MAAe,SAAPrvE,EAC3C,MAAOA,GAIb,MAAO,wCAITkhF,EAAiBrb,OAAS,SAAS7lE,EAAKJ,EAAUxB,GAIhD,GAHaN,SAATM,IACFA,EAAO,WAEUN,SAAfwjF,EAAMthF,GACR,KAAM,IAAI9E,OAAM,oBAAsB8E,EAExC,IAAiBlC,SAAb8B,EAAwB,CAC1B,GAAImiF,MACAH,EAAQT,EAAO/iF,GAAMkjF,EAAMthF,GAAKqvE,KACpC,IAAcvxE,SAAV8jF,EACF,IAAK,GAAI9kF,GAAI,EAAGA,EAAI8kF,EAAM3kF,OAAQH,KAC1B8kF,EAAM9kF,GAAG0T,IAAM5Q,GAAYgiF,EAAM9kF,GAAG8L,OAAS04E,EAAMthF,GAAK4I,QAC5Dm5E,EAAYziF,KAAK6hF,EAAO/iF,GAAMkjF,EAAMthF,GAAKqvE,MAAMvyE,GAIrDqkF,GAAO/iF,GAAMkjF,EAAMthF,GAAKqvE,MAAQ0S,MAGhCZ,GAAO/iF,GAAMkjF,EAAMthF,GAAKqvE,UAK5B6R,EAAiBpgC,MAAQ,WACvBqgC,GAAUC,WAAYC,WAIxBH,EAAiBt2E,QAAU,WACzBu2E,GAAUC,WAAYC,UACtBxwE,EAAUpQ,oBAAoB,UAAW+gF,GAAM,GAC/C3wE,EAAUpQ,oBAAoB,QAASihF,GAAI,IAI7C7wE,EAAU5Q,iBAAiB,UAAUuhF,GAAK,GAC1C3wE,EAAU5Q,iBAAiB,QAAQyhF,GAAG,GAG/BR,EAGT,MAAOzkC,MAQL,SAAStlD,EAAQD,GAErB,GAAI8pF,GAAgCC,EAA8B3a,GAEjE,SAAUtvE,EAAMC,GAGXgqF,KAAmCD,EAAiC,EAAW1a,EAA2E,kBAAnC0a,GAAiDA,EAA+B3xE,MAAMnY,EAAS+pF,GAAiCD,IAAmEljF,SAAlCwoE,IAAgDnvE,EAAOD,QAAUovE,KAU7VhvE,KAAM,WAEN,GAAI0qF,GAAe,IAkBnB,OAAO,SAAS/lB,GAAY7gE,GAuG1B,QAASiE,GAAMy9D,GACb,MAAOA,GAAOjhE,MAAM,UAOtB,QAASomF,GAAkBrhF,GAEzB,GAAmB,iBAAfA,EAAMxC,KAAyB,CACjC,GAAIwC,EAAMwqC,SAAS82C,UAAYthF,EAAMwqC,SAAS82C,SAASthF,EAAMxC,MAC3D,MAIAwC,GAAMwqC,SAAS82C,YACfthF,EAAMwqC,SAAS82C,SAASthF,EAAMxC,OAAQ,EAK1C,GAAI+jF,IAAU,CACdvhF,GAAMq8B,gBAAkB,WACtBklD,GAAU,GAIZvhF,EAAMwhF,YAAcJ,CAIpB,KADA,GAAInjF,GAAOmjF,EACJnjF,IAASsjF,GAAS,CACvB,GAAIE,GAAYxjF,EAAKzD,QAAUyD,EAAKzD,OAAOinF,UAAUzhF,EAAMxC,KAC3D,IAAIikF,EACF,IAAK,GAAIvlF,GAAI,EAAGA,EAAIulF,EAAUplF,SAAWklF,EAASrlF,IAChDulF,EAAUvlF,GAAG8D,EAIjB/B,GAAOA,EAAKsC,YA9IhB,GAAI/F,EAAOknF,QAAS,CAGlB,GAAIjnF,GAASD,EAETmnF,EAAoB,SAASriF,EAAS6F,GACxC,MAAOk2D,GAAY,GAAI5gE,GAAO6E,EAAS6F,IAOzC,OALA1K,GAAOuB,OAAO2lF,EAAmBlnF,GACjCknF,EAAkBD,QAAU,SAAUpiF,EAAS6F,GAC7C,MAAOk2D,GAAY,GAAI5gE,GAAOinF,QAAQpiF,EAAS6F,KAG1Cw8E,EAIT,GAAIriF,GAAU9E,EAAO8E,OAiIrB,OAhIAA,GAAQ9E,OAASA,EAGjBA,EAAOonF,IAAMpnF,EAAOyP,GACpBzP,EAAOqnF,KAAOrnF,EAAO4P,IACrB5P,EAAOsnF,SAAWtnF,EAAOwP,QAGzBxP,EAAOinF,aAIPjnF,EAAOonF,IAAI,eAAgB,SAAU5hF,GAC/BA,EAAMqnC,UACR+5C,EAAephF,EAAMI,UAUzB5F,EAAOyP,GAAK,SAAUiyD,EAAQ6lB,GAa5B,MAXAtjF,GAAMy9D,GAAQn9D,QAAQ,SAAUiB,GAC9B,GAAIyhF,GAAYjnF,EAAOinF,UAAUzhF,EAC5ByhF,KACHjnF,EAAOinF,UAAUzhF,GAASyhF,KAG1BjnF,EAAOonF,IAAI5hF,EAAOqhF,IAEpBI,EAAU/iF,KAAKqjF,KAGVvnF,GAWTA,EAAO4P,IAAM,SAAU8xD,EAAQ6lB,GAoB7B,MAlBAtjF,GAAMy9D,GAAQn9D,QAAQ,SAAUiB,GAC9B,GAAIyhF,GAAYjnF,EAAOinF,UAAUzhF,EAC7ByhF,KACFA,EAAYM,EAAUN,EAAUp3E,OAAO,SAAUhI,GAC/C,MAAOA,KAAM0/E,OAGXN,EAAUplF,OAAS,EACrB7B,EAAOinF,UAAUzhF,GAASyhF,GAI1BjnF,EAAOqnF,KAAK7hF,EAAOqhF,SACZ7mF,GAAOinF,UAAUzhF,OAKvBxF,GAGTA,EAAOwP,QAAU,WAEf,GAAI1K,GAAU9E,EAAO8E,cACdA,GAAQ9E,OAGfA,EAAOinF,aAGPjnF,EAAOsnF,YAgDFtnF,MAOP,SAASjE,EAAQD,EAASM,GAE9B,GAAI8uE,IAKJ,SAAUzlE,EAAQgI,EAAU+5E,EAAY9kF,GAmBxC,QAAS+kF,GAAkBryE,EAAIsyE,EAASryE,GACpC,MAAOG,YAAWmyE,EAAOvyE,EAAIC,GAAUqyE,GAY3C,QAASE,GAAeC,EAAKzyE,EAAIC,GAC7B,MAAIlT,OAAMC,QAAQylF,IACdC,EAAKD,EAAKxyE,EAAQD,GAAKC,IAChB,IAEJ,EASX,QAASyyE,GAAK7oE,EAAK8oE,EA
AU1yE,GACzB,GAAI3T,EAEJ,IAAKud,EAIL,GAAIA,EAAI1a,QACJ0a,EAAI1a,QAAQwjF,EAAU1yE,OACnB,IAAI4J,EAAIpd,SAAWa,EAEtB,IADAhB,EAAI,EACGA,EAAIud,EAAIpd,QACXkmF,EAAStrF,KAAK4Y,EAAS4J,EAAIvd,GAAIA,EAAGud,GAClCvd,QAGJ,KAAKA,IAAKud,GACNA,EAAIjd,eAAeN,IAAMqmF,EAAStrF,KAAK4Y,EAAS4J,EAAIvd,GAAIA,EAAGud,GAavE,QAASzd,GAAOwmF,EAAMlmC,EAAKmb,GAGvB,IAFA,GAAI3zD,GAAO7G,OAAO6G,KAAKw4C,GACnBpgD,EAAI,EACDA,EAAI4H,EAAKzH,UACPo7D,GAAUA,GAAS+qB,EAAK1+E,EAAK5H,MAAQgB,KACtCslF,EAAK1+E,EAAK5H,IAAMogD,EAAIx4C,EAAK5H,KAE7BA,GAEJ,OAAOsmF,GAUX,QAAS/qB,GAAM+qB,EAAMlmC,GACjB,MAAOtgD,GAAOwmF,EAAMlmC,GAAK,GAS7B,QAASmmC,GAAQC,EAAOlY,EAAMnlB,GAC1B,GACIs9B,GADAC,EAAQpY,EAAK3gE,SAGjB84E,GAASD,EAAM74E,UAAY5M,OAAO8H,OAAO69E,GACzCD,EAAO3lF,YAAc0lF,EACrBC,EAAOE,OAASD,EAEZv9B,GACArpD,EAAO2mF,EAAQt9B,GAUvB,QAAS88B,GAAOvyE,EAAIC,GAChB,MAAO,YACH,MAAOD,GAAGnB,MAAMoB,EAASzT,YAWjC,QAAS0mF,GAASvZ,EAAK55D,GACnB,aAAW45D,IAAOwZ,GACPxZ,EAAI96D,MAAMkB,EAAOA,EAAK,IAAMzS,EAAYA,EAAWyS,GAEvD45D,EASX,QAASyZ,GAAYC,EAAMC,GACvB,MAAQD,KAAS/lF,EAAagmF,EAAOD,EASzC,QAASE,GAAkB/iF,EAAQwN,EAAOm0E,GACtCO,EAAKc,EAASx1E,GAAQ,SAASpQ,GAC3B4C,EAAOf,iBAAiB7B,EAAMukF,GAAS,KAU/C,QAASsB,GAAqBjjF,EAAQwN,EAAOm0E,GACzCO,EAAKc,EAASx1E,GAAQ,SAASpQ,GAC3B4C,EAAOP,oBAAoBrC,EAAMukF,GAAS,KAWlD,QAASuB,GAAU9mC,EAAMhhB,GACrB,KAAOghB,GAAM,CACT,GAAIA,GAAQhhB,EACR,OAAO,CAEXghB,GAAOA,EAAKj8C,WAEhB,OAAO,EASX,QAASgjF,GAAMC,EAAKC,GAChB,MAAOD,GAAInmF,QAAQomF,GAAQ,GAQ/B,QAASL,GAASI,GACd,MAAOA,GAAI5/E,OAAOnF,MAAM,QAU5B,QAASilF,GAAQpnC,EAAKmnC,EAAME,GACxB,GAAIrnC,EAAIj/C,UAAYsmF,EAChB,MAAOrnC,GAAIj/C,QAAQomF,EAGnB,KADA,GAAIvnF,GAAI,EACDA,EAAIogD,EAAIjgD,QAAQ,CACnB,GAAKsnF,GAAarnC,EAAIpgD,GAAGynF,IAAcF,IAAWE,GAAarnC,EAAIpgD,KAAOunF,EACtE,MAAOvnF,EAEXA,KAEJ,MAAO,GASf,QAAS+C,GAAQwa,GACb,MAAO9c,OAAMkN,UAAUlI,MAAM1K,KAAKwiB,EAAK,GAU3C,QAASmqE,GAAYtnC,EAAKl9C,EAAKwN,GAK3B,IAJA,GAAI4/D,MACAh/D,KACAtR,EAAI,EAEDA,EAAIogD,EAAIjgD,QAAQ,CACnB,GAAIktE,GAAMnqE,EAAMk9C,EAAIpgD,GAAGkD,GAAOk9C,EAAIpgD,EAC9BwnF,GAAQl2E,EAAQ+7D,GAAO,GACvBiD,EAAQ9tE,KAAK49C,EAAIpgD,IAErBsR,EAAOtR,GAAKqtE,EACZrtE,IAaJ,MAVI0Q,KAII4/D,EAHCptE,EAGSotE,EAAQ5/D,KAAK,SAAyB3Q,EAAGa,GAC/C,MAAOb,GAAEmD,GAAOtC,EAAEsC,KAHZotE,EAAQ5/D,QAQnB4/D,EASX,QAASqX,GAASpqE,EAAKqqE,GAKnB,IAJA,GAAIC,GAAQxnF,EACRynF,EAAYF,EAAS,GAAGnoD,cAAgBmoD,EAASniF,MAAM,GAEvDzF,EAAI,EACDA,EAAI+nF,GAAgB5nF,QAAQ,CAI/B,GAHA0nF,EAASE,GAAgB/nF,GACzBK,EAAO,EAAWwnF,EAASC,EAAYF,EAEnCvnF,IAAQkd,GACR,MAAOld,EAEXL,KAEJ,MAAOgB,GAQX,QAASgnF,KACL,MAAOC,MAQX,QAASC,GAAoB9kF,GACzB,GAAI+kF,GAAM/kF,EAAQglF,aAClB,OAAQD,GAAIE,aAAeF,EAAIG,aAyCnC,QAASC,GAAMC,EAAS1lF,GACpB,GAAIomE,GAAO1uE,IACXA,MAAKguF,QAAUA,EACfhuF,KAAKsI,SAAWA,EAChBtI,KAAK4I,QAAUolF,EAAQplF,QACvB5I,KAAK0J,OAASskF,EAAQv/E,QAAQw/E,YAI9BjuF,KAAKkuF,WAAa,SAASC,GACnB/B,EAAS4B,EAAQv/E,QAAQ20B,QAAS4qD,KAClCtf,EAAK2c,QAAQ8C,IAIrBnuF,KAAKouF,OAoCT,QAASC,GAAoBL,GACzB,GAAIM,GACAC,EAAaP,EAAQv/E,QAAQ8/E,UAajC,OAAO,KAVHD,EADAC,EACOA,EACAC,GACAC,EACAC,GACAC,EACCC,GAGDC,EAFAC,GAIOd,EAAShnB,GAS/B,QAASA,GAAagnB,EAASe,EAAWja,GACtC,GAAIka,GAAcla,EAAMma,SAAStpF,OAC7BupF,EAAqBpa,EAAMqa,gBAAgBxpF,OAC3CgrC,EAAWo+C,EAAYK,IAAgBJ,EAAcE,IAAuB,EAC5EhoB,EAAW6nB,GAAaM,GAAYC,KAAkBN,EAAcE,IAAuB,CAE/Fpa,GAAMnkC,UAAYA,EAClBmkC,EAAM5N,UAAYA,EAEdv2B,IACAq9C,EAAQuB,YAKZza,EAAMia,UAAYA,EAGlBS,EAAiBxB,EAASlZ,GAG1BkZ,EAAQpgE,KAAK,eAAgBknD,GAE7BkZ,EAAQyB,UAAU3a,GAClBkZ,EAAQuB,QAAQG,UAAY5a,EAQhC,QAAS0a,GAAiBxB,EAASlZ,GAC/B,GAAIya,GAAUvB,EAAQuB,QAClBN,EAAWna,EAAMma,SACjBU,EAAiBV,EAAStpF,MAGzB4pF,GAAQK,aACTL,EAAQK,WAAaC,EAAqB/a,IAI1C6a,EAAiB,IAAMJ,EAAQO,cAC/BP,EAAQO,cAAgBD,EAAqB/a,GACnB,IAAnB6a,IACPJ,EAAQO,eAAgB,EAG5B,IAAIF,GAAaL,EAAQK,WACrBE,EAAgBP,EAAQO,cACxBC,EAAeD,EA
AgBA,EAAc5jE,OAAS0jE,EAAW1jE,OAEjEA,EAAS4oD,EAAM5oD,OAAS8jE,EAAUf,EACtCna,GAAMmb,UAAY9yD,KAClB23C,EAAMob,UAAYpb,EAAMmb,UAAYL,EAAWK,UAE/Cnb,EAAM1mB,MAAQ+hC,EAASJ,EAAc7jE,GACrC4oD,EAAMnvD,SAAW84C,EAAYsxB,EAAc7jE,GAE3CkkE,EAAeb,EAASza,GACxBA,EAAMub,gBAAkBC,EAAaxb,EAAMt1C,OAAQs1C,EAAMr1C,QAEzDq1C,EAAM73D,MAAQ6yE,EAAgBv8B,EAASu8B,EAAcb,SAAUA,GAAY,EAC3Ena,EAAMyb,SAAWT,EAAgBU,EAAYV,EAAcb,SAAUA,GAAY,EAEjFwB,EAAyBlB,EAASza,EAGlC,IAAIprE,GAASskF,EAAQplF,OACjBgkF,GAAU9X,EAAMhhC,SAASpqC,OAAQA,KACjCA,EAASorE,EAAMhhC,SAASpqC,QAE5BorE,EAAMprE,OAASA,EAGnB,QAAS0mF,GAAeb,EAASza,GAC7B,GAAI5oD,GAAS4oD,EAAM5oD,OACfvC,EAAS4lE,EAAQmB,gBACjBC,EAAYpB,EAAQoB,cACpBjB,EAAYH,EAAQG,eAEpB5a,EAAMia,YAAcK,IAAeM,EAAUX,YAAcM,MAC3DsB,EAAYpB,EAAQoB,WAChB5+E,EAAG29E,EAAUlwD,QAAU,EACvBxtB,EAAG09E,EAAUjwD,QAAU,GAG3B9V,EAAS4lE,EAAQmB,aACb3+E,EAAGma,EAAOna,EACVC,EAAGka,EAAOla,IAIlB8iE,EAAMt1C,OAASmxD,EAAU5+E,GAAKma,EAAOna,EAAI4X,EAAO5X,GAChD+iE,EAAMr1C,OAASkxD,EAAU3+E,GAAKka,EAAOla,EAAI2X,EAAO3X,GAQpD,QAASy+E,GAAyBlB,EAASza,GACvC,GAEItW,GAAUoyB,EAAWC,EAAW31D,EAFhC41D,EAAOvB,EAAQwB,cAAgBjc,EAC/Bob,EAAYpb,EAAMmb,UAAYa,EAAKb,SAGvC,IAAInb,EAAMia,WAAaO,KAAiBY,EAAYc,IAAoBF,EAAKtyB,WAAah4D,GAAY,CAClG,GAAIg5B,GAASsxD,EAAKtxD,OAASs1C,EAAMt1C,OAC7BC,EAASqxD,EAAKrxD,OAASq1C,EAAMr1C,OAE7B5zB,EAAIolF,EAAYf,EAAW1wD,EAAQC,EACvCmxD,GAAY/kF,EAAEkG,EACd8+E,EAAYhlF,EAAEmG,EACdwsD,EAAY3zC,GAAIhf,EAAEkG,GAAK8Y,GAAIhf,EAAEmG,GAAMnG,EAAEkG,EAAIlG,EAAEmG,EAC3CkpB,EAAYo1D,EAAa9wD,EAAQC,GAEjC8vD,EAAQwB,aAAejc,MAGvBtW,GAAWsyB,EAAKtyB,SAChBoyB,EAAYE,EAAKF,UACjBC,EAAYC,EAAKD,UACjB31D,EAAY41D,EAAK51D,SAGrB45C,GAAMtW,SAAWA,EACjBsW,EAAM8b,UAAYA,EAClB9b,EAAM+b,UAAYA,EAClB/b,EAAM55C,UAAYA,EAQtB,QAAS20D,GAAqB/a,GAK1B,IAFA,GAAIma,MACAzpF,EAAI,EACDA,EAAIsvE,EAAMma,SAAStpF,QACtBspF,EAASzpF,IACLmX,QAAS+Q,GAAMonD,EAAMma,SAASzpF,GAAGmX,SACjCG,QAAS4Q,GAAMonD,EAAMma,SAASzpF,GAAGsX,UAErCtX,GAGJ,QACIyqF,UAAW9yD,KACX8xD,SAAUA,EACV/iE,OAAQ8jE,EAAUf,GAClBzvD,OAAQs1C,EAAMt1C,OACdC,OAAQq1C,EAAMr1C,QAStB,QAASuwD,GAAUf,GACf,GAAIU,GAAiBV,EAAStpF,MAG9B,IAAuB,IAAnBgqF,EACA,OACI59E,EAAG2b,GAAMuhE,EAAS,GAAGtyE,SACrB3K,EAAG0b,GAAMuhE,EAAS,GAAGnyE,SAK7B,KADA,GAAI/K,GAAI,EAAGC,EAAI,EAAGxM,EAAI,EACXmqF,EAAJnqF,GACHuM,GAAKk9E,EAASzpF,GAAGmX,QACjB3K,GAAKi9E,EAASzpF,GAAGsX,QACjBtX,GAGJ,QACIuM,EAAG2b,GAAM3b,EAAI49E,GACb39E,EAAG0b,GAAM1b,EAAI29E,IAWrB,QAASsB,GAAYf,EAAWn+E,EAAGC,GAC/B,OACID,EAAGA,EAAIm+E,GAAa,EACpBl+E,EAAGA,EAAIk+E,GAAa,GAU5B,QAASI,GAAav+E,EAAGC,GACrB,MAAID,KAAMC,EACCk/E,GAGPrmE,GAAI9Y,IAAM8Y,GAAI7Y,GACPD,EAAI,EAAIo/E,GAAiBC,GAE7Bp/E,EAAI,EAAIq/E,GAAeC,GAUlC,QAAS7yB,GAAY2L,EAAIC,EAAIrkE,GACpBA,IACDA,EAAQurF,GAEZ,IAAIx/E,GAAIs4D,EAAGrkE,EAAM,IAAMokE,EAAGpkE,EAAM,IAC5BgM,EAAIq4D,EAAGrkE,EAAM,IAAMokE,EAAGpkE,EAAM,GAEhC,OAAOd,MAAKyqB,KAAM5d,EAAIA,EAAMC,EAAIA,GAUpC,QAASm+E,GAAS/lB,EAAIC,EAAIrkE,GACjBA,IACDA,EAAQurF,GAEZ,IAAIx/E,GAAIs4D,EAAGrkE,EAAM,IAAMokE,EAAGpkE,EAAM,IAC5BgM,EAAIq4D,EAAGrkE,EAAM,IAAMokE,EAAGpkE,EAAM,GAChC,OAA0B,KAAnBd,KAAK4xD,MAAM9kD,EAAGD,GAAW7M,KAAKymB,GASzC,QAAS6kE,GAAY5gF,EAAOC,GACxB,MAAOsgF,GAAStgF,EAAI,GAAIA,EAAI,GAAI2hF,IAAmBrB,EAASvgF,EAAM,GAAIA,EAAM,GAAI4hF,IAUpF,QAASj+B,GAAS3jD,EAAOC,GACrB,MAAO4uD,GAAY5uD,EAAI,GAAIA,EAAI,GAAI2hF,IAAmB/yB,EAAY7uD,EAAM,GAAIA,EAAM,GAAI4hF,IAiB1F,QAAS1C,KACL9uF,KAAKyxF,KAAOC,GACZ1xF,KAAK2xF,MAAQC,GAEb5xF,KAAK6xF,OAAQ,EACb7xF,KAAK8xF,SAAU,EAEf/D,EAAMh2E,MAAM/X,KAAM0F,WAoEtB,QAAS+oF,KACLzuF,KAAKyxF,KAAOM,GACZ/xF,KAAK2xF,MAAQK,GAEbjE,EAAMh2E,MAAM/X,KAAM0F,WAElB1F,KAAKiyF,MAASjyF,KAAKguF,QAAQuB,QAAQ2C,iBAoEvC,QAASC,KACLnyF,KAAKoyF,SAAWC,GAChBryF,KAAK2xF,MAAQW,GACbtyF,KAAKuyF,SAAU,EAEfxE,EAAMh2E,MAAM/X,KAAM0F,WAsCtB,QAAS8sF,GAAuBrE,
EAAIrnF,GAChC,GAAI2rF,GAAMlqF,EAAQ4lF,EAAGuE,SACjBzzD,EAAU12B,EAAQ4lF,EAAGwE,eAMzB,OAJI7rF,IAAQuoF,GAAYC,MACpBmD,EAAMvF,EAAYuF,EAAIz+E,OAAOirB,GAAU,cAAc,KAGjDwzD,EAAKxzD,GAiBjB,QAAS0vD,KACL3uF,KAAKoyF,SAAWQ,GAChB5yF,KAAK6yF,aAEL9E,EAAMh2E,MAAM/X,KAAM0F,WA0BtB,QAASotF,GAAW3E,EAAIrnF,GACpB,GAAIisF,GAAaxqF,EAAQ4lF,EAAGuE,SACxBG,EAAY7yF,KAAK6yF,SAGrB,IAAI/rF,GAAQsoF,GAAc4D,KAAqC,IAAtBD,EAAWptF,OAEhD,MADAktF,GAAUE,EAAW,GAAGE,aAAc,GAC9BF,EAAYA,EAGxB,IAAIvtF,GACAoX,EACA+1E,EAAiBpqF,EAAQ4lF,EAAGwE,gBAC5BO,KACAxpF,EAAS1J,KAAK0J,MAQlB,IALAkT,EAAgBm2E,EAAWp/E,OAAO,SAASmqB,GACvC,MAAO8uD,GAAU9uD,EAAMp0B,OAAQA,KAI/B5C,IAASsoF,GAET,IADA5pF,EAAI,EACGA,EAAIoX,EAAcjX,QACrBktF,EAAUj2E,EAAcpX,GAAGytF,aAAc,EACzCztF,GAMR,KADAA,EAAI,EACGA,EAAImtF,EAAehtF,QAClBktF,EAAUF,EAAentF,GAAGytF,aAC5BC,EAAqBlrF,KAAK2qF,EAAentF,IAIzCsB,GAAQuoF,GAAYC,WACbuD,GAAUF,EAAentF,GAAGytF,YAEvCztF,GAGJ,OAAK0tF,GAAqBvtF,QAMtBunF,EAAYtwE,EAAc5I,OAAOk/E,GAAuB,cAAc,GACtEA,GAPJ,OAoBJ,QAASrE,KACLd,EAAMh2E,MAAM/X,KAAM0F,UAElB,IAAI2lF,GAAUI,EAAOzrF,KAAKqrF,QAASrrF,KACnCA,MAAK89B,MAAQ,GAAI6wD,GAAW3uF,KAAKguF,QAAS3C,GAC1CrrF,KAAKmzF,MAAQ,GAAIrE,GAAW9uF,KAAKguF,QAAS3C,GAyD9C,QAAS+H,GAAYpF,EAAS3mF,GAC1BrH,KAAKguF,QAAUA,EACfhuF,KAAKmpD,IAAI9hD,GAuFb,QAASgsF,GAAkBC,GAEvB,GAAIzG,EAAMyG,EAASC,IACf,MAAOA,GAGX,IAAIC,GAAU3G,EAAMyG,EAASG,IACzBC,EAAU7G,EAAMyG,EAASK,GAG7B,OAAIH,IAAWE,EACJD,GAAqB,IAAME,GAIlCH,GAAWE,EACJF,EAAUC,GAAqBE,GAItC9G,EAAMyG,EAASM,IACRA,GAGJC,GA4CX,QAASC,GAAWrlF,GAChBzO,KAAKK,GAAKmtF,IAEVxtF,KAAKguF,QAAU,KACfhuF,KAAKyO,QAAUsyD,EAAMtyD,MAAezO,KAAK+zF,UAGzC/zF,KAAKyO,QAAQ20B,OAASkpD,EAAYtsF,KAAKyO,QAAQ20B,QAAQ,GAEvDpjC,KAAKg0F,MAAQC,GAEbj0F,KAAKk0F,gBACLl0F,KAAKm0F,eAiOT,QAASC,GAASJ,GACd,MAAIA,GAAQK,GACD,SACAL,EAAQM,GACR,MACAN,EAAQO,GACR,OACAP,EAAQQ,GACR,QAEJ,GAQX,QAASC,GAAav5D,GAClB,MAAIA,IAAao2D,GACN,OACAp2D,GAAam2D,GACb,KACAn2D,GAAai2D,GACb,OACAj2D,GAAak2D,GACb,QAEJ,GASX,QAASsD,IAA6BC,EAAiBC,GACnD,GAAI5G,GAAU4G,EAAW5G,OACzB,OAAIA,GACOA,EAAQ94E,IAAIy/E,GAEhBA,EAQX,QAASE,MACLf,EAAW/7E,MAAM/X,KAAM0F,WA6D3B,QAASovF,MACLD,GAAe98E,MAAM/X,KAAM0F,WAE3B1F,KAAK+0F,GAAK,KACV/0F,KAAKg1F,GAAK,KA2Ed,QAASC,MACLJ,GAAe98E,MAAM/X,KAAM0F,WAsC/B,QAASwvF,MACLpB,EAAW/7E,MAAM/X,KAAM0F,WAEvB1F,KAAKm1F,OAAS,KACdn1F,KAAKo1F,OAAS,KAmElB,QAASC,MACLR,GAAe98E,MAAM/X,KAAM0F,WA8B/B,QAAS4vF,MACLT,GAAe98E,MAAM/X,KAAM0F,WA0D/B,QAAS6vF,MACLzB,EAAW/7E,MAAM/X,KAAM0F,WAIvB1F,KAAKw1F,OAAQ,EACbx1F,KAAKy1F,SAAU,EAEfz1F,KAAKm1F,OAAS,KACdn1F,KAAKo1F,OAAS,KACdp1F,KAAKgX,MAAQ,EAqGjB,QAASjT,IAAO6E,EAAS6F,GAGrB,MAFAA,GAAUA,MACVA,EAAQinF,YAAcpJ,EAAY79E,EAAQinF,YAAa3xF,GAAOgwF,SAAS4B,QAChE,GAAI3K,IAAQpiF,EAAS6F,GAiIhC,QAASu8E,IAAQpiF,EAAS6F,GACtBA,EAAUA,MAEVzO,KAAKyO,QAAUsyD,EAAMtyD,EAAS1K,GAAOgwF,UACrC/zF,KAAKyO,QAAQw/E,YAAcjuF,KAAKyO,QAAQw/E,aAAerlF,EAEvD5I,KAAK41F,YACL51F,KAAKuvF,WACLvvF,KAAK01F,eAEL11F,KAAK4I,QAAUA,EACf5I,KAAK80E,MAAQuZ,EAAoBruF,MACjCA,KAAKslE,YAAc,GAAI8tB,GAAYpzF,KAAMA,KAAKyO,QAAQ62D,aAEtDuwB,GAAe71F,MAAM,GAErB4rF,EAAKn9E,EAAQinF,YAAa,SAASrmF,GAC/B,GAAIulF,GAAa50F,KAAKiT,IAAI,GAAK5D,GAAK,GAAIA,EAAK,IAC7CA,GAAK,IAAMulF,EAAWkB,cAAczmF,EAAK,IACzCA,EAAK,IAAMulF,EAAWmB,eAAe1mF,EAAK,KAC3CrP,MAyOP,QAAS61F,IAAe7H,EAAS/6E,GAC7B,GAAIrK,GAAUolF,EAAQplF,OACtBgjF,GAAKoC,EAAQv/E,QAAQunF,SAAU,SAAS3uF,EAAO4O,GAC3CrN,EAAQqE,MAAMkgF,EAASvkF,EAAQqE,MAAOgJ,IAAShD,EAAM5L,EAAQ,KASrE,QAAS4uF,IAAgB3sF,EAAOoJ,GAC5B,GAAIwjF,GAAe3kF,EAAS4kF,YAAY,QACxCD,GAAaE,UAAU9sF,GAAO,GAAM,GACpC4sF,EAAaG,QAAU3jF,EACvBA,EAAKhJ,OAAO4sF,cAAcJ,GAr1E9B,GAAI3I,KAAmB,GAAI,SAAU,MAAO,KAAM,KAAM,KACpDgJ,GAAehlF,EAASM,cAAc,OAEtCw6E,GAAgB,WAEhB3+D,GAAQxoB,KAAKwoB,MACb7C,GAAM3lB,KAAK2lB,IACXsS,GAAM74B,KAAK64B,IAwSX
swD,GAAY,EAeZ+I,GAAe,wCAEf5H,GAAiB,gBAAkBrlF,GACnCilF,GAAyBrB,EAAS5jF,EAAQ,kBAAoB/C,EAC9DkoF,GAAqBE,IAAiB4H,GAAaxoF,KAAKhF,UAAUC,WAElEwtF,GAAmB,QACnBC,GAAiB,MACjBC,GAAmB,QACnBC,GAAoB,SAEpB5F,GAAmB,GAEnB5B,GAAc,EACd4D,GAAa,EACb3D,GAAY,EACZC,GAAe,EAEf4B,GAAiB,EACjBC,GAAiB,EACjBC,GAAkB,EAClBC,GAAe,EACfC,GAAiB,GAEjBuF,GAAuB1F,GAAiBC,GACxC0F,GAAqBzF,GAAeC,GACpCyF,GAAgBF,GAAuBC,GAEvCvF,IAAY,IAAK,KACjBC,IAAmB,UAAW,UA4BlCzD,GAAM56E,WAKFk4E,QAAS,aAKT+C,KAAM,WACFpuF,KAAKyxF,MAAQhF,EAAkBzsF,KAAK4I,QAAS5I,KAAKyxF,KAAMzxF,KAAKkuF,YAC7DluF,KAAKoyF,UAAY3F,EAAkBzsF,KAAK0J,OAAQ1J,KAAKoyF,SAAUpyF,KAAKkuF,YACpEluF,KAAK2xF,OAASlF,EAAkBiB,EAAoB1tF,KAAK4I,SAAU5I,KAAK2xF,MAAO3xF,KAAKkuF,aAMxF56E,QAAS,WACLtT,KAAKyxF,MAAQ9E,EAAqB3sF,KAAK4I,QAAS5I,KAAKyxF,KAAMzxF,KAAKkuF,YAChEluF,KAAKoyF,UAAYzF,EAAqB3sF,KAAK0J,OAAQ1J,KAAKoyF,SAAUpyF,KAAKkuF,YACvEluF,KAAK2xF,OAAShF,EAAqBe,EAAoB1tF,KAAK4I,SAAU5I,KAAK2xF,MAAO3xF,KAAKkuF,aAoT/F,IAAI8I,KACAC,UAAW7H,GACX8H,UAAWlE,GACXmE,QAAS9H,IAGTqC,GAAuB,YACvBE,GAAsB,mBAiB1B7F,GAAQ+C,EAAYf,GAKhB1C,QAAS,SAAmB8C,GACxB,GAAIY,GAAYiI,GAAgB7I,EAAGrnF,KAG/BioF,GAAYK,IAA6B,IAAdjB,EAAG1hE,SAC9BzsB,KAAK8xF,SAAU,GAGf/C,EAAYiE,IAA2B,IAAb7E,EAAG3hE,QAC7BuiE,EAAYM,IAIXrvF,KAAK8xF,SAAY9xF,KAAK6xF,QAIvB9C,EAAYM,KACZrvF,KAAK8xF,SAAU,GAGnB9xF,KAAKsI,SAAStI,KAAKguF,QAASe,GACxBE,UAAWd,GACXgB,iBAAkBhB,GAClBiJ,YAAaT,GACb7iD,SAAUq6C,OAKtB,IAAIkJ,KACAC,YAAalI,GACbmI,YAAavE,GACbwE,UAAWnI,GACXoI,cAAenI,GACfoI,WAAYpI,IAIZqI,IACAC,EAAGnB,GACHoB,EAAGnB,GACHoB,EAAGnB,GACHoB,EAAGnB,IAGH7E,GAAyB,cACzBC,GAAwB,qCAGxBzoF,GAAOyuF,iBACPjG,GAAyB,gBACzBC,GAAwB,6CAiB5BjG,EAAQ0C,EAAmBV,GAKvB1C,QAAS,SAAmB8C,GACxB,GAAI8D,GAAQjyF,KAAKiyF,MACbgG,GAAgB,EAEhBC,EAAsB/J,EAAGrnF,KAAKy9B,cAAc/5B,QAAQ,KAAM,IAC1DukF,EAAYsI,GAAkBa,GAC9Bd,EAAcO,GAAuBxJ,EAAGiJ,cAAgBjJ,EAAGiJ,YAE3De,EAAWf,GAAeX,GAG1B2B,EAAapL,EAAQiF,EAAO9D,EAAGkK,UAAW,YAG1CtJ,GAAYK,KAA8B,IAAdjB,EAAG1hE,QAAgB0rE,GAC9B,EAAbC,IACAnG,EAAMjqF,KAAKmmF,GACXiK,EAAanG,EAAMtsF,OAAS,GAEzBopF,GAAaM,GAAYC,MAChC2I,GAAgB,GAIH,EAAbG,IAKJnG,EAAMmG,GAAcjK,EAEpBnuF,KAAKsI,SAAStI,KAAKguF,QAASe,GACxBE,SAAUgD,EACV9C,iBAAkBhB,GAClBiJ,YAAaA,EACbtjD,SAAUq6C,IAGV8J,GAEAhG,EAAM7pF,OAAOgwF,EAAY,MAKrC,IAAIE,KACAC,WAAYnJ,GACZoJ,UAAWxF,GACXyF,SAAUpJ,GACVqJ,YAAapJ,IAGb+C,GAA6B,aAC7BC,GAA6B,2CAejCvG,GAAQoG,EAAkBpE,GACtB1C,QAAS,SAAmB8C,GACxB,GAAIrnF,GAAOwxF,GAAuBnK,EAAGrnF,KAOrC,IAJIA,IAASsoF,KACTpvF,KAAKuyF,SAAU,GAGdvyF,KAAKuyF,QAAV,CAIA,GAAIG,GAAUF,EAAuBjyF,KAAKP,KAAMmuF,EAAIrnF,EAGhDA,IAAQuoF,GAAYC,KAAiBoD,EAAQ,GAAG/sF,OAAS+sF,EAAQ,GAAG/sF,SAAW,IAC/E3F,KAAKuyF,SAAU,GAGnBvyF,KAAKsI,SAAStI,KAAKguF,QAASlnF,GACxBmoF,SAAUyD,EAAQ,GAClBvD,gBAAiBuD,EAAQ,GACzB0E,YAAaX,GACb3iD,SAAUq6C,OAsBtB,IAAIwK,KACAJ,WAAYnJ,GACZoJ,UAAWxF,GACXyF,SAAUpJ,GACVqJ,YAAapJ,IAGbsD,GAAsB,2CAc1B7G,GAAQ4C,EAAYZ,GAChB1C,QAAS,SAAoB8C,GACzB,GAAIrnF,GAAO6xF,GAAgBxK,EAAGrnF,MAC1B4rF,EAAUI,EAAWvyF,KAAKP,KAAMmuF,EAAIrnF,EACnC4rF,IAIL1yF,KAAKsI,SAAStI,KAAKguF,QAASlnF,GACxBmoF,SAAUyD,EAAQ,GAClBvD,gBAAiBuD,EAAQ,GACzB0E,YAAaX,GACb3iD,SAAUq6C,OAmFtBpC,EAAQ8C,EAAiBd,GAOrB1C,QAAS,SAAoB2C,EAAS4K,EAAYC,GAC9C,GAAIV,GAAWU,EAAUzB,aAAeX,GACpCqC,EAAWD,EAAUzB,aAAeT,EAIxC,IAAIwB,EACAn4F,KAAKmzF,MAAMtB,OAAQ,MAChB,IAAIiH,IAAY94F,KAAKmzF,MAAMtB,MAC9B,MAIA+G,IAAcvJ,GAAYC,MAC1BtvF,KAAKmzF,MAAMtB,OAAQ,GAGvB7xF,KAAKsI,SAAS0lF,EAAS4K,EAAYC,IAMvCvlF,QAAS,WACLtT,KAAK89B,MAAMxqB,UACXtT,KAAKmzF,MAAM7/E,YAInB,IAAIylF,IAAwB5L,EAASoJ,GAAatpF,MAAO,eACrD+rF,GAAsBD,KAA0BvyF,EAGhDyyF,GAAuB,UACvBpF,GAAoB,OACpBD,GAA4B,eAC5BL,GAAoB,OACpBE,GAAqB,QACrBE,GAAqB,OAczBP,GAAYjgF,WAKRg2C,IAAK,SAAS9hD,GAENA,GAAS4xF,KACT5xF,EAAQrH,KAAKk5F,WAGbF,KACAh5F,KAAKguF,QAAQplF,QAAQqE,MAAM8rF,IAAyB1xF,GA
ExDrH,KAAKszF,QAAUjsF,EAAMk9B,cAAcr3B,QAMvC2H,OAAQ,WACJ7U,KAAKmpD,IAAInpD,KAAKguF,QAAQv/E,QAAQ62D,cAOlC4zB,QAAS,WACL,GAAI5F,KAMJ,OALA1H,GAAK5rF,KAAKguF,QAAQ0H,YAAa,SAASd,GAChCxI,EAASwI,EAAWnmF,QAAQ20B,QAASwxD,MACrCtB,EAAUA,EAAQt/E,OAAO4gF,EAAWuE,qBAGrC9F,EAAkBC,EAAQrrF,KAAK,OAO1CmxF,gBAAiB,SAAStkB,GAEtB,IAAIkkB,GAAJ,CAIA,GAAIllD,GAAWghC,EAAMhhC,SACjB5Y,EAAY45C,EAAMub,eAGtB,IAAIrwF,KAAKguF,QAAQuB,QAAQ8J,UAErB,WADAvlD,GAASzqC,gBAIb,IAAIiqF,GAAUtzF,KAAKszF,QACfgG,EAAUzM,EAAMyG,EAASC,IACzBG,EAAU7G,EAAMyG,EAASK,IACzBH,EAAU3G,EAAMyG,EAASG,GAE7B,OAAI6F,IACC5F,GAAWx4D,EAAY27D,IACvBrD,GAAWt4D,EAAY47D,GACjB92F,KAAKu5F,WAAWzlD,GAH3B,SAWJylD,WAAY,SAASzlD,GACjB9zC,KAAKguF,QAAQuB,QAAQ8J,WAAY,EACjCvlD,EAASzqC,kBA+DjB,IAAI4qF,IAAiB,EACjBO,GAAc,EACdD,GAAgB,EAChBD,GAAc,EACdkF,GAAmBlF,GACnBD,GAAkB,GAClBoF,GAAe,EAuBnB3F,GAAW3gF,WAKP4gF,YAOA5qC,IAAK,SAAS16C,GAKV,MAJAnJ,GAAOtF,KAAKyO,QAASA,GAGrBzO,KAAKguF,SAAWhuF,KAAKguF,QAAQ1oB,YAAYzwD,SAClC7U,MAQX81F,cAAe,SAASnB,GACpB,GAAIjJ,EAAeiJ,EAAiB,gBAAiB30F,MACjD,MAAOA,KAGX,IAAIk0F,GAAel0F,KAAKk0F,YAMxB,OALAS,GAAkBD,GAA6BC,EAAiB30F,MAC3Dk0F,EAAaS,EAAgBt0F,MAC9B6zF,EAAaS,EAAgBt0F,IAAMs0F,EACnCA,EAAgBmB,cAAc91F,OAE3BA,MAQX05F,kBAAmB,SAAS/E,GACxB,MAAIjJ,GAAeiJ,EAAiB,oBAAqB30F,MAC9CA,MAGX20F,EAAkBD,GAA6BC,EAAiB30F,YACzDA,MAAKk0F,aAAaS,EAAgBt0F,IAClCL,OAQX+1F,eAAgB,SAASpB,GACrB,GAAIjJ,EAAeiJ,EAAiB,iBAAkB30F,MAClD,MAAOA,KAGX,IAAIm0F,GAAcn0F,KAAKm0F,WAMvB,OALAQ,GAAkBD,GAA6BC,EAAiB30F,MAClB,KAA1CgtF,EAAQmH,EAAaQ,KACrBR,EAAYnsF,KAAK2sF,GACjBA,EAAgBoB,eAAe/1F,OAE5BA,MAQX25F,mBAAoB,SAAShF,GACzB,GAAIjJ,EAAeiJ,EAAiB,qBAAsB30F,MACtD,MAAOA,KAGX20F,GAAkBD,GAA6BC,EAAiB30F,KAChE,IAAImI,GAAQ6kF,EAAQhtF,KAAKm0F,YAAaQ,EAItC,OAHIxsF,GAAQ,IACRnI,KAAKm0F,YAAY/rF,OAAOD,EAAO,GAE5BnI,MAOX45F,mBAAoB,WAChB,MAAO55F,MAAKm0F,YAAYxuF,OAAS,GAQrCk0F,iBAAkB,SAASlF,GACvB,QAAS30F,KAAKk0F,aAAaS,EAAgBt0F,KAQ/CutB,KAAM,SAASknD,GAIX,QAASlnD,GAAKksE,GACVprB,EAAKsf,QAAQpgE,KAAK8gD,EAAKjgE,QAAQnF,OAASwwF,EAAY1F,EAASJ,GAAS,IAAKlf,GAJ/E,GAAIpG,GAAO1uE,KACPg0F,EAAQh0F,KAAKg0F,KAOLM,IAARN,GACApmE,GAAK,GAGTA,IAGIomE,GAASM,IACT1mE,GAAK,IAUbmsE,QAAS,SAASjlB,GACd,MAAI90E,MAAKg6F,UACEh6F,KAAK4tB,KAAKknD,QAGrB90E,KAAKg0F,MAAQyF,KAOjBO,QAAS,WAEL,IADA,GAAIx0F,GAAI,EACDA,EAAIxF,KAAKm0F,YAAYxuF,QAAQ,CAChC,KAAM3F,KAAKm0F,YAAY3uF,GAAGwuF,OAASyF,GAAexF,KAC9C,OAAO,CAEXzuF,KAEJ,OAAO,GAOXiqF,UAAW,SAASoJ,GAGhB,GAAIoB,GAAiB30F,KAAWuzF,EAGhC,OAAKzM,GAASpsF,KAAKyO,QAAQ20B,QAASpjC,KAAMi6F,KAOtCj6F,KAAKg0F,OAASwF,GAAmBnF,GAAkBoF,MACnDz5F,KAAKg0F,MAAQC,IAGjBj0F,KAAKg0F,MAAQh0F,KAAKk6F,QAAQD,QAItBj6F,KAAKg0F,OAASQ,GAAcD,GAAgBD,GAAcD,KAC1Dr0F,KAAK+5F,QAAQE,MAfbj6F,KAAKwpD,aACLxpD,KAAKg0F,MAAQyF,MAyBrBS,QAAS,aAOTf,eAAgB,aAOhB3vC,MAAO,cA8DXuiC,EAAQ8I,GAAgBf,GAKpBC,UAKI9E,SAAU,GASdkL,SAAU,SAASrlB,GACf,GAAIslB,GAAiBp6F,KAAKyO,QAAQwgF,QAClC,OAA0B,KAAnBmL,GAAwBtlB,EAAMma,SAAStpF,SAAWy0F,GAS7DF,QAAS,SAASplB,GACd,GAAIkf,GAAQh0F,KAAKg0F,MACbjF,EAAYja,EAAMia,UAElBsL,EAAerG,GAASQ,GAAcD,IACtCld,EAAUr3E,KAAKm6F,SAASrlB,EAG5B,OAAIulB,KAAiBtL,EAAYO,KAAiBjY,GACvC2c,EAAQK,GACRgG,GAAgBhjB,EACnB0X,EAAYM,GACL2E,EAAQM,GACNN,EAAQQ,GAGdR,EAAQO,GAFJC,GAIRiF,MAiBf1N,EAAQ+I,GAAeD,IAKnBd,UACIzqF,MAAO,MACP0uD,UAAW,GACXi3B,SAAU,EACV/zD,UAAW67D,IAGfoC,eAAgB,WACZ,GAAIj+D,GAAYl7B,KAAKyO,QAAQysB,UACzBo4D,IAOJ,OANIp4D,GAAY27D,IACZvD,EAAQtrF,KAAK2rF,IAEbz4D,EAAY47D,IACZxD,EAAQtrF,KAAKyrF,IAEVH,GAGXgH,cAAe,SAASxlB,GACpB,GAAIrmE,GAAUzO,KAAKyO,QACf8rF,GAAW,EACX50E,EAAWmvD,EAAMnvD,SACjBuV,EAAY45C,EAAM55C,UAClBnpB,EAAI+iE,EAAMt1C,OACVxtB,EAAI8iE,EAAMr1C,MAed,OAZMvE,GAAYzsB,EAAQysB,YAClBzsB,EAAQysB,UAAY27D,IACpB37D,EAAmB,IAANnpB,EAAWm/E,GAAsB,EAAJn/E,EAASo/E,GAAiBC,GACpEmJ,EAAWxoF,G
AAK/R,KAAK+0F,GACrBpvE,EAAWzgB,KAAK2lB,IAAIiqD,EAAMt1C,UAE1BtE,EAAmB,IAANlpB,EAAWk/E,GAAsB,EAAJl/E,EAASq/E,GAAeC,GAClEiJ,EAAWvoF,GAAKhS,KAAKg1F,GACrBrvE,EAAWzgB,KAAK2lB,IAAIiqD,EAAMr1C,UAGlCq1C,EAAM55C,UAAYA,EACXq/D,GAAY50E,EAAWlX,EAAQupD,WAAa98B,EAAYzsB,EAAQysB,WAG3Ei/D,SAAU,SAASrlB,GACf,MAAO+f,IAAe1hF,UAAUgnF,SAAS55F,KAAKP,KAAM80E,KAC/C90E,KAAKg0F,MAAQQ,MAAkBx0F,KAAKg0F,MAAQQ,KAAgBx0F,KAAKs6F,cAAcxlB,KAGxFlnD,KAAM,SAASknD,GACX90E,KAAK+0F,GAAKjgB,EAAMt1C,OAChBx/B,KAAKg1F,GAAKlgB,EAAMr1C,MAEhB,IAAIvE,GAAYu5D,EAAa3f,EAAM55C,UAC/BA,IACAl7B,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQ4xB,EAAW45C,GAGtD90E,KAAKmsF,OAAOv+D,KAAKrtB,KAAKP,KAAM80E,MAcpCiX,EAAQkJ,GAAiBJ,IAKrBd,UACIzqF,MAAO,QACP0uD,UAAW,EACXi3B,SAAU,GAGdkK,eAAgB,WACZ,OAAQ5F,KAGZ4G,SAAU,SAASrlB,GACf,MAAO90E,MAAKmsF,OAAOgO,SAAS55F,KAAKP,KAAM80E,KAClC5vE,KAAK2lB,IAAIiqD,EAAM73D,MAAQ,GAAKjd,KAAKyO,QAAQupD,WAAah4D,KAAKg0F,MAAQQ,KAG5E5mE,KAAM,SAASknD,GAEX,GADA90E,KAAKmsF,OAAOv+D,KAAKrtB,KAAKP,KAAM80E,GACR,IAAhBA,EAAM73D,MAAa,CACnB,GAAIu9E,GAAQ1lB,EAAM73D,MAAQ,EAAI,KAAO,KACrCjd,MAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQkxF,EAAO1lB,OAkB1DiX,EAAQmJ,GAAiBpB,GAKrBC,UACIzqF,MAAO,QACP2lF,SAAU,EACV90D,KAAM,IACN69B,UAAW,GAGfmhC,eAAgB,WACZ,OAAQtF,KAGZqG,QAAS,SAASplB,GACd,GAAIrmE,GAAUzO,KAAKyO,QACfgsF,EAAgB3lB,EAAMma,SAAStpF,SAAW8I,EAAQwgF,SAClDyL,EAAgB5lB,EAAMnvD,SAAWlX,EAAQupD,UACzC2iC,EAAY7lB,EAAMob,UAAYzhF,EAAQ0rB,IAM1C,IAJAn6B,KAAKo1F,OAAStgB,GAIT4lB,IAAkBD,GAAkB3lB,EAAMia,WAAaM,GAAYC,MAAkBqL,EACtF36F,KAAKwpD,YACF,IAAIsrB,EAAMia,UAAYK,GACzBpvF,KAAKwpD,QACLxpD,KAAKm1F,OAAS5J,EAAkB,WAC5BvrF,KAAKg0F,MAAQwF,GACbx5F,KAAK+5F;EACNtrF,EAAQ0rB,KAAMn6B,UACd,IAAI80E,EAAMia,UAAYM,GACzB,MAAOmK,GAEX,OAAOC,KAGXjwC,MAAO,WACHnwC,aAAarZ,KAAKm1F,SAGtBvnE,KAAM,SAASknD,GACP90E,KAAKg0F,QAAUwF,KAIf1kB,GAAUA,EAAMia,UAAYM,GAC5BrvF,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQ,KAAMwrE,IAE7C90E,KAAKo1F,OAAOnF,UAAY9yD,KACxBn9B,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAOtJ,KAAKo1F,aAevDrJ,EAAQsJ,GAAkBR,IAKtBd,UACIzqF,MAAO,SACP0uD,UAAW,EACXi3B,SAAU,GAGdkK,eAAgB,WACZ,OAAQ5F,KAGZ4G,SAAU,SAASrlB,GACf,MAAO90E,MAAKmsF,OAAOgO,SAAS55F,KAAKP,KAAM80E,KAClC5vE,KAAK2lB,IAAIiqD,EAAMyb,UAAYvwF,KAAKyO,QAAQupD,WAAah4D,KAAKg0F,MAAQQ,OAc/EzI,EAAQuJ,GAAiBT,IAKrBd,UACIzqF,MAAO,QACP0uD,UAAW,GACXwG,SAAU,IACVtjC,UAAW27D,GAAuBC,GAClC7H,SAAU,GAGdkK,eAAgB,WACZ,MAAOrE,IAAc3hF,UAAUgmF,eAAe54F,KAAKP,OAGvDm6F,SAAU,SAASrlB,GACf,GACItW,GADAtjC,EAAYl7B,KAAKyO,QAAQysB,SAW7B,OARIA,IAAa27D,GAAuBC,IACpCt4B,EAAWsW,EAAMtW,SACVtjC,EAAY27D,GACnBr4B,EAAWsW,EAAM8b,UACV11D,EAAY47D,KACnBt4B,EAAWsW,EAAM+b,WAGd7wF,KAAKmsF,OAAOgO,SAAS55F,KAAKP,KAAM80E,IACnC55C,EAAY45C,EAAM55C,WAClB45C,EAAMnvD,SAAW3lB,KAAKyO,QAAQupD,WAC9BntC,GAAI2zC,GAAYx+D,KAAKyO,QAAQ+vD,UAAYsW,EAAMia,UAAYM,IAGnEzhE,KAAM,SAASknD,GACX,GAAI55C,GAAYu5D,EAAa3f,EAAM55C,UAC/BA,IACAl7B,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAQ4xB,EAAW45C,GAGtD90E,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAOwrE,MA2B9CiX,EAAQwJ,GAAezB,GAKnBC,UACIzqF,MAAO,MACP2lF,SAAU,EACV2L,KAAM,EACNpoE,SAAU,IACV2H,KAAM,IACN69B,UAAW,EACX6iC,aAAc,IAGlB1B,eAAgB,WACZ,OAAQvF,KAGZsG,QAAS,SAASplB,GACd,GAAIrmE,GAAUzO,KAAKyO,QAEfgsF,EAAgB3lB,EAAMma,SAAStpF,SAAW8I,EAAQwgF,SAClDyL,EAAgB5lB,EAAMnvD,SAAWlX,EAAQupD,UACzC8iC,EAAiBhmB,EAAMob,UAAYzhF,EAAQ0rB,IAI/C,IAFAn6B,KAAKwpD,QAEAsrB,EAAMia,UAAYK,IAAgC,IAAfpvF,KAAKgX,MACzC,MAAOhX,MAAK+6F,aAKhB,IAAIL,GAAiBI,GAAkBL,EAAe,CAClD,GAAI3lB,EAAMia,WAAaM,GACnB,MAAOrvF,MAAK+6F,aAGhB,IAAIC,GAAgBh7F,KAAKw1F,MAAS1gB,EAAMmb,UAAYjwF,KAAKw1F,MAAQ/mF,EAAQ+jB,UAAY,EACjFyoE,GAAiBj7F,KAAKy1F,SAAWh3B,EAAYz+D,KAAKy1F,QAAS3gB,EAAM5oD,QAAUzd,EAAQosF,YAEvF76F,MAAKw1F,MAAQ1gB,EAAMmb,UACnBjwF,
KAAKy1F,QAAU3gB,EAAM5oD,OAEhB+uE,GAAkBD,EAGnBh7F,KAAKgX,OAAS,EAFdhX,KAAKgX,MAAQ,EAKjBhX,KAAKo1F,OAAStgB,CAId,IAAIomB,GAAWl7F,KAAKgX,MAAQvI,EAAQmsF,IACpC,IAAiB,IAAbM,EAGA,MAAKl7F,MAAK45F,sBAGN55F,KAAKm1F,OAAS5J,EAAkB,WAC5BvrF,KAAKg0F,MAAQwF,GACbx5F,KAAK+5F,WACNtrF,EAAQ+jB,SAAUxyB,MACdw0F,IANAgF,GAUnB,MAAOC,KAGXsB,YAAa,WAIT,MAHA/6F,MAAKm1F,OAAS5J,EAAkB,WAC5BvrF,KAAKg0F,MAAQyF,IACdz5F,KAAKyO,QAAQ+jB,SAAUxyB,MACnBy5F,IAGXjwC,MAAO,WACHnwC,aAAarZ,KAAKm1F,SAGtBvnE,KAAM,WACE5tB,KAAKg0F,OAASwF,KACdx5F,KAAKo1F,OAAO8F,SAAWl7F,KAAKgX,MAC5BhX,KAAKguF,QAAQpgE,KAAK5tB,KAAKyO,QAAQnF,MAAOtJ,KAAKo1F,YAoBvDrxF,GAAOm8E,QAAU,QAMjBn8E,GAAOgwF,UAOHoH,WAAW,EAQX71B,YAAa2zB,GAMb71D,QAAQ,EASR6qD,YAAa,KAObM,WAAY,KAOZoH,SAEKN,IAAoBjyD,QAAQ,KAC5B6xD,IAAmB7xD,QAAQ,IAAU,YACrCkyD,IAAkBp6D,UAAW27D,MAC7B/B,IAAiB55D,UAAW27D,KAAyB,WACrDtB,KACAA,IAAiBjsF,MAAO,YAAasxF,KAAM,IAAM,SACjD1F,KAQLc,UAMIoF,WAAY,OAOZC,YAAa,OASbC,aAAc,OAOdC,eAAgB,OAOhBC,SAAU,OAQVC,kBAAmB,iBAI3B,IAAIC,IAAO,EACPC,GAAc,CA+BlB3Q,IAAQ73E,WAMJg2C,IAAK,SAAS16C,GAaV,MAZAnJ,GAAOtF,KAAKyO,QAASA,GAGjBA,EAAQ62D,aACRtlE,KAAKslE,YAAYzwD,SAEjBpG,EAAQw/E,cAERjuF,KAAK80E,MAAMxhE,UACXtT,KAAK80E,MAAMprE,OAAS+E,EAAQw/E,YAC5BjuF,KAAK80E,MAAMsZ,QAERpuF,MASXklB,KAAM,SAAS0b,GACX5gC,KAAKuvF,QAAQ1E,QAAUjqD,EAAQ+6D,GAAcD,IASjDjM,UAAW,SAASoJ,GAChB,GAAItJ,GAAUvvF,KAAKuvF,OACnB,KAAIA,EAAQ1E,QAAZ,CAKA7qF,KAAKslE,YAAY8zB,gBAAgBP,EAEjC,IAAIjE,GACAc,EAAc11F,KAAK01F,YAKnBkG,EAAgBrM,EAAQqM,gBAIvBA,GAAkBA,GAAiBA,EAAc5H,MAAQwF,MAC1DoC,EAAgBrM,EAAQqM,cAAgB,KAI5C,KADA,GAAIp2F,GAAI,EACDA,EAAIkwF,EAAY/vF,QACnBivF,EAAac,EAAYlwF,GAQrB+pF,EAAQ1E,UAAY8Q,IACfC,GAAiBhH,GAAcgH,IAChChH,EAAWiF,iBAAiB+B,GAGhChH,EAAWprC,QAFXorC,EAAWnF,UAAUoJ,IAOpB+C,GAAiBhH,EAAWZ,OAASQ,GAAcD,GAAgBD,MACpEsH,EAAgBrM,EAAQqM,cAAgBhH,GAE5CpvF,MASR0P,IAAK,SAAS0/E,GACV,GAAIA,YAAsBd,GACtB,MAAOc,EAIX,KAAK,GADDc,GAAc11F,KAAK01F,YACdlwF,EAAI,EAAGA,EAAIkwF,EAAY/vF,OAAQH,IACpC,GAAIkwF,EAAYlwF,GAAGiJ,QAAQnF,OAASsrF,EAChC,MAAOc,GAAYlwF,EAG3B,OAAO,OASXyN,IAAK,SAAS2hF,GACV,GAAIlJ,EAAekJ,EAAY,MAAO50F,MAClC,MAAOA,KAIX,IAAI+hD,GAAW/hD,KAAKkV,IAAI0/E,EAAWnmF,QAAQnF,MAS3C,OARIy4C,IACA/hD,KAAKqW,OAAO0rC,GAGhB/hD,KAAK01F,YAAY1tF,KAAK4sF,GACtBA,EAAW5G,QAAUhuF,KAErBA,KAAKslE,YAAYzwD,SACV+/E,GAQXv+E,OAAQ,SAASu+E,GACb,GAAIlJ,EAAekJ,EAAY,SAAU50F,MACrC,MAAOA,KAGX,IAAI01F,GAAc11F,KAAK01F,WAKvB,OAJAd,GAAa50F,KAAKkV,IAAI0/E,GACtBc,EAAYttF,OAAO4kF,EAAQ0I,EAAad,GAAa,GAErD50F,KAAKslE,YAAYzwD,SACV7U,MASXuT,GAAI,SAASiyD,EAAQ6lB,GACjB,GAAIuK,GAAW51F,KAAK41F,QAKpB,OAJAhK,GAAKc,EAASlnB,GAAS,SAASl8D,GAC5BssF,EAAStsF,GAASssF,EAAStsF,OAC3BssF,EAAStsF,GAAOtB,KAAKqjF,KAElBrrF,MASX0T,IAAK,SAAS8xD,EAAQ6lB,GAClB,GAAIuK,GAAW51F,KAAK41F,QAQpB,OAPAhK,GAAKc,EAASlnB,GAAS,SAASl8D,GACvB+hF,EAGDuK,EAAStsF,GAAOlB,OAAO4kF,EAAQ4I,EAAStsF,GAAQ+hF,GAAU,SAFnDuK,GAAStsF,KAKjBtJ,MAQX4tB,KAAM,SAAStkB,EAAOoJ,GAEd1S,KAAKyO,QAAQ0sF,WACblF,GAAgB3sF,EAAOoJ,EAI3B,IAAIkjF,GAAW51F,KAAK41F,SAAStsF,IAAUtJ,KAAK41F,SAAStsF,GAAO2B,OAC5D,IAAK2qF,GAAaA,EAASjwF,OAA3B,CAIA+M,EAAK5L,KAAOwC,EACZoJ,EAAKrJ,eAAiB,WAClBqJ,EAAKohC,SAASzqC,iBAIlB,KADA,GAAI7D,GAAI,EACDA,EAAIowF,EAASjwF,QAChBiwF,EAASpwF,GAAGkN,GACZlN,MAQR8N,QAAS,WACLtT,KAAK4I,SAAWitF,GAAe71F,MAAM,GAErCA,KAAK41F,YACL51F,KAAKuvF,WACLvvF,KAAK80E,MAAMxhE,UACXtT,KAAK4I,QAAU,OA4BvBtD,EAAOvB,IACHqrF,YAAaA,GACb4D,WAAYA,GACZ3D,UAAWA,GACXC,aAAcA,GAEd2E,eAAgBA,GAChBO,YAAaA,GACbD,cAAeA,GACfD,YAAaA,GACbkF,iBAAkBA,GAClBnF,gBAAiBA,GACjBoF,aAAcA,GAEdvI,eAAgBA,GAChBC,eAAgBA,GAChBC,gBAAiBA,GACjBC,aAAcA,GACdC,eAAgBA,GAChBuF,qBAAsBA,GACtBC,mBAAoBA,GACpBC,cAAeA,GAEf/L,QAASA,GACT+C,MAAOA,EACPqF,YAAaA,EAEbzE,WAAYA,EACZG,WAAYA,EACZL,kBAAmBA,EACnBI,gBAAiBA,EACjBsD,iBAAkBA,
EAElB2B,WAAYA,EACZe,eAAgBA,GAChBgH,IAAKtG,GACLuG,IAAKhH,GACLiH,MAAOzG,GACP0G,MAAO/G,GACPgH,OAAQ5G,GACR6G,MAAOhH,GAEP3hF,GAAIk5E,EACJ/4E,IAAKi5E,EACLf,KAAMA,EACN7qB,MAAOA,EACPz7D,OAAQA,EACRymF,QAASA,EACTN,OAAQA,EACR0B,SAAUA,IAGV,YAAcd,IAAiBnsF,EAAoB,KACjD8uE,EAAgC,WAC9B,MAAOjrE,KACTxD,KAAKX,EAASM,EAAqBN,EAASC,KAASmvE,IAAkCxoE,IAAc3G,EAAOD,QAAUovE,KAChG,mBAAVnvE,IAAyBA,EAAOD,QAC9CC,EAAOD,QAAUmE,GAEjBwF,EAAO+hF,GAAcvnF,IAGtBwF,OAAQgI,SAAU,WAKjB,SAAS1R,EAAQD,GAYrBA,EAAQslD,oBAAsB,WAE7BllD,KAAKm8F,aAAan8F,KAAK0hD,UAAUvC,WAAWC,iBAAiB,GAG7Dp/C,KAAKwuD,eAIDxuD,KAAKmhD,WACPnhD,KAAK6nD,aAEP7nD,KAAK4P,SASNhQ,EAAQu8F,aAAe,SAASC,EAAkBC,GAOhD,IANA,GAAI31C,GAAgB1mD,KAAK8jD,YAAYn+C,OAEjC22F,EAAY,GACZ1+C,EAAQ,EAGL8I,EAAgB01C,GAA4BE,EAAR1+C,GACrCA,EAAQ,GAAK,GACf59C,KAAKu8F,oBAAmB,GACxBv8F,KAAKw8F,0BAGLx8F,KAAKy8F,uBAGP/1C,EAAgB1mD,KAAK8jD,YAAYn+C,OACjCi4C,GAAS,CAIPA,GAAQ,GAAmB,GAAdy+C,GACfr8F,KAAK08F,kBAEP18F,KAAKquD,2BASPzuD,EAAQ+8F,YAAc,SAAS72C,GAC7B,GAAI82C,GAA2B58F,KAAK8kD,MACpC,IAAIgB,EAAKuW,YAAcr8D,KAAK0hD,UAAUvC,WAAWM,iBAAmBz/C,KAAK68F,kBAAkB/2C,KACrE,WAAlB9lD,KAAK88F,WAAqD,GAA3B98F,KAAK8jD,YAAYn+C,QAAc,CAEhE3F,KAAK+8F,WAAWj3C,EAIhB,KAHA,GAAIlI,GAAQ,EAGJ59C,KAAK8jD,YAAYn+C,OAAS3F,KAAK0hD,UAAUvC,WAAWC,iBAA6B,GAARxB,GAC/E59C,KAAKg9F,uBACLp/C,GAAS,MAKX59C,MAAKi9F,mBAAmBn3C,GAAK,GAAM,GAGnC9lD,KAAKgnD,uBACLhnD,KAAKk9F,sBACLl9F,KAAKquD,0BACLruD,KAAKwuD,cAIHxuD,MAAK8kD,QAAU83C,GACjB58F,KAAK4P,SAQThQ,EAAQ4sD,sBAAwB,WACW,GAArCxsD,KAAK0hD,UAAUvC,WAAWzwC,SAC5B1O,KAAKm9F,eAAe,GAAE,GAAM,IAUhCv9F,EAAQ68F,qBAAuB,WAC7Bz8F,KAAKm9F,eAAe,IAAG,GAAM,IAS/Bv9F,EAAQo9F,qBAAuB,WAC7Bh9F,KAAKm9F,eAAe,GAAE,GAAM,IAgB9Bv9F,EAAQu9F,eAAiB,SAASC,EAAcC,EAAUz8D,EAAM08D,GAC9D,GAAIV,GAA2B58F,KAAK8kD,OAChCy4C,EAAgBv9F,KAAK8jD,YAAYn+C,MAGjC3F,MAAKmkD,cAAgBnkD,KAAKid,OAA0B,GAAjBmgF,GACrCp9F,KAAKw9F,kBAIHx9F,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,IAAjBmgF,EAGrCp9F,KAAKy9F,cAAc78D,IAEZ5gC,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,GAAjBmgF,KAC7B,GAATx8D,EAGF5gC,KAAK09F,cAAcL,EAAUz8D,GAI7B5gC,KAAK29F,uBAGT39F,KAAKgnD,uBAGDhnD,KAAK8jD,YAAYn+C,QAAU43F,IAAkBv9F,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,IAAjBmgF,KAClFp9F,KAAK49F,eAAeh9D,GACpB5gC,KAAKgnD,yBAIHhnD,KAAKmkD,cAAgBnkD,KAAKid,OAA0B,IAAjBmgF,KACrCp9F,KAAK69F,eACL79F,KAAKgnD,wBAGPhnD,KAAKmkD,cAAgBnkD,KAAKid,MAG1Bjd,KAAKk9F,sBACLl9F,KAAKwuD,eAGDxuD,KAAK8jD,YAAYn+C,OAAS43F,IAC5Bv9F,KAAK87D,gBAAkB,EAEvB97D,KAAKw8F,2BAGW,GAAdc,GAAsC92F,SAAf82F,IAErBt9F,KAAK8kD,QAAU83C,GACjB58F,KAAK4P,QAIT5P,KAAKquD,2BAMPzuD,EAAQi+F,aAAe,WAErB,GAAIC,GAAkB99F,KAAK+9F,mBACvBD,GAAkB99F,KAAK0hD,UAAUvC,WAAWI,gBAC9Cv/C,KAAKg+F,sBAAsB,EAAIh+F,KAAK0hD,UAAUvC,WAAWI,eAAiBu+C,IAW9El+F,EAAQg+F,eAAiB,SAASh9D,GAChC5gC,KAAKi+F,cACLj+F,KAAKk+F,mBAAmBt9D,GAAM,IAQhChhC,EAAQ28F,mBAAqB,SAASe,GACpC,GAAIV,GAA2B58F,KAAK8kD,OAChCy4C,EAAgBv9F,KAAK8jD,YAAYn+C,MAErC3F,MAAK49F,gBAAe,GAGpB59F,KAAKgnD,uBACLhnD,KAAKk9F,sBACLl9F,KAAKwuD,eAGDxuD,KAAK8jD,YAAYn+C,QAAU43F,IAC7Bv9F,KAAK87D,gBAAkB,IAGP,GAAdwhC,GAAsC92F,SAAf82F,IAErBt9F,KAAK8kD,QAAU83C,GACjB58F,KAAK4P,SAUXhQ,EAAQ+9F,oBAAsB,WAC5B,IAAK,GAAIx3C,KAAUnmD,MAAKg9C,MACtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EACD,IAAjBL,EAAKma,WACFna,EAAKvzC,MAAMvS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOC,aAC1FsmC,EAAKtzC,OAAOxS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOsF,eAC9F7kB,KAAK28F,YAAY72C,KAc3BlmD,EAAQ89F,cAAgB,SAASL,EAAUz8D,GACzC,IAAK,GAAIp7B,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAChD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACvCxF,MAAKi9F,mBAAmBn3C,EAAKu3C,EAAUz8D,GACvC5gC,KAAKquD,4BAeTzuD,EAAQq9F,mBAAqB,SAASpzF,EAAYwzF,EAAWz8D,EAAOu9D,GA
ElE,GAAIt0F,EAAWwyD,YAAc,IAEvBxyD,EAAWwyD,YAAcr8D,KAAK0hD,UAAUvC,WAAWM,kBACrD0+C,GAAU,GAEZd,EAAYc,GAAU,EAAOd,EAGzBxzF,EAAWuyD,eAAiBp8D,KAAKid,OAAkB,GAAT2jB,GAE5C,IAAK,GAAIw9D,KAAmBv0F,GAAWyyD,eACrC,GAAIzyD,EAAWyyD,eAAex2D,eAAes4F,GAAkB,CAC7D,GAAIC,GAAYx0F,EAAWyyD,eAAe8hC,EAI7B,IAATx9D,GACEy9D,EAAUviC,gBAAkBjyD,EAAW2yD,gBAAgB3yD,EAAW2yD,gBAAgB72D,OAAO,IACtFw4F,IACLn+F,KAAKs+F,sBAAsBz0F,EAAWu0F,EAAgBf,EAAUz8D,EAAMu9D,GAIpEn+F,KAAK68F,kBAAkBhzF,IACzB7J,KAAKs+F,sBAAsBz0F,EAAWu0F,EAAgBf,EAAUz8D,EAAMu9D,KAwBpFv+F,EAAQ0+F,sBAAwB,SAASz0F,EAAYu0F,EAAiBf,EAAWz8D,EAAOu9D,GACtF,GAAIE,GAAYx0F,EAAWyyD,eAAe8hC,EAG1C,IAAIC,EAAUjiC,eAAiBp8D,KAAKid,OAAkB,GAAT2jB,EAAe,CAE1D5gC,KAAKu+F,eAGLv+F,KAAKg9C,MAAMohD,GAAmBC,EAG9Br+F,KAAKw+F,uBAAuB30F,EAAWw0F,GAGvCr+F,KAAKy+F,wBAAwB50F,EAAWw0F,GAGxCr+F,KAAK0+F,eAAe70F,GAGpBA,EAAW4E,QAAQwuC,MAAQohD,EAAU5vF,QAAQwuC,KAC7CpzC,EAAWwyD,aAAegiC,EAAUhiC,YACpCxyD,EAAW4E,QAAQ8uC,SAAWr4C,KAAK4G,IAAI9L,KAAK0hD,UAAUvC,WAAWS,YAAa5/C,KAAK0hD,UAAU1E,MAAMO,SAAWv9C,KAAK0hD,UAAUvC,WAAWQ,oBAAoB91C,EAAWwyD,YAAY,IACnLxyD,EAAWgyD,mBAAqBhyD,EAAWslD,aAAaxpD,OAGxD04F,EAAUtsF,EAAIlI,EAAWkI,EAAIlI,EAAWqyD,iBAAmB,GAAMh3D,KAAKE,UACtEi5F,EAAUrsF,EAAInI,EAAWmI,EAAInI,EAAWqyD,iBAAmB,GAAMh3D,KAAKE,gBAG/DyE,GAAWyyD,eAAe8hC,EAGjC,IAAIO,IAAgB,CACpB,KAAK,GAAIC,KAAe/0F,GAAWyyD,eACjC,GAAIzyD,EAAWyyD,eAAex2D,eAAe84F,IACvC/0F,EAAWyyD,eAAesiC,GAAa9iC,gBAAkBuiC,EAAUviC,eAAgB,CACrF6iC,GAAgB,CAChB,OAKe,GAAjBA,GACF90F,EAAW2yD,gBAAgBniB,MAG7Br6C,KAAK6+F,uBAAuBR,GAI5BA,EAAUviC,eAAiB,EAG3BjyD,EAAWs0D,iBAGXn+D,KAAK8kD,QAAS,EAIC,GAAbu4C,GACFr9F,KAAKi9F,mBAAmBoB,EAAUhB,EAAUz8D,EAAMu9D,IAWtDv+F,EAAQi/F,uBAAyB,SAAS/4C,GACxC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAC5CsgD,EAAKqJ,aAAa3pD,GAAG2sD,sBAczBvyD,EAAQ69F,cAAgB,SAAS78D,GAClB,GAATA,EACF5gC,KAAK8+F,sBAGL9+F,KAAK++F,wBAUTn/F,EAAQk/F,oBAAsB,WAC5B,GAAIlgF,GAAGC,EAAGlZ,EACNq5F,EAAYh/F,KAAK0hD,UAAUvC,WAAWK,qBAAqBx/C,KAAKid,KAIpE,KAAK,GAAI+vC,KAAUhtD,MAAK89C,MACtB,GAAI99C,KAAK89C,MAAMh4C,eAAeknD,GAAS,CACrC,GAAIU,GAAO1tD,KAAK89C,MAAMkP,EACtB,IAAIU,EAAKC,WACHD,EAAKkG,MAAQlG,EAAKiG,SACpB/0C,EAAM8uC,EAAKrkC,GAAGtX,EAAI27C,EAAKtkC,KAAKrX,EAC5B8M,EAAM6uC,EAAKrkC,GAAGrX,EAAI07C,EAAKtkC,KAAKpX,EAC5BrM,EAAST,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAGrBmgF,EAATr5F,GAAoB,CAEtB,GAAIkE,GAAa6jD,EAAKtkC,KAClBi1E,EAAY3wC,EAAKrkC,EACjBqkC,GAAKrkC,GAAG5a,QAAQwuC,KAAOyQ,EAAKtkC,KAAK3a,QAAQwuC,OAC3CpzC,EAAa6jD,EAAKrkC,GAClBg1E,EAAY3wC,EAAKtkC,MAGiB,GAAhCi1E,EAAUxiC,mBACZ77D,KAAKi/F,cAAcp1F,EAAWw0F,GAAU,GAEA,GAAjCx0F,EAAWgyD,oBAClB77D,KAAKi/F,cAAcZ,EAAUx0F,GAAW,MAetDjK,EAAQm/F,qBAAuB,WAC7B,IAAK,GAAI54C,KAAUnmD,MAAKg9C,MAEtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIk4C,GAAYr+F,KAAKg9C,MAAMmJ,EAG3B,IAAoC,GAAhCk4C,EAAUxiC,oBAA4D,GAAjCwiC,EAAUlvC,aAAaxpD,OAAa,CAC3E,GAAI+nD,GAAO2wC,EAAUlvC,aAAa,GAC9BtlD,EAAc6jD,EAAKkG,MAAQyqC,EAAUh+F,GAAML,KAAKg9C,MAAM0Q,EAAKiG,QAAU3zD,KAAKg9C,MAAM0Q,EAAKkG,KAGrFyqC,GAAUh+F,IAAMwJ,EAAWxJ,KACzBwJ,EAAW4E,QAAQwuC,KAAOohD,EAAU5vF,QAAQwuC,KAC9Cj9C,KAAKi/F,cAAcp1F,EAAWw0F,GAAU,GAGxCr+F,KAAKi/F,cAAcZ,EAAUx0F,GAAW,OAgBpDjK,EAAQs/F,4BAA8B,SAASp5C,GAG7C,IAAK,GAFDq5C,GAAoB,GACpBC,EAAwB,KACnB55F,EAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAC5C,GAA6BgB,SAAzBs/C,EAAKqJ,aAAa3pD,GAAkB,CACtC,GAAI65F,GAAY,IACZv5C,GAAKqJ,aAAa3pD,GAAGmuD,QAAU7N,EAAKzlD,GACtCg/F,EAAYv5C,EAAKqJ,aAAa3pD,GAAG4jB,KAE1B08B,EAAKqJ,aAAa3pD,GAAGouD,MAAQ9N,EAAKzlD,KACzCg/F,EAAYv5C,EAAKqJ,aAAa3pD,GAAG6jB,IAIlB,MAAbg2E,GAAqBF,EAAoBE,EAAU7iC,gBAAgB72D,SACrEw5F,EAAoBE,EAAU7iC,gBAAgB72D,OAC9Cy5F,EAAwBC,GAKb,MAAbA,GAAkD74F,SAA7BxG,KAAKg9C,MAAMqiD,EAAUh/F,KAC5CL,KAAKi/F,cAAcI,EAAWv5C,GAAM,IAYxClmD,EAAQs+F,mBA
AqB,SAASt9D,EAAO0+D,GAE3C,IAAK,GAAIn5C,KAAUnmD,MAAKg9C,MAElBh9C,KAAKg9C,MAAMl3C,eAAeqgD,IAC5BnmD,KAAKu/F,oBAAoBv/F,KAAKg9C,MAAMmJ,GAAQvlB,EAAM0+D,IAcxD1/F,EAAQ2/F,oBAAsB,SAASC,EAAS5+D,EAAO0+D,EAAWG,GAKhE,GAJ6Bj5F,SAAzBi5F,IACFA,EAAuB,GAGpBD,EAAQ3jC,oBAAsB77D,KAAKqtE,cAA6B,GAAbiyB,GACrDE,EAAQ3jC,oBAAsB77D,KAAKqtE,cAA6B,GAAbiyB,EAAoB,CASxE,IAAK,GAPD1gF,GAAGC,EAAGlZ,EACNq5F,EAAYh/F,KAAK0hD,UAAUvC,WAAWK,qBAAqBx/C,KAAKid,MAChEyiF,GAAe,EAGfC,KACAC,EAAuBJ,EAAQrwC,aAAaxpD,OACvCimB,EAAI,EAAOg0E,EAAJh0E,EAA0BA,IACxC+zE,EAAa33F,KAAKw3F,EAAQrwC,aAAavjC,GAAGvrB,GAK5C,IAAa,GAATugC,EAEF,IADA8+D,GAAe,EACV9zE,EAAI,EAAOg0E,EAAJh0E,EAA0BA,IAAK,CACzC,GAAI8hC,GAAO1tD,KAAK89C,MAAM6hD,EAAa/zE,GACnC,IAAaplB,SAATknD,GACEA,EAAKC,WACHD,EAAKkG,MAAQlG,EAAKiG,SACpB/0C,EAAM8uC,EAAKrkC,GAAGtX,EAAI27C,EAAKtkC,KAAKrX,EAC5B8M,EAAM6uC,EAAKrkC,GAAGrX,EAAI07C,EAAKtkC,KAAKpX,EAC5BrM,EAAST,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAErBmgF,EAATr5F,GAAoB,CACtB+5F,GAAe,CACf,QASZ,IAAM9+D,GAAS8+D,GAAiB9+D,EAE9B,IAAKhV,EAAI,EAAOg0E,EAAJh0E,EAA0BA,IAGpC,GAFA8hC,EAAO1tD,KAAK89C,MAAM6hD,EAAa/zE,IAElBplB,SAATknD,EAAoB,CACtB,GAAI2wC,GAAYr+F,KAAKg9C,MAAO0Q,EAAKiG,QAAU6rC,EAAQn/F,GAAMqtD,EAAKkG,KAAOlG,EAAKiG,OAErE0qC,GAAUlvC,aAAaxpD,QAAW3F,KAAKqtE,aAAeoyB,GACtDpB,EAAUh+F,IAAMm/F,EAAQn/F,IAC3BL,KAAKi/F,cAAcO,EAAQnB,EAAUz9D,MAkBjDhhC,EAAQq/F,cAAgB,SAASp1F,EAAYw0F,EAAWz9D,GAEtD/2B,EAAWyyD,eAAe+hC,EAAUh+F,IAAMg+F,CAG1C,KAAK,GAAI74F,GAAI,EAAGA,EAAI64F,EAAUlvC,aAAaxpD,OAAQH,IAAK,CACtD,GAAIkoD,GAAO2wC,EAAUlvC,aAAa3pD,EAC9BkoD,GAAKkG,MAAQ/pD,EAAWxJ,IAAMqtD,EAAKiG,QAAU9pD,EAAWxJ,GAC1DL,KAAK6/F,qBAAqBh2F,EAAWw0F,EAAU3wC,GAG/C1tD,KAAK8/F,sBAAsBj2F,EAAWw0F,EAAU3wC,GAIpD2wC,EAAUlvC,gBAGVnvD,KAAK+/F,8BAA8Bl2F,EAAWw0F,SAIvCr+F,MAAKg9C,MAAMqhD,EAAUh+F,GAG5B,IAAI2/F,GAAan2F,EAAW4E,QAAQwuC,IACpCohD,GAAUviC,eAAiB97D,KAAK87D,eAChCjyD,EAAW4E,QAAQwuC,MAAQohD,EAAU5vF,QAAQwuC,KAC7CpzC,EAAWwyD,aAAegiC,EAAUhiC,YACpCxyD,EAAW4E,QAAQ8uC,SAAWr4C,KAAK4G,IAAI9L,KAAK0hD,UAAUvC,WAAWS,YAAa5/C,KAAK0hD,UAAU1E,MAAMO,SAAWv9C,KAAK0hD,UAAUvC,WAAWQ,mBAAmB91C,EAAWwyD,aAGlKxyD,EAAW2yD,gBAAgB3yD,EAAW2yD,gBAAgB72D,OAAS,IAAM3F,KAAK87D,gBAC5EjyD,EAAW2yD,gBAAgBx0D,KAAKhI,KAAK87D,gBAMrCjyD,EAAWuyD,eAFA,GAATx7B,EAE0B,EAGA5gC,KAAKid,MAInCpT,EAAWs0D,iBAGXt0D,EAAWyyD,eAAe+hC,EAAUh+F,IAAI+7D,eAAiBvyD,EAAWuyD,eAGpEiiC,EAAUn+B,gBAGVr2D,EAAWs2D,eAAe6/B,GAG1BhgG,KAAK8kD,QAAS,GAUhBllD,EAAQs9F,oBAAsB,WAC5B,IAAK,GAAI13F,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAChD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACvCsgD,GAAK+V,mBAAqB/V,EAAKqJ,aAAaxpD,MAG5C,IAAIs6F,GAAa,CACjB,IAAIn6C,EAAK+V,mBAAqB,EAC5B,IAAK,GAAIjwC,GAAI,EAAGA,EAAIk6B,EAAK+V,mBAAqB,EAAGjwC,IAG/C,IAAK,GAFDs0E,GAAWp6C,EAAKqJ,aAAavjC,GAAGgoC,KAChCusC,EAAar6C,EAAKqJ,aAAavjC,GAAG+nC,OAC7BysC,EAAIx0E,EAAE,EAAGw0E,EAAIt6C,EAAK+V,mBAAoBukC,KACxCt6C,EAAKqJ,aAAaixC,GAAGxsC,MAAQssC,GAAYp6C,EAAKqJ,aAAaixC,GAAGzsC,QAAUwsC,GACxEr6C,EAAKqJ,aAAaixC,GAAGzsC,QAAUusC,GAAYp6C,EAAKqJ,aAAaixC,GAAGxsC,MAAQusC,KAC3EF,GAAc,EAKtBn6C,GAAK+V,oBAAsBokC,IAa/BrgG,EAAQigG,qBAAuB,SAASh2F,EAAYw0F,EAAW3wC,GAEvD7jD,EAAW0yD,eAAez2D,eAAeu4F,EAAUh+F,MACvDwJ,EAAW0yD,eAAe8hC,EAAUh+F,QAGtCwJ,EAAW0yD,eAAe8hC,EAAUh+F,IAAI2H,KAAK0lD,SAGtC1tD,MAAK89C,MAAM4P,EAAKrtD,GAGvB,KAAK,GAAImF,GAAI,EAAGA,EAAIqE,EAAWslD,aAAaxpD,OAAQH,IAClD,GAAIqE,EAAWslD,aAAa3pD,GAAGnF,IAAMqtD,EAAKrtD,GAAI,CAC5CwJ,EAAWslD,aAAa/mD,OAAO5C,EAAE,EACjC,SAcN5F,EAAQkgG,sBAAwB,SAASj2F,EAAYw0F,EAAW3wC,GAE1DA,EAAKkG,MAAQlG,EAAKiG,OACpB3zD,KAAK6/F,qBAAqBh2F,EAAYw0F,EAAW3wC,IAG7CA,EAAKkG,MAAQyqC,EAAUh+F,IACzBqtD,EAAK0G,aAAapsD,KAAKq2F,EAAUh+F,IACjCqtD,EAAKrkC,GAAKxf,EACV6jD,EAAKkG,KAAO/pD,EAAWxJ,KAIvBqtD,EAAKyG,eAAensD,KAAKq
2F,EAAUh+F,IACnCqtD,EAAKtkC,KAAOvf,EACZ6jD,EAAKiG,OAAS9pD,EAAWxJ,IAG3BL,KAAKqgG,oBAAoBx2F,EAAWw0F,EAAU3wC,KAalD9tD,EAAQmgG,8BAAgC,SAASl2F,EAAYw0F,GAE3D,IAAK,GAAI74F,GAAI,EAAGA,EAAIqE,EAAWslD,aAAaxpD,OAAQH,IAAK,CACvD,GAAIkoD,GAAO7jD,EAAWslD,aAAa3pD,EAE/BkoD,GAAKkG,MAAQlG,EAAKiG,QACpB3zD,KAAK6/F,qBAAqBh2F,EAAYw0F,EAAW3wC,KAcvD9tD,EAAQygG,oBAAsB,SAASx2F,EAAYw0F,EAAW3wC,GAGtD7jD,EAAWkxD,cAAcj1D,eAAeu4F,EAAUh+F,MACtDwJ,EAAWkxD,cAAcsjC,EAAUh+F,QAErCwJ,EAAWkxD,cAAcsjC,EAAUh+F,IAAI2H,KAAK0lD,GAG5C7jD,EAAWslD,aAAannD,KAAK0lD,IAY/B9tD,EAAQ6+F,wBAA0B,SAAS50F,EAAYw0F,GACrD,GAAIx0F,EAAWkxD,cAAcj1D,eAAeu4F,EAAUh+F,IAAK,CACzD,IAAK,GAAImF,GAAI,EAAGA,EAAIqE,EAAWkxD,cAAcsjC,EAAUh+F,IAAIsF,OAAQH,IAAK,CACtE,GAAIkoD,GAAO7jD,EAAWkxD,cAAcsjC,EAAUh+F,IAAImF,EAC9CkoD,GAAKyG,eAAezG,EAAKyG,eAAexuD,OAAO,IAAM04F,EAAUh+F,IACjEqtD,EAAKyG,eAAe9Z,MACpBqT,EAAKiG,OAAS0qC,EAAUh+F,GACxBqtD,EAAKtkC,KAAOi1E,IAGZ3wC,EAAK0G,aAAa/Z,MAClBqT,EAAKkG,KAAOyqC,EAAUh+F,GACtBqtD,EAAKrkC,GAAKg1E,GAIZA,EAAUlvC,aAAannD,KAAK0lD,EAG5B,KAAK,GAAI9hC,GAAI,EAAGA,EAAI/hB,EAAWslD,aAAaxpD,OAAQimB,IAClD,GAAI/hB,EAAWslD,aAAavjC,GAAGvrB,IAAMqtD,EAAKrtD,GAAI,CAC5CwJ,EAAWslD,aAAa/mD,OAAOwjB,EAAE,EACjC,cAKC/hB,GAAWkxD,cAAcsjC,EAAUh+F,MAa9CT,EAAQ8+F,eAAiB,SAAS70F,GAChC,IAAK,GAAIrE,GAAI,EAAGA,EAAIqE,EAAWslD,aAAaxpD,OAAQH,IAAK,CACvD,GAAIkoD,GAAO7jD,EAAWslD,aAAa3pD,EAC/BqE,GAAWxJ,IAAMqtD,EAAKkG,MAAQ/pD,EAAWxJ,IAAMqtD,EAAKiG,QACtD9pD,EAAWslD,aAAa/mD,OAAO5C,EAAE,KAcvC5F,EAAQ4+F,uBAAyB,SAAS30F,EAAYw0F,GACpD,IAAK,GAAI74F,GAAI,EAAGA,EAAIqE,EAAW0yD,eAAe8hC,EAAUh+F,IAAIsF,OAAQH,IAAK,CACvE,GAAIkoD,GAAO7jD,EAAW0yD,eAAe8hC,EAAUh+F,IAAImF,EAGnDxF,MAAK89C,MAAM4P,EAAKrtD,IAAMqtD,EAGtB2wC,EAAUlvC,aAAannD,KAAK0lD,GAC5B7jD,EAAWslD,aAAannD,KAAK0lD,SAGxB7jD,GAAW0yD,eAAe8hC,EAAUh+F,KAa7CT,EAAQ4uD,aAAe,WACrB,GAAIrI,EAEJ,KAAKA,IAAUnmD,MAAKg9C,MAClB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EAClBL,GAAKuW,YAAc,IACrBvW,EAAKr9B,MAAQ,IAAIzU,OAAO5P,OAAO0hD,EAAKuW,aAAa,MAMvD,IAAKlW,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACM,GAApBL,EAAKuW,cAELvW,EAAKr9B,MADoBjiB,SAAvBs/C,EAAK2W,cACM3W,EAAK2W,cAGLr4D,OAAO0hD,EAAKzlD,OAuBnCT,EAAQ48F,uBAAyB,WAC/B,GAGIr2C,GAHAm6C,EAAW,EACXC,EAAW,IACXC,EAAe,CAInB,KAAKr6C,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5Bq6C,EAAexgG,KAAKg9C,MAAMmJ,GAAQqW,gBAAgB72D,OACnC66F,EAAXF,IAA0BA,EAAWE,GACrCD,EAAWC,IAAeD,EAAWC,GAI7C,IAAIF,EAAWC,EAAWvgG,KAAK0hD,UAAUvC,WAAWgB,uBAAwB,CAC1E,GAAIo9C,GAAgBv9F,KAAK8jD,YAAYn+C,OACjC86F,EAAcH,EAAWtgG,KAAK0hD,UAAUvC,WAAWgB,sBAEvD,KAAKgG,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,IACxBnmD,KAAKg9C,MAAMmJ,GAAQqW,gBAAgB72D,OAAS86F,GAC9CzgG,KAAKk/F,4BAA4Bl/F,KAAKg9C,MAAMmJ,GAIlDnmD,MAAKgnD,uBACLhnD,KAAKk9F,sBAEDl9F,KAAK8jD,YAAYn+C,QAAU43F,IAC7Bv9F,KAAK87D,gBAAkB,KAe7Bl8D,EAAQi9F,kBAAoB,SAAS/2C,GACnC,MACE5gD,MAAK2lB,IAAIi7B,EAAK/zC,EAAI/R,KAAKkkD,WAAWnyC,IAAM/R,KAAK0hD,UAAUvC,WAAWe,kBAAkBlgD,KAAKid,OAEzF/X,KAAK2lB,IAAIi7B,EAAK9zC,EAAIhS,KAAKkkD,WAAWlyC,IAAMhS,KAAK0hD,UAAUvC,WAAWe,kBAAkBlgD,KAAKid,OAU7Frd,EAAQ88F,gBAAkB,WACxB,IAAK,GAAIl3F,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAChD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACvC,IAAoB,GAAfsgD,EAAKqF,QAAkC,GAAfrF,EAAKsF,OAAkB,CAClD,GAAI3/B,GAAS,EAASzrB,KAAK8jD,YAAYn+C,OAAST,KAAK4G,IAAI,IAAIg6C,EAAKr3C,QAAQwuC,MACtEmR,EAAQ,EAAIlpD,KAAKymB,GAAKzmB,KAAKE,QACZ,IAAf0gD,EAAKqF,SAAkBrF,EAAK/zC,EAAI0Z,EAASvmB,KAAKqZ,IAAI6vC,IACnC,GAAftI,EAAKsF,SAAkBtF,EAAK9zC,EAAIyZ,EAASvmB,KAAKkZ,IAAIgwC,IACtDpuD,KAAK6+F,uBAAuB/4C,MAYlClmD,EAAQq+F,YAAc,WAMpB,IAAK,GALDyC,GAAU,EACVC,EAAiB,EACjBC,EAAa,EACbC,EAAa,EAERr7F,EAAI,EA
AGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAAK,CAEhD,GAAIsgD,GAAO9lD,KAAKg9C,MAAMh9C,KAAK8jD,YAAYt+C,GACnCsgD,GAAK+V,mBAAqBglC,IAC5BA,EAAa/6C,EAAK+V,oBAEpB6kC,GAAW56C,EAAK+V,mBAChB8kC,GAAkBz7F,KAAK4uB,IAAIgyB,EAAK+V,mBAAmB,GACnD+kC,GAAc,EAEhBF,GAAoBE,EACpBD,GAAkCC,CAElC,IAAIE,GAAWH,EAAiBz7F,KAAK4uB,IAAI4sE,EAAQ,GAE7CK,EAAoB77F,KAAKyqB,KAAKmxE,EAElC9gG,MAAKqtE,aAAenoE,KAAKC,MAAMu7F,EAAU,EAAEK,GAGvC/gG,KAAKqtE,aAAewzB,IACtB7gG,KAAKqtE,aAAewzB,IAexBjhG,EAAQo+F,sBAAwB,SAASgD,GACvChhG,KAAKqtE,aAAe,CACpB,IAAI4zB,GAAe/7F,KAAKC,MAAMnF,KAAK8jD,YAAYn+C,OAASq7F,EACxD,KAAK,GAAI76C,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,IACiB,GAAzCnmD,KAAKg9C,MAAMmJ,GAAQ0V,oBAA2B77D,KAAKg9C,MAAMmJ,GAAQgJ,aAAaxpD,QAAU,GACtFs7F,EAAe,IACjBjhG,KAAKu/F,oBAAoBv/F,KAAKg9C,MAAMmJ,IAAQ,GAAK,EAAK,GACtD86C,GAAgB,IAa1BrhG,EAAQm+F,kBAAoB,WAC1B,GAAImD,GAAS,EACTC,EAAQ,CACZ,KAAK,GAAIh7C,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KACiB,GAAzCnmD,KAAKg9C,MAAMmJ,GAAQ0V,oBAA2B77D,KAAKg9C,MAAMmJ,GAAQgJ,aAAaxpD,QAAU,IAC1Fu7F,GAAU,GAEZC,GAAS,EAGb,OAAOD,GAAOC,IAMZ,SAASthG,EAAQD,EAASM,GAE9B,GAAIS,GAAOT,EAAoB,GAC3BqD,EAAOrD,EAAoB,GAgB/BN,GAAQ+nD,iBAAmB,WACzB3nD,KAAKkvD,QAAgB,OAAElvD,KAAK88F,WAAW9/C,MAAQh9C,KAAKg9C,MACpDh9C,KAAKkvD,QAAgB,OAAElvD,KAAK88F,WAAWh/C,MAAQ99C,KAAK89C,MACpD99C,KAAKkvD,QAAgB,OAAElvD,KAAK88F,WAAWh5C,YAAc9jD,KAAK8jD,aAa5DlkD,EAAQwhG,gBAAkB,SAASC,EAAUC,GACxB96F,SAAf86F,GAA0C,UAAdA,EAC9BthG,KAAKuhG,sBAAsBF,GAG3BrhG,KAAKwhG,sBAAsBH,IAY/BzhG,EAAQ2hG,sBAAwB,SAASF,GACvCrhG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAEmyC,GAAuB,YACjErhG,KAAKg9C,MAAch9C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAC3DrhG,KAAK89C,MAAc99C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,OAU7DzhG,EAAQ6hG,uBAAyB,WAC/BzhG,KAAK8jD,YAAc9jD,KAAKkvD,QAAiB,QAAe,YACxDlvD,KAAKg9C,MAAch9C,KAAKkvD,QAAiB,QAAS,MAClDlvD,KAAK89C,MAAc99C,KAAKkvD,QAAiB,QAAS,OAWpDtvD,EAAQ4hG,sBAAwB,SAASH,GACvCrhG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAEmyC,GAAuB,YACjErhG,KAAKg9C,MAAch9C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAC3DrhG,KAAK89C,MAAc99C,KAAKkvD,QAAgB,OAAEmyC,GAAiB,OAU7DzhG,EAAQ8hG,kBAAoB,WAC1B1hG,KAAKohG,gBAAgBphG,KAAK88F,YAU5Bl9F,EAAQk9F,QAAU,WAChB,MAAO98F,MAAKstE,aAAattE,KAAKstE,aAAa3nE,OAAO,IAUpD/F,EAAQ+hG,gBAAkB,WACxB,GAAI3hG,KAAKstE,aAAa3nE,OAAS,EAC7B,MAAO3F,MAAKstE,aAAattE,KAAKstE,aAAa3nE,OAAO,EAGlD,MAAM,IAAIU,WAAU,iEAaxBzG,EAAQgiG,iBAAmB,SAASC,GAClC7hG,KAAKstE,aAAatlE,KAAK65F,IAUzBjiG,EAAQkiG,kBAAoB,WAC1B9hG,KAAKstE,aAAajzB,OAWpBz6C,EAAQmiG,iBAAmB,SAASF,GAElC7hG,KAAKkvD,QAAgB,OAAE2yC,IAAU7kD,SACAc,SACAgG,eACAsY,eAAkBp8D,KAAKid,MACvBswD,YAAe/mE,QAGhDxG,KAAKkvD,QAAgB,OAAE2yC,GAAoB,YAAI,GAAIt+F,IAC9ClD,GAAGwhG,EACF12F,OACEgB,WAAY,UACZC,OAAQ,iBAEJpM,KAAK0hD,WACjB1hD,KAAKkvD,QAAgB,OAAE2yC,GAAoB,YAAExlC,YAAc,GAW7Dz8D,EAAQoiG,oBAAsB,SAASX,SAC9BrhG,MAAKkvD,QAAgB,OAAEmyC,IAWhCzhG,EAAQqiG,oBAAsB,SAASZ,SAC9BrhG,MAAKkvD,QAAgB,OAAEmyC,IAWhCzhG,EAAQsiG,cAAgB,SAASb,GAE/BrhG,KAAKkvD,QAAgB,OAAEmyC,GAAYrhG,KAAKkvD,QAAgB,OAAEmyC,GAG1DrhG,KAAKgiG,oBAAoBX,IAW3BzhG,EAAQuiG,gBAAkB,SAASd,GAEjCrhG,KAAKkvD,QAAgB,OAAEmyC,GAAYrhG,KAAKkvD,QAAgB,OAAEmyC,GAG1DrhG,KAAKiiG,oBAAoBZ,IAa3BzhG,EAAQwiG,qBAAuB,SAASf,GAEtC,IAAK,GAAIl7C,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BnmD,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAAEl7C,GAAUnmD,KAAKg9C,MAAMmJ,GAKnE,KAAK,GAAI6G,KAAUhtD,MAAK89C,MAClB99C,KAAK89C,MAAMh4C,eAAeknD,KAC5BhtD,KAAKkvD,QAAgB,OAAEmyC,GAAiB,MAAEr0C,GAAUhtD,KAAK89C,MAAMkP,GAKnE,KAAK,GAAIxnD,GAAI,EAAGA,EAAIxF,KAAK8jD,YAAYn+C,OAAQH,IAC3CxF,KAAKkvD,QAAgB,OAAEmyC,GAAuB,YAAEr5F,KAAKhI,KAAK8jD,YAAYt+C,KAW1E5F,EAAQyiG,6BAA+B,WACrCriG,KAAKm8F,aAAa,GAAE,IAUtBv8F,EAAQm9F,WAAa,SAASj3C,GAE5B,GAAIw8C,GAAStiG,KAAK88F,gBAWX98F,MAAKg9C,MAAM8I,EAAKzlD,GAEvB,IAAIkiG,GA
AmB5hG,EAAKqE,YAG5BhF,MAAKkiG,cAAcI,GAGnBtiG,KAAK+hG,iBAAiBQ,GAGtBviG,KAAK4hG,iBAAiBW,GAGtBviG,KAAKohG,gBAAgBphG,KAAK88F,WAG1B98F,KAAKg9C,MAAM8I,EAAKzlD,IAAMylD,GAUxBlmD,EAAQ49F,gBAAkB,WAExB,GAAI8E,GAAStiG,KAAK88F,SAGlB,IAAc,WAAVwF,IAC8B,GAA3BtiG,KAAK8jD,YAAYn+C,QACpB3F,KAAKkvD,QAAgB,OAAEozC,GAAqB,YAAE/vF,MAAMvS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOC,aACnIxf,KAAKkvD,QAAgB,OAAEozC,GAAqB,YAAE9vF,OAAOxS,KAAKid,MAAQjd,KAAK0hD,UAAUvC,WAAWO,oBAAsB1/C,KAAKsf,MAAMC,OAAOsF,cAAe,CACnJ,GAAI29E,GAAiBxiG,KAAK2hG,iBAG1B3hG,MAAKqiG,+BAILriG,KAAKoiG,qBAAqBI,GAI1BxiG,KAAKgiG,oBAAoBM,GAGzBtiG,KAAKmiG,gBAAgBK,GAGrBxiG,KAAKohG,gBAAgBoB,GAGrBxiG,KAAK8hG,oBAGL9hG,KAAKgnD,uBAGLhnD,KAAKquD,4BAeXzuD,EAAQsxD,sBAAwB,SAASuxC,EAAYC,GACnD,GAAIC,KACJ,IAAiBn8F,SAAbk8F,EACF,IAAK,GAAIJ,KAAUtiG,MAAKkvD,QAAgB,OAClClvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,KAExCtiG,KAAKuhG,sBAAsBe,GAC3BK,EAAa36F,KAAMhI,KAAKyiG,WAK5B,KAAK,GAAIH,KAAUtiG,MAAKkvD,QAAgB,OACtC,GAAIlvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,GAAS,CAEjDtiG,KAAKuhG,sBAAsBe,EAC3B,IAAIrpF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EAEhDi9F,GAAa36F,KADXiR,EAAKtT,OAAS,EACG3F,KAAKyiG,GAAaxpF,EAAK,GAAGA,EAAK,IAG/BjZ,KAAKyiG,GAAaC,IAO7C,MADA1iG,MAAK0hG,oBACEiB,GAaT/iG,EAAQuxD,mBAAqB,SAASsxC,EAAYC,GAChD,GAAIC,IAAe,CACnB,IAAiBn8F,SAAbk8F,EACF1iG,KAAKyhG,yBACLkB,EAAe3iG,KAAKyiG,SAEjB,CACHziG,KAAKyhG,wBACL,IAAIxoF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EAEhDi9F,GADE1pF,EAAKtT,OAAS,EACD3F,KAAKyiG,GAAaxpF,EAAK,GAAGA,EAAK,IAG/BjZ,KAAKyiG,GAAaC,GAKrC,MADA1iG,MAAK0hG,oBACEiB,GAaT/iG,EAAQgjG,sBAAwB,SAASH,EAAYC,GACnD,GAAiBl8F,SAAbk8F,EACF,IAAK,GAAIJ,KAAUtiG,MAAKkvD,QAAgB,OAClClvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,KAExCtiG,KAAKwhG,sBAAsBc,GAC3BtiG,KAAKyiG,UAKT,KAAK,GAAIH,KAAUtiG,MAAKkvD,QAAgB,OACtC,GAAIlvD,KAAKkvD,QAAgB,OAAEppD,eAAew8F,GAAS,CAEjDtiG,KAAKwhG,sBAAsBc,EAC3B,IAAIrpF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EAC9CuT,GAAKtT,OAAS,EAChB3F,KAAKyiG,GAAaxpF,EAAK,GAAGA,EAAK,IAG/BjZ,KAAKyiG,GAAaC,GAK1B1iG,KAAK0hG,qBAaP9hG,EAAQ4vD,gBAAkB,SAASizC,EAAYC,GAC7C,GAAIzpF,GAAOhT,MAAMkN,UAAU/K,OAAO7H,KAAKmF,UAAW,EACjCc,UAAbk8F,GACF1iG,KAAKkxD,sBAAsBuxC,GAC3BziG,KAAK4iG,sBAAsBH,IAGvBxpF,EAAKtT,OAAS,GAChB3F,KAAKkxD,sBAAsBuxC,EAAYxpF,EAAK,GAAGA,EAAK,IACpDjZ,KAAK4iG,sBAAsBH,EAAYxpF,EAAK,GAAGA,EAAK,MAGpDjZ,KAAKkxD,sBAAsBuxC,EAAYC,GACvC1iG,KAAK4iG,sBAAsBH,EAAYC,KAY7C9iG,EAAQqnD,oBAAsB,WAC5B,GAAIq7C,GAAStiG,KAAK88F,SAClB98F,MAAKkvD,QAAgB,OAAEozC,GAAqB,eAC5CtiG,KAAK8jD,YAAc9jD,KAAKkvD,QAAgB,OAAEozC,GAAqB,aAWjE1iG,EAAQijG,iBAAmB,SAAS97E,EAAIu6E,GACtC,GAAsDx7C,GAAlDC,EAAO,IAAKC,EAAO,KAAMC,EAAO,IAAKC,EAAO,IAChD,KAAK,GAAIo8C,KAAUtiG,MAAKkvD,QAAQoyC,GAC9B,GAAIthG,KAAKkvD,QAAQoyC,GAAYx7F,eAAew8F,IACc97F,SAApDxG,KAAKkvD,QAAQoyC,GAAYgB,GAAqB,YAAiB,CAEjEtiG,KAAKohG,gBAAgBkB,EAAOhB,GAE5Bv7C,EAAO,IAAKC,EAAO,KAAMC,EAAO,IAAKC,EAAO,IAC5C,KAAK,GAAIC,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBL,EAAKkQ,OAAOjvC,GACRk/B,EAAOH,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,QAAQ0zC,EAAOH,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,OAC9D2zC,EAAOJ,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,QAAQ2zC,EAAOJ,EAAK/zC,EAAI,GAAM+zC,EAAKvzC,OAC9DwzC,EAAOD,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,SAASuzC,EAAOD,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,QAC/DwzC,EAAOF,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,SAASwzC,EAAOF,EAAK9zC,EAAI,GAAM8zC,EAAKtzC,QAGvEszC,GAAO9lD,KAAKkvD,QAAQoyC,GAAYgB,GAAqB,YACrDx8C,EAAK/zC,EAAI,IAAOm0C,EAAOD,GACvBH,EAAK9zC,EAAI,IAAOg0C,EAAOD,GACvBD,EAAKvzC,MAAQ,GAAKuzC,EAAK/zC,EAAIk0C,GAC3BH,EAAKtzC,OAAS,GAAKszC,EAAK9zC,EAAI+zC,GAC5BD,EAAKr3C,QAAQgd,OAASvmB,KAAKyqB,KAAKzqB,KAAK4uB,IAAI,GAAIgyB,EAAKvzC,MAAM,GAAKrN,KAAK4uB,IAAI,GAAIgyB,EAAKtzC,OAAO,IACtFszC
,EAAK9iB,SAAShjC,KAAKid,OACnB6oC,EAAKoX,YAAYn2C,KAMzBnnB,EAAQkjG,oBAAsB,SAAS/7E,GACrC/mB,KAAK6iG,iBAAiB97E,EAAI,UAC1B/mB,KAAK6iG,iBAAiB97E,EAAI,UAC1B/mB,KAAK0hG,sBAMH,SAAS7hG,EAAQD,EAASM,GAE9B,GAAIqD,GAAOrD,EAAoB,GAS/BN,GAAQmjG,yBAA2B,SAAS9+F,EAAQqpD,GAClD,GAAItQ,GAAQh9C,KAAKg9C,KACjB,KAAK,GAAImJ,KAAUnJ,GACbA,EAAMl3C,eAAeqgD,IACnBnJ,EAAMmJ,GAAQoH,kBAAkBtpD,IAClCqpD,EAAiBtlD,KAAKm+C,IAY9BvmD,EAAQojG,4BAA8B,SAAU/+F,GAC9C,GAAIqpD,KAEJ,OADAttD,MAAKkxD,sBAAsB,2BAA2BjtD,EAAOqpD,GACtDA,GAWT1tD,EAAQqjG,yBAA2B,SAASpjE,GAC1C,GAAI9tB,GAAI/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACtCC,EAAIhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,EAE1C,QACEvK,KAAQsK,EACRpK,IAAQqK,EACRqV,MAAQtV,EACRuR,OAAQtR,IAYZpS,EAAQgrD,WAAa,SAAU/qB,GAE7B,GAAIqjE,GAAiBljG,KAAKijG,yBAAyBpjE,GAC/CytB,EAAmBttD,KAAKgjG,4BAA4BE,EAIxD,OAAI51C,GAAiB3nD,OAAS,EACpB3F,KAAKg9C,MAAMsQ,EAAiBA,EAAiB3nD,OAAS,IAGvD,MAWX/F,EAAQujG,yBAA2B,SAAUl/F,EAAQwpD,GACnD,GAAI3P,GAAQ99C,KAAK89C,KACjB,KAAK,GAAIkP,KAAUlP,GACbA,EAAMh4C,eAAeknD,IACnBlP,EAAMkP,GAAQO,kBAAkBtpD,IAClCwpD,EAAiBzlD,KAAKglD,IAa9BptD,EAAQwjG,4BAA8B,SAAUn/F,GAC9C,GAAIwpD,KAEJ,OADAztD,MAAKkxD,sBAAsB,2BAA2BjtD,EAAOwpD,GACtDA,GAWT7tD,EAAQqtD,WAAa,SAASptB,GAC5B,GAAIqjE,GAAiBljG,KAAKijG,yBAAyBpjE,GAC/C4tB,EAAmBztD,KAAKojG,4BAA4BF,EAExD,OAAIz1C,GAAiB9nD,OAAS,EACrB3F,KAAK89C,MAAM2P,EAAiBA,EAAiB9nD,OAAS,IAGtD,MAWX/F,EAAQyjG,gBAAkB,SAAStgF,GAC7BA,YAAexf,GACjBvD,KAAKkrD,aAAalO,MAAMj6B,EAAI1iB,IAAM0iB,EAGlC/iB,KAAKkrD,aAAapN,MAAM/6B,EAAI1iB,IAAM0iB,GAUtCnjB,EAAQ0jG,YAAc,SAASvgF,GACzBA,YAAexf,GACjBvD,KAAK4hD,SAAS5E,MAAMj6B,EAAI1iB,IAAM0iB,EAG9B/iB,KAAK4hD,SAAS9D,MAAM/6B,EAAI1iB,IAAM0iB,GAWlCnjB,EAAQ2jG,qBAAuB,SAASxgF,GAClCA,YAAexf,SACVvD,MAAKkrD,aAAalO,MAAMj6B,EAAI1iB,UAG5BL,MAAKkrD,aAAapN,MAAM/6B,EAAI1iB,KAUvCT,EAAQ2+F,aAAe,SAASiF,GACTh9F,SAAjBg9F,IACFA,GAAe,EAEjB,KAAI,GAAIr9C,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACxCnmD,KAAKkrD,aAAalO,MAAMmJ,GAAQjV,UAGpC,KAAI,GAAI8b,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,IACxChtD,KAAKkrD,aAAapN,MAAMkP,GAAQ9b,UAIpClxC,MAAKkrD,cAAgBlO,SAASc,UAEV,GAAhB0lD,GACFxjG,KAAK4tB,KAAK,SAAU5tB,KAAK42B,iBAU7Bh3B,EAAQ6jG,kBAAoB,SAASD,GACdh9F,SAAjBg9F,IACFA,GAAe,EAGjB,KAAK,GAAIr9C,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACrCnmD,KAAKkrD,aAAalO,MAAMmJ,GAAQkW,YAAc,IAChDr8D,KAAKkrD,aAAalO,MAAMmJ,GAAQjV,WAChClxC,KAAKujG,qBAAqBvjG,KAAKkrD,aAAalO,MAAMmJ,IAKpC,IAAhBq9C,GACFxjG,KAAK4tB,KAAK,SAAU5tB,KAAK42B,iBAW7Bh3B,EAAQ8jG,sBAAwB,WAC9B,GAAI1sF,GAAQ,CACZ,KAAK,GAAImvC,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,KACzCnvC,GAAS,EAGb,OAAOA,IASTpX,EAAQ+jG,iBAAmB,WACzB,IAAK,GAAIx9C,KAAUnmD,MAAKkrD,aAAalO,MACnC,GAAIh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,GACzC,MAAOnmD,MAAKkrD,aAAalO,MAAMmJ,EAGnC,OAAO,OASTvmD,EAAQgkG,iBAAmB,WACzB,IAAK,GAAI52C,KAAUhtD,MAAKkrD,aAAapN,MACnC,GAAI99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,GACzC,MAAOhtD,MAAKkrD,aAAapN,MAAMkP,EAGnC,OAAO,OAUTptD,EAAQikG,sBAAwB,WAC9B,GAAI7sF,GAAQ,CACZ,KAAK,GAAIg2C,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,KACzCh2C,GAAS,EAGb,OAAOA,IAUTpX,EAAQkkG,wBAA0B,WAChC,GAAI9sF,GAAQ,CACZ,KAAI,GAAImvC,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,KACxCnvC,GAAS,EAGb,KAAI,GAAIg2C,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,KACxCh2C,GAAS,EAGb,OAAOA,IASTpX,EAAQmkG,kBAAoB,WAC1B,IAAI,GAAI59C,KAAUnmD,MAAKkrD,aAAalO,MAClC,GAAGh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,GACxC,OAAO,CAGX,KAAI,GAAI6G,KAAUhtD,MAAKkrD,aAAapN,MAClC,GAAG99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,GACxC,OAAO,CAGX,QAAO,GAUTptD,EAAQokG,oBAAsB,WAC5B,IAAI,GAAI79C,KAAUnmD,MAAKkrD,aAAalO,MAClC,GAAGh9C,K
AAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACpCnmD,KAAKkrD,aAAalO,MAAMmJ,GAAQkW,YAAc,EAChD,OAAO,CAIb,QAAO,GASTz8D,EAAQqkG,sBAAwB,SAASn+C,GACvC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAAK,CACjD,GAAIkoD,GAAO5H,EAAKqJ,aAAa3pD,EAC7BkoD,GAAKvc,SACLnxC,KAAKqjG,gBAAgB31C,KAUzB9tD,EAAQskG,qBAAuB,SAASp+C,GACtC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAAK,CACjD,GAAIkoD,GAAO5H,EAAKqJ,aAAa3pD,EAC7BkoD,GAAKphD,OAAQ,EACbtM,KAAKsjG,YAAY51C,KAWrB9tD,EAAQukG,wBAA0B,SAASr+C,GACzC,IAAK,GAAItgD,GAAI,EAAGA,EAAIsgD,EAAKqJ,aAAaxpD,OAAQH,IAAK,CACjD,GAAIkoD,GAAO5H,EAAKqJ,aAAa3pD,EAC7BkoD,GAAKxc,WACLlxC,KAAKujG,qBAAqB71C,KAgB9B9tD,EAAQmrD,cAAgB,SAAS9mD,EAAQmgG,EAAQZ,EAAca,EAAgBC,GACxD99F,SAAjBg9F,IACFA,GAAe,GAEMh9F,SAAnB69F,IACFA,GAAiB,GAGa,GAA5BrkG,KAAK+jG,qBAA0C,GAAVK,GAAgD,GAA7BpkG,KAAKytE,sBAC/DztE,KAAKu+F,cAAa,GAIG,GAAnBt6F,EAAOovC,UAAmD,GAA7BrzC,KAAK0hD,UAAUzS,aAAsBq1D,EAQ1C,GAAnBrgG,EAAOovC,UACdrzC,KAAKqjG,gBAAgBp/F,GACrBu/F,GAAe,IAGfv/F,EAAOitC,WACPlxC,KAAKujG,qBAAqBt/F,KAb1BA,EAAOktC,SACPnxC,KAAKqjG,gBAAgBp/F,GACjBA,YAAkBV,IAA6C,GAArCvD,KAAKwtE,8BAA2D,GAAlB62B,GAC1ErkG,KAAKikG,sBAAsBhgG,IAaX,GAAhBu/F,GACFxjG,KAAK4tB,KAAK,SAAU5tB,KAAK42B,iBAY7Bh3B,EAAQutD,YAAc,SAASlpD,GACT,GAAhBA,EAAOqI,QACTrI,EAAOqI,OAAQ,EACftM,KAAK4tB,KAAK,YAAYk4B,KAAK7hD,EAAO5D,OAWtCT,EAAQstD,aAAe,SAASjpD,GACV,GAAhBA,EAAOqI,QACTrI,EAAOqI,OAAQ,EACftM,KAAKsjG,YAAYr/F,GACbA,YAAkBV,IACpBvD,KAAK4tB,KAAK,aAAak4B,KAAK7hD,EAAO5D,MAGnC4D,YAAkBV,IACpBvD,KAAKkkG,qBAAqBjgG,IAa9BrE,EAAQ8qD,aAAe,aAUvB9qD,EAAQgsD,WAAa,SAAS/rB,GAC5B,GAAIimB,GAAO9lD,KAAK4qD,WAAW/qB,EAC3B,IAAY,MAARimB,EACF9lD,KAAK+qD,cAAcjF,GAAM,OAEtB,CACH,GAAI4H,GAAO1tD,KAAKitD,WAAWptB,EACf,OAAR6tB,EACF1tD,KAAK+qD,cAAc2C,GAAM,GAGzB1tD,KAAKu+F,eAGT,GAAI5vC,GAAa3uD,KAAK42B,cACtB+3B,GAAoB,SAClB41C,KAAMxyF,EAAG8tB,EAAQ9tB,EAAGC,EAAG6tB,EAAQ7tB,GAC/BuN,QAASxN,EAAG/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GAAIC,EAAGhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,KAEzFhS,KAAK4tB,KAAK,QAAS+gC,GACnB3uD,KAAK6iD,WAUPjjD,EAAQisD,iBAAmB,SAAShsB,GAClC,GAAIimB,GAAO9lD,KAAK4qD,WAAW/qB,EACf,OAARimB,GAAyBt/C,SAATs/C,IAElB9lD,KAAKkkD,YAAenyC,EAAM/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACxCC,EAAMhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAC5DhS,KAAK28F,YAAY72C,GAEnB,IAAI6I,GAAa3uD,KAAK42B,cACtB+3B,GAAoB,SAClB41C,KAAMxyF,EAAG8tB,EAAQ9tB,EAAGC,EAAG6tB,EAAQ7tB,GAC/BuN,QAASxN,EAAG/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GAAIC,EAAGhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,KAEzFhS,KAAK4tB,KAAK,cAAe+gC,IAU3B/uD,EAAQksD,cAAgB,SAASjsB,GAC/B,GAAIimB,GAAO9lD,KAAK4qD,WAAW/qB,EAC3B,IAAY,MAARimB,EACF9lD,KAAK+qD,cAAcjF,GAAK,OAErB,CACH,GAAI4H,GAAO1tD,KAAKitD,WAAWptB,EACf,OAAR6tB,GACF1tD,KAAK+qD,cAAc2C,GAAK,GAG5B1tD,KAAK6iD,WAUPjjD,EAAQosD,iBAAmB,SAASnsB,GAClC7/B,KAAKwkG,6BAA6B3kE,GAClC7/B,KAAKykG,2BAA2B5kE,IAGlCjgC,EAAQ4kG,6BAA+B,aACvC5kG,EAAQ6kG,2BAA6B,aAOrC7kG,EAAQg3B,aAAe,WACrB,GAAIo0B,GAAUhrD,KAAK0kG,mBACfC,EAAU3kG,KAAK4kG,kBACnB,QAAQ5nD,MAAMgO,EAASlN,MAAM6mD,IAS/B/kG,EAAQ8kG,iBAAmB,WACzB,GAAIG,KACJ,IAAiC,GAA7B7kG,KAAK0hD,UAAUzS,WACjB,IAAK,GAAIkX,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,IACzC0+C,EAAQ78F,KAAKm+C,EAInB,OAAO0+C,IASTjlG,EAAQglG,iBAAmB,WACzB,GAAIC,KACJ,IAAiC,GAA7B7kG,KAAK0hD,UAAUzS,WACjB,IAAK,GAAI+d,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,IACzC63C,EAAQ78F,KAAKglD,EAInB,OAAO63C,IASTjlG,EAAQ82B,aAAe,WACrBiC,QAAQhF,IAAI,gEAUd/zB,EAAQklG,YAAc,SAAS30D,EAAWk0D,GACxC,GAAI7+F,GAAGq7B,EAAMxgC,CAEb,KAAK8vC,GAAkC3pC,QAApB2pC,EAAUxqC,OAC3B,KAAM,qCAKR,KAFA3F,KAAKu+F,cAAa,GAEb/4F,EAAI,EAAGq7B,EAAOsP,EAAUxqC,OAAYk7B,EAAJr7B,EAAUA,IAAK,CAClDnF,EAAK8vC,EAAU3qC,EAEf,IAAIsgD,GAAO9lD,KAAKg9C,MAAM38C,EACtB,KAAKylD,EACH,KAAM,IAAIi/C,YAAW,iBAAmB1kG,E
AAK,cAE/CL,MAAK+qD,cAAcjF,GAAK,GAAK,EAAKu+C,GAAe,GAEnDrkG,KAAKyhB,UASP7hB,EAAQolG,YAAc,SAAS70D,GAC7B,GAAI3qC,GAAGq7B,EAAMxgC,CAEb,KAAK8vC,GAAkC3pC,QAApB2pC,EAAUxqC,OAC3B,KAAM,qCAKR,KAFA3F,KAAKu+F,cAAa,GAEb/4F,EAAI,EAAGq7B,EAAOsP,EAAUxqC,OAAYk7B,EAAJr7B,EAAUA,IAAK,CAClDnF,EAAK8vC,EAAU3qC,EAEf,IAAIkoD,GAAO1tD,KAAK89C,MAAMz9C,EACtB,KAAKqtD,EACH,KAAM,IAAIq3C,YAAW,iBAAmB1kG,EAAK,cAE/CL,MAAK+qD,cAAc2C,GAAK,GAAK,GAAK,GAAM,GAE1C1tD,KAAKyhB,UAOP7hB,EAAQuuD,iBAAmB,WACzB,IAAI,GAAIhI,KAAUnmD,MAAKkrD,aAAalO,MAC/Bh9C,KAAKkrD,aAAalO,MAAMl3C,eAAeqgD,KACnCnmD,KAAKg9C,MAAMl3C,eAAeqgD,UACtBnmD,MAAKkrD,aAAalO,MAAMmJ,GAIrC,KAAI,GAAI6G,KAAUhtD,MAAKkrD,aAAapN,MAC/B99C,KAAKkrD,aAAapN,MAAMh4C,eAAeknD,KACnChtD,KAAK89C,MAAMh4C,eAAeknD,UACtBhtD,MAAKkrD,aAAapN,MAAMkP,MASnC,SAASntD,EAAQD,EAASM,GAE9B,GAAIS,GAAOT,EAAoB,GAC3BqD,EAAOrD,EAAoB,IAC3BkD,EAAOlD,EAAoB,GAO/BN,GAAQqlG,qBAAuB,WAC7BjlG,KAAKqqD,oBAAoBrqD,KAAK0tE,iBAC9B1tE,KAAKklG,mBAELllG,KAAKwkG,6BAA+B,mBAC7BxkG,MAAKkvD,QAAiB,QAAS,MAAc,iBAC7ClvD,MAAKkvD,QAAiB,QAAS,MAAiB,cACvDlvD,KAAK6hD,oBAAqB,EAC1B7hD,KAAKujD,kBAAmB,GAU1B3jD,EAAQulG,4BAA8B,WACpC,IAAK,GAAIC,KAAgBplG,MAAKwjD,gBACxBxjD,KAAKwjD,gBAAgB19C,eAAes/F,KACtCplG,KAAKolG,GAAgBplG,KAAKwjD,gBAAgB4hD,SACnCplG,MAAKwjD,gBAAgB4hD,KAUlCxlG,EAAQylG,gBAAkB,WACxBrlG,KAAKkoD,UAAYloD,KAAKkoD,QACtB,IAAIo9C,GAAUtlG,KAAK0tE,gBACfE,EAAW5tE,KAAK4tE,SAChBD,EAAc3tE,KAAK2tE,WACF,IAAjB3tE,KAAKkoD,UACPo9C,EAAQr4F,MAAM26B,QAAQ,QACtBgmC,EAAS3gE,MAAM26B,QAAQ,QACvB+lC,EAAY1gE,MAAM26B,QAAQ,OAC1BgmC,EAAS57C,QAAUhyB,KAAKqlG,gBAAgBvwE,KAAK90B,QAG7CslG,EAAQr4F,MAAM26B,QAAQ,OACtBgmC,EAAS3gE,MAAM26B,QAAQ,OACvB+lC,EAAY1gE,MAAM26B,QAAQ,QAC1BgmC,EAAS57C,QAAU,MAErBhyB,KAAKmnD,yBAQPvnD,EAAQunD,sBAAwB,WAE1BnnD,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,cAG1B,IAAIlhE,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAqBnD,IAnB6B79B,SAAzBxG,KAAKwlG,kBACPxlG,KAAKwlG,gBAAgB/rC,uBACrBz5D,KAAKwlG,gBAAkBh/F,OACvBxG,KAAKylG,oBAAsB,KAC3BzlG,KAAK6hD,oBAAqB,EAC1B7hD,KAAK6iD,WAIP7iD,KAAKmlG,8BAGLnlG,KAAKujD,kBAAmB,EAGxBvjD,KAAKwtE,8BAA+B,EACpCxtE,KAAKytE,sBAAuB,EAC5BztE,KAAKklG,mBAEgB,GAAjBllG,KAAKkoD,SAAkB,CACzB,KAAOloD,KAAK0tE,gBAAgBhqD,iBAC1B1jB,KAAK0tE,gBAAgBv8D,YAAYnR,KAAK0tE,gBAAgB/pD,WAGxD3jB,MAAKklG,gBAA6B,YAAI3zF,SAASM,cAAc,QAC7D7R,KAAKklG,gBAA6B,YAAEr9F,UAAY,6BAChD7H,KAAKklG,gBAAkC,iBAAI3zF,SAASM,cAAc,QAClE7R,KAAKklG,gBAAkC,iBAAEr9F,UAAY,4BACrD7H,KAAKklG,gBAAkC,iBAAEjhF,UAAYogB,EAAgB,QACrErkC,KAAKklG,gBAA6B,YAAEzzF,YAAYzR,KAAKklG,gBAAkC,kBAEvFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA6B,YAAI3zF,SAASM,cAAc,QAC7D7R,KAAKklG,gBAA6B,YAAEr9F,UAAY,iCAChD7H,KAAKklG,gBAAkC,iBAAI3zF,SAASM,cAAc,QAClE7R,KAAKklG,gBAAkC,iBAAEr9F,UAAY,4BACrD7H,KAAKklG,gBAAkC,iBAAEjhF,UAAYogB,EAAgB,QACrErkC,KAAKklG,gBAA6B,YAAEzzF,YAAYzR,KAAKklG,gBAAkC,kBAEvFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA6B,aACnEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA6B,aAE/B,GAAhCllG,KAAK0jG,yBAAgC1jG,KAAK28C,iBAAiBC,MAC7D58C,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA8B,aAAI3zF,SAASM,cAAc,QAC9D7R,KAAKklG,gBAA8B,aAAEr9F,UAAY,8BACjD7H,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,QACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,4BACtD7H,KAAKklG,gBAAmC,kBAAEjhF,UAAYogB,EAAiB,SACvErkC,KAAKklG,gBAA8B,aAAEzzF,YAAYzR,KAAKklG,gBAAmC,mBAEzFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA8B,eAE7B,GAAhCllG,KAAK6jG,yBAAgE,GAAhC7jG,KAAK0jG,0BACjD1jG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA8B,aAAI
3zF,SAASM,cAAc,QAC9D7R,KAAKklG,gBAA8B,aAAEr9F,UAAY,8BACjD7H,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,QACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,4BACtD7H,KAAKklG,gBAAmC,kBAAEjhF,UAAYogB,EAAiB,SACvErkC,KAAKklG,gBAA8B,aAAEzzF,YAAYzR,KAAKklG,gBAAmC,mBAEzFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA8B,eAEtC,GAA5BllG,KAAK+jG,sBACP/jG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAA4B,WAAI3zF,SAASM,cAAc,QAC5D7R,KAAKklG,gBAA4B,WAAEr9F,UAAY,gCAC/C7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,4BACpD7H,KAAKklG,gBAAiC,gBAAEjhF,UAAYogB,EAAY,IAChErkC,KAAKklG,gBAA4B,WAAEzzF,YAAYzR,KAAKklG,gBAAiC,iBAErFllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA4B,aAKpEllG,KAAKklG,gBAA6B,YAAElzE,QAAUhyB,KAAK0lG,sBAAsB5wE,KAAK90B,MAC9EA,KAAKklG,gBAA6B,YAAElzE,QAAUhyB,KAAK2lG,sBAAsB7wE,KAAK90B,MAC1C,GAAhCA,KAAK0jG,yBAAgC1jG,KAAK28C,iBAAiBC,KAC7D58C,KAAKklG,gBAA8B,aAAElzE,QAAUhyB,KAAK4lG,UAAU9wE,KAAK90B,MAE5B,GAAhCA,KAAK6jG,yBAAgE,GAAhC7jG,KAAK0jG,0BACjD1jG,KAAKklG,gBAA8B,aAAElzE,QAAUhyB,KAAK6lG,uBAAuB/wE,KAAK90B,OAElD,GAA5BA,KAAK+jG,sBACP/jG,KAAKklG,gBAA4B,WAAElzE,QAAUhyB,KAAKmqD,gBAAgBr1B,KAAK90B,OAEzEA,KAAK4tE,SAAS57C,QAAUhyB,KAAKqlG,gBAAgBvwE,KAAK90B,KAElD;GAAImU,GAAKnU,IACTA,MAAKulG,cAAgBpxF,EAAGgzC,sBACxBnnD,KAAKuT,GAAG,SAAUvT,KAAKulG,mBAEpB,CACH,KAAOvlG,KAAK2tE,YAAYjqD,iBACtB1jB,KAAK2tE,YAAYx8D,YAAYnR,KAAK2tE,YAAYhqD,WAGhD3jB,MAAKklG,gBAA8B,aAAI3zF,SAASM,cAAc,QAC9D7R,KAAKklG,gBAA8B,aAAEr9F,UAAY,uCACjD7H,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,QACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,4BACtD7H,KAAKklG,gBAAmC,kBAAEjhF,UAAYogB,EAAa,KACnErkC,KAAKklG,gBAA8B,aAAEzzF,YAAYzR,KAAKklG,gBAAmC,mBAEzFllG,KAAK2tE,YAAYl8D,YAAYzR,KAAKklG,gBAA8B,cAEhEllG,KAAKklG,gBAA8B,aAAElzE,QAAUhyB,KAAKqlG,gBAAgBvwE,KAAK90B,QAW7EJ,EAAQ8lG,sBAAwB,WAE9B1lG,KAAKilG,uBACDjlG,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,cAG1B,IAAIlhE,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAEnDrkC,MAAKklG,mBACLllG,KAAKklG,gBAA0B,SAAI3zF,SAASM,cAAc,QAC1D7R,KAAKklG,gBAA0B,SAAEr9F,UAAY,8BAC7C7H,KAAKklG,gBAA+B,cAAI3zF,SAASM,cAAc,QAC/D7R,KAAKklG,gBAA+B,cAAEr9F,UAAY,4BAClD7H,KAAKklG,gBAA+B,cAAEjhF,UAAYogB,EAAa,KAC/DrkC,KAAKklG,gBAA0B,SAAEzzF,YAAYzR,KAAKklG,gBAA+B,eAEjFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,8BACpD7H,KAAKklG,gBAAsC,qBAAI3zF,SAASM,cAAc,QACtE7R,KAAKklG,gBAAsC,qBAAEr9F,UAAY,4BACzD7H,KAAKklG,gBAAsC,qBAAEjhF,UAAYogB,EAAuB,eAChFrkC,KAAKklG,gBAAiC,gBAAEzzF,YAAYzR,KAAKklG,gBAAsC,sBAE/FllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA0B,UAChEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAiC,iBAGvEllG,KAAKklG,gBAA0B,SAAElzE,QAAUhyB,KAAKmnD,sBAAsBryB,KAAK90B,KAG3E,IAAImU,GAAKnU,IACTA,MAAKulG,cAAgBpxF,EAAG2xF,SACxB9lG,KAAKuT,GAAG,SAAUvT,KAAKulG,gBASzB3lG,EAAQ+lG,sBAAwB,WAE9B3lG,KAAKilG,uBACLjlG,KAAKu+F,cAAa,GAClBv+F,KAAKujD,kBAAmB,EAEpBvjD,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,cAG1B,IAAIlhE,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAEnDrkC,MAAKu+F,eACLv+F,KAAKytE,sBAAuB,EAC5BztE,KAAKwtE,8BAA+B,EAEpCxtE,KAAKklG,mBACLllG,KAAKklG,gBAA0B,SAAI3zF,SAASM,cAAc,QAC1D7R,KAAKklG,gBAA0B,SAAEr9F,UAAY,8BAC7C7H,KAAKklG,gBAA+B,cAAI3zF,SAASM,cAAc,QAC/D7R,KAAKklG,gBAA+B,cAAEr9F,UAAY,4BAClD7H,KAAKklG,gBAA+B,cAAEjhF,UAAYogB,EAAa,KAC/DrkC,KAAKklG,gBAA0B,SAAEzzF,YAAYzR,KAAKklG,gBAA+B,eAEjFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAAi
C,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,8BACpD7H,KAAKklG,gBAAsC,qBAAI3zF,SAASM,cAAc,QACtE7R,KAAKklG,gBAAsC,qBAAEr9F,UAAY,4BACzD7H,KAAKklG,gBAAsC,qBAAEjhF,UAAYogB,EAAwB,gBACjFrkC,KAAKklG,gBAAiC,gBAAEzzF,YAAYzR,KAAKklG,gBAAsC,sBAE/FllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA0B,UAChEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAiC,iBAGvEllG,KAAKklG,gBAA0B,SAAElzE,QAAUhyB,KAAKmnD,sBAAsBryB,KAAK90B,KAG3E,IAAImU,GAAKnU,IACTA,MAAKulG,cAAgBpxF,EAAG4xF,eACxB/lG,KAAKuT,GAAG,SAAUvT,KAAKulG,eAGvBvlG,KAAKwjD,gBAA8B,aAAIxjD,KAAK0qD,aAC5C1qD,KAAKwjD,gBAA8C,6BAAIxjD,KAAKwkG,6BAC5DxkG,KAAKwjD,gBAAkC,iBAAIxjD,KAAK2qD,iBAChD3qD,KAAKwjD,gBAAgC,eAAIxjD,KAAK2rD,eAC9C3rD,KAAK0qD,aAAe1qD,KAAK+lG,eACzB/lG,KAAKwkG,6BAA+B,aACpCxkG,KAAK2qD,iBAAmB,aACxB3qD,KAAK2rD,eAAiB3rD,KAAKgmG,eAG3BhmG,KAAK6iD,WAQPjjD,EAAQimG,uBAAyB,WAE/B7lG,KAAKilG,uBACLjlG,KAAK6hD,oBAAqB,EAEtB7hD,KAAKulG,eACPvlG,KAAK0T,IAAI,SAAU1T,KAAKulG,eAG1BvlG,KAAKwlG,gBAAkBxlG,KAAK4jG,mBAC5B5jG,KAAKwlG,gBAAgBhsC,qBAErB,IAAIn1B,GAASrkC,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,OAEnDrkC,MAAKklG,mBACLllG,KAAKklG,gBAA0B,SAAI3zF,SAASM,cAAc,QAC1D7R,KAAKklG,gBAA0B,SAAEr9F,UAAY,8BAC7C7H,KAAKklG,gBAA+B,cAAI3zF,SAASM,cAAc,QAC/D7R,KAAKklG,gBAA+B,cAAEr9F,UAAY,4BAClD7H,KAAKklG,gBAA+B,cAAEjhF,UAAYogB,EAAa,KAC/DrkC,KAAKklG,gBAA0B,SAAEzzF,YAAYzR,KAAKklG,gBAA+B,eAEjFllG,KAAKklG,gBAAmC,kBAAI3zF,SAASM,cAAc,OACnE7R,KAAKklG,gBAAmC,kBAAEr9F,UAAY,wBAEtD7H,KAAKklG,gBAAiC,gBAAI3zF,SAASM,cAAc,QACjE7R,KAAKklG,gBAAiC,gBAAEr9F,UAAY,8BACpD7H,KAAKklG,gBAAsC,qBAAI3zF,SAASM,cAAc,QACtE7R,KAAKklG,gBAAsC,qBAAEr9F,UAAY,4BACzD7H,KAAKklG,gBAAsC,qBAAEjhF,UAAYogB,EAA4B,oBACrFrkC,KAAKklG,gBAAiC,gBAAEzzF,YAAYzR,KAAKklG,gBAAsC,sBAE/FllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAA0B,UAChEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAmC,mBACzEllG,KAAK0tE,gBAAgBj8D,YAAYzR,KAAKklG,gBAAiC,iBAGvEllG,KAAKklG,gBAA0B,SAAElzE,QAAUhyB,KAAKmnD,sBAAsBryB,KAAK90B,MAG3EA,KAAKwjD,gBAA8B,aAASxjD,KAAK0qD,aACjD1qD,KAAKwjD,gBAA8C,6BAAKxjD,KAAKwkG,6BAC7DxkG,KAAKwjD,gBAA4B,WAAWxjD,KAAK4rD,WACjD5rD,KAAKwjD,gBAAkC,iBAAKxjD,KAAK2qD,iBACjD3qD,KAAKwjD,gBAA+B,cAAQxjD,KAAKqrD,cACjDrrD,KAAK0qD,aAAmB1qD,KAAKimG,mBAC7BjmG,KAAK4rD,WAAmB,aACxB5rD,KAAKqrD,cAAmBrrD,KAAKkmG,iBAC7BlmG,KAAK2qD,iBAAmB,aACxB3qD,KAAKwkG,6BAA+BxkG,KAAKmmG,oBAGzCnmG,KAAK6iD,WAUPjjD,EAAQqmG,mBAAqB,SAASpmE,GACpC7/B,KAAKwlG,gBAAgBhxC,aAAaprC,KAAK8nB,WACvClxC,KAAKwlG,gBAAgBhxC,aAAanrC,GAAG6nB,WACrClxC,KAAKylG,oBAAsBzlG,KAAKwlG,gBAAgB9rC,wBAAwB15D,KAAKurD,qBAAqB1rB,EAAQ9tB,GAAG/R,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAC9G,OAA7BhS,KAAKylG,sBACPzlG,KAAKylG,oBAAoBt0D,SACzBnxC,KAAKujD,kBAAmB,GAE1BvjD,KAAK6iD,WAUPjjD,EAAQsmG,iBAAmB,SAAS58F,GAClC,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OACJ,QAA7BlsB,KAAKylG,qBAA6Dj/F,SAA7BxG,KAAKylG,sBAC5CzlG,KAAKylG,oBAAoB1zF,EAAI/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GAC/D/R,KAAKylG,oBAAoBzzF,EAAIhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAEjEhS,KAAK6iD,WASPjjD,EAAQumG,oBAAsB,SAAStmE,GACrC,GAAIumE,GAAUpmG,KAAK4qD,WAAW/qB,EACd,QAAZumE,GACqD,GAAnDpmG,KAAKwlG,gBAAgBhxC,aAAaprC,KAAKiqB,WACzCrzC,KAAKwlG,gBAAgB3rC,uBACrB75D,KAAKqmG,UAAUD,EAAQ/lG,GAAIL,KAAKwlG,gBAAgBn8E,GAAGhpB,IACnDL,KAAKwlG,gBAAgBhxC,aAAaprC,KAAK8nB,YAEY,GAAjDlxC,KAAKwlG,gBAAgBhxC,aAAanrC,GAAGgqB,WACvCrzC,KAAKwlG,gBAAgB3rC,uBACrB75D,KAAKqmG,UAAUrmG,KAAKwlG,gBAAgBp8E,KAAK/oB,GAAI+lG,EAAQ/lG,IACrDL,KAAKwlG,gBAAgBhxC,aAAanrC,GAAG6nB,aAIvClxC,KAAKwlG,gBAAgB3rC,uBAEvB75D,KAAKujD,kBAAmB,EACxBvjD,KAAK6iD,WASPjjD,EAAQmmG,eAAiB,SAASlmE,GAChC,GAAoC,GAAhC7/B,KAAK0jG,wBAA8B,CACrC,GAAI59C,GAAO9lD,KAAK4qD,WAAW/qB,EAE3B,IAAY,MAARimB,EACF,GAAIA,EAAKuW,YAAc,EACrBiqC,MAAMtmG,KAAK
0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,QAAyB,qBAElE,CACHrkC,KAAK+qD,cAAcjF,GAAK,EACxB,IAAIygD,GAAevmG,KAAKkvD,QAAiB,QAAS,KAGlDq3C,GAAyB,WAAI,GAAIhjG,IAAMlD,GAAG,oBAAoBL,KAAK0hD,UACnE,IAAI8kD,GAAaD,EAAyB,UAC1CC,GAAWz0F,EAAI+zC,EAAK/zC,EACpBy0F,EAAWx0F,EAAI8zC,EAAK9zC,EAGpBhS,KAAK89C,MAAsB,eAAI,GAAI16C,IAAM/C,GAAG,iBAAiB+oB,KAAK08B,EAAKzlD,GAAGgpB,GAAGm9E,EAAWnmG,IAAKL,KAAMA,KAAK0hD,UACxG,IAAI+kD,GAAiBzmG,KAAK89C,MAAsB,cAChD2oD,GAAer9E,KAAO08B,EACtB2gD,EAAe94C,WAAY,EAC3B84C,EAAeh4F,QAAQqyC,cAAgBpyC,SAAS,EAC5CqyC,SAAS,EACTj6C,KAAM,aACNk6C,UAAW,IAEfylD,EAAepzD,UAAW,EAC1BozD,EAAep9E,GAAKm9E,EAEpBxmG,KAAKwjD,gBAA+B,cAAIxjD,KAAKqrD,cAC7CrrD,KAAKqrD,cAAgB,SAAS/hD,GAC5B,GAAIu2B,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,QACjCu6E,EAAiBzmG,KAAK89C,MAAsB,cAChD2oD,GAAep9E,GAAGtX,EAAI/R,KAAKurD,qBAAqB1rB,EAAQ9tB,GACxD00F,EAAep9E,GAAGrX,EAAIhS,KAAKyrD,qBAAqB5rB,EAAQ7tB,IAG1DhS,KAAK8kD,QAAS,EACd9kD,KAAK4P,WAMbhQ,EAAQomG,eAAiB,SAAS18F,GAChC,GAAoC,GAAhCtJ,KAAK0jG,wBAA8B,CACrC,GAAI7jE,GAAU7/B,KAAKuqD,YAAYjhD,EAAM4iB,OAErClsB,MAAKqrD,cAAgBrrD,KAAKwjD,gBAA+B,oBAClDxjD,MAAKwjD,gBAA+B,aAG3C,IAAIkjD,GAAgB1mG,KAAK89C,MAAsB,eAAE6V,aAG1C3zD,MAAK89C,MAAsB,qBAC3B99C,MAAKkvD,QAAiB,QAAS,MAAc,iBAC7ClvD,MAAKkvD,QAAiB,QAAS,MAAiB,aAEvD,IAAIpJ,GAAO9lD,KAAK4qD,WAAW/qB,EACf,OAARimB,IACEA,EAAKuW,YAAc,EACrBiqC,MAAMtmG,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,QAAyB,kBAGrErkC,KAAK2mG,YAAYD,EAAc5gD,EAAKzlD,IACpCL,KAAKmnD,0BAGTnnD,KAAKu+F,iBAQT3+F,EAAQkmG,SAAW,WACjB,GAAI9lG,KAAK+jG,qBAAwC,GAAjB/jG,KAAKkoD,SAAkB,CACrD,GAAIg7C,GAAiBljG,KAAKijG,yBAAyBjjG,KAAKikD,iBACpD2iD,GAAevmG,GAAGM,EAAKqE,aAAa+M,EAAEmxF,EAAez7F,KAAKuK,EAAEkxF,EAAev7F,IAAI8gB,MAAM,MAAM+pC,gBAAe,EAAKC,gBAAe,EAClI,IAAIzyD,KAAK28C,iBAAiB1pC,IAAK,CAC7B,GAAwC,GAApCjT,KAAK28C,iBAAiB1pC,IAAItN,OAU5B,KAAM,IAAI/B,OAAM,sEAThB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiB1pC,IAAI2zF,EAAa,SAASC,GAC9C1yF,EAAGiwC,UAAUnxC,IAAI4zF,GACjB1yF,EAAGgzC,wBACHhzC,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAWP5P,MAAKokD,UAAUnxC,IAAI2zF,GACnB5mG,KAAKmnD,wBACLnnD,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAWXhQ,EAAQ+mG,YAAc,SAASG,EAAaC,GAC1C,GAAqB,GAAjB/mG,KAAKkoD,SAAkB,CACzB,GAAI0+C,IAAex9E,KAAK09E,EAAcz9E,GAAG09E,EACzC,IAAI/mG,KAAK28C,iBAAiBG,QAAS,CACjC,GAA4C,GAAxC98C,KAAK28C,iBAAiBG,QAAQn3C,OAShC,KAAM,IAAI/B,OAAM,0EARhB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiBG,QAAQ8pD,EAAa,SAASC,GAClD1yF,EAAGkwC,UAAUpxC,IAAI4zF,GACjB1yF,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAUP5P,MAAKqkD,UAAUpxC,IAAI2zF,GACnB5mG,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAUXhQ,EAAQymG,UAAY,SAASS,EAAaC,GACxC,GAAqB,GAAjB/mG,KAAKkoD,SAAkB,CACzB,GAAI0+C,IAAevmG,GAAIL,KAAKwlG,gBAAgBnlG,GAAI+oB,KAAK09E,EAAcz9E,GAAG09E,EACtE,IAAI/mG,KAAK28C,iBAAiBE,SAAU,CAClC,GAA6C,GAAzC78C,KAAK28C,iBAAiBE,SAASl3C,OASjC,KAAM,IAAI/B,OAAM,wEARhB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiBE,SAAS+pD,EAAa,SAASC,GACnD1yF,EAAGkwC,UAAUxvC,OAAOgyF,GACpB1yF,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAUP5P,MAAKqkD,UAAUxvC,OAAO+xF,GACtB5mG,KAAK8kD,QAAS,EACd9kD,KAAK4P,UAUXhQ,EAAQgmG,UAAY,WAClB,IAAI5lG,KAAK28C,iBAAiBC,MAAyB,GAAjB58C,KAAKkoD,SA4BrC,KAAM,IAAItkD,OAAM,iDA3BhB,IAAIkiD,GAAO9lD,KAAK2jG,mBACZjxF,GAAQrS,GAAGylD,EAAKzlD,GAClBooB,MAAOq9B,EAAKr9B,MACZxW,MAAO6zC,EAAKr3C,QAAQwD,MACpBmrC,MAAO0I,EAAKr3C,QAAQ2uC,MACpBjyC,OACEgB,WAAW25C,EAAKr3C,QAAQtD,MAAMgB,WAC9BC,OAAO05C,EAAKr3C,QAAQtD,MAAMiB,OAC1BC,WACEF,WAAW25C,EAAKr3C,QAAQtD,MAAMkB,UAAUF,WACxCC,OAAO05C,EAAKr3C,QAAQtD,MAAMkB,UAAUD,SAG1C,IAAyC,GAArCpM,KAAK28C,iBAAiBC,KAAKj3C,OAU7B,KAAM,IAAI/B,OAAM,wEAThB,IAAIuQ,GAAKnU,IACTA,MAAK28C,iBAAiBC,KAAKlqC,EAAM,SAAUm0F,GACzC1yF,EAAGiwC,UAAUvvC,OAAOgyF,GACpB1yF,EAAGgzC,wBACHhzC,EAAG2wC,QAAS,EACZ3wC,EAAGvE,WAoBXhQ,EAAQuqD,gBAAkB,WACxB,IAAKnqD,KAAK+jG,qBAAwC,GAAjB/jG,KAAKkoD,
SACpC,GAAKloD,KAAKgkG,sBA4BRsC,MAAMtmG,KAAK0hD,UAAU7c,QAAQ7kC,KAAK0hD,UAAUrd,QAA4B,wBA5BzC,CAC/B,GAAI2iE,GAAgBhnG,KAAK0kG,mBACrBuC,EAAgBjnG,KAAK4kG,kBACzB,IAAI5kG,KAAK28C,iBAAiBI,IAAK,CAC7B,GAAI5oC,GAAKnU,KACL0S,GAAQsqC,MAAOgqD,EAAelpD,MAAOmpD,EACzC,IAAwC,GAApCjnG,KAAK28C,iBAAiBI,IAAIp3C,OAU5B,KAAM,IAAI/B,OAAM,0EAThB5D,MAAK28C,iBAAiBI,IAAIrqC,EAAM,SAAUm0F,GACxC1yF,EAAGkwC,UAAUhuC,OAAOwwF,EAAc/oD,OAClC3pC,EAAGiwC,UAAU/tC,OAAOwwF,EAAc7pD,OAClC7oC,EAAGoqF,eACHpqF,EAAG2wC,QAAS,EACZ3wC,EAAGvE,cAQP5P,MAAKqkD,UAAUhuC,OAAO4wF,GACtBjnG,KAAKokD,UAAU/tC,OAAO2wF,GACtBhnG,KAAKu+F,eACLv+F,KAAK8kD,QAAS,EACd9kD,KAAK4P,WAYT,SAAS/P,EAAQD,EAASM,GAE9B,GACIklD,IADOllD,EAAoB,GACdA,EAAoB,KACjC6D,EAAS7D,EAAoB,GAEjCN,GAAQiuE,iBAAmB,WAEzB,GAA8C,GAA1C7tE,KAAK8hD,kBAAkBC,SAASp8C,OAAa,CAC/C,IAAK,GAAIH,GAAI,EAAGA,EAAIxF,KAAK8hD,kBAAkBC,SAASp8C,OAAQH,IAC1DxF,KAAK8hD,kBAAkBC,SAASv8C,GAAG8N,SAErCtT,MAAK8hD,kBAAkBC,YAGzB/hD,KAAKykG,2BAA6B,aAG9BzkG,KAAKknG,gBAAkBlnG,KAAKknG,eAAwB,SAAKlnG,KAAKknG,eAAwB,QAAEr9F,YAC1F7J,KAAKknG,eAAwB,QAAEr9F,WAAWsH,YAAYnR,KAAKknG,eAAwB,UAYvFtnG,EAAQkuE,wBAA0B,WAChC9tE,KAAK6tE,mBAEL7tE,KAAKknG,iBACL,IAAIA,IAAkB,KAAK,OAAO,OAAO,QAAQ,SAAS,UAAU,eAChEC,GAAwB,UAAU,YAAY,YAAY,aAAa,UAAU,WAAW,cAEhGnnG,MAAKknG,eAAwB,QAAI31F,SAASM,cAAc,OACxD7R,KAAKsf,MAAM7N,YAAYzR,KAAKknG,eAAwB,QAEpD,KAAK,GAAI1hG,GAAI,EAAGA,EAAI0hG,EAAevhG,OAAQH,IAAK,CAC9CxF,KAAKknG,eAAeA,EAAe1hG,IAAM+L,SAASM,cAAc,OAChE7R,KAAKknG,eAAeA,EAAe1hG,IAAIqC,UAAY,sBAAwBq/F,EAAe1hG,GAC1FxF,KAAKknG,eAAwB,QAAEz1F,YAAYzR,KAAKknG,eAAeA,EAAe1hG,IAE9E,IAAI1B,GAAS,GAAIC,GAAO/D,KAAKknG,eAAeA,EAAe1hG,KAAMwoE,iBAAiB,GAClF5oB,GAAWkE,QAAQxlD,EAAQ9D,KAAKmnG,EAAqB3hG,IAAIsvB,KAAK90B,OAC9DolD,EAAW6hB,UAAUnjE,EAAQ9D,KAAK+rD,WAAWj3B,KAAK90B,OAElDA,KAAK8hD,kBAAkBE,KAAKh6C,KAAKlE,GAGnC9D,KAAKykG,2BAA6BzkG,KAAKonG,cAEvCpnG,KAAK8hD,kBAAkBC,SAAW/hD,KAAK8hD,kBAAkBE,MAS3DpiD,EAAQynG,YAAc,SAAS/9F,GAC7BtJ,KAAKilD,YAAYn1C,SAAS,MAC1BxG,EAAMq8B,mBAQR/lC,EAAQwnG,cAAgB,WACtBpnG,KAAK8pD,eACL9pD,KAAK2pD,eACL3pD,KAAKiqD,aAYPrqD,EAAQ8pD,QAAU,SAASpgD,GACzBtJ,KAAK+iD,WAAa/iD,KAAK0hD,UAAUrB,SAASC,MAAMtuC,EAChDhS,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQgqD,UAAY,SAAStgD,GAC3BtJ,KAAK+iD,YAAc/iD,KAAK0hD,UAAUrB,SAASC,MAAMtuC,EACjDhS,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQiqD,UAAY,SAASvgD,GAC3BtJ,KAAK8iD,WAAa9iD,KAAK0hD,UAAUrB,SAASC,MAAMvuC,EAChD/R,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQmqD,WAAa,SAASzgD,GAC5BtJ,KAAK8iD,YAAc9iD,KAAK0hD,UAAUrB,SAASC,MAAMtuC,EACjDhS,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQoqD,QAAU,SAAS1gD,GACzBtJ,KAAKgjD,cAAgBhjD,KAAK0hD,UAAUrB,SAASC,MAAMpgB,KACnDlgC,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQsqD,SAAW,SAAS5gD,GAC1BtJ,KAAKgjD,eAAiBhjD,KAAK0hD,UAAUrB,SAASC,MAAMpgB,KACpDlgC,KAAK4P,QACLtG,EAAMD,kBAQRzJ,EAAQqqD,UAAY,SAAS3gD,GAC3BtJ,KAAKgjD,cAAgB,EACrB15C,GAASA,EAAMD,kBAQjBzJ,EAAQ+pD,aAAe,SAASrgD,GAC9BtJ,KAAK+iD,WAAa,EAClBz5C,GAASA,EAAMD,kBAQjBzJ,EAAQkqD,aAAe,SAASxgD,GAC9BtJ,KAAK8iD,WAAa,EAClBx5C,GAASA,EAAMD,mBAMb,SAASxJ,EAAQD,GAErBA,EAAQgoD,aAAe,WACrB,IAAK,GAAIzB,KAAUnmD,MAAKg9C,MACtB,GAAIh9C,KAAKg9C,MAAMl3C,eAAeqgD,GAAS,CACrC,GAAIL,GAAO9lD,KAAKg9C,MAAMmJ,EACO,IAAzBL,EAAKuV,mBACPvV,EAAKlI,MAAQ,GACbkI,EAAKwV,qBAAsB,KAYnC17D,EAAQolD,yBAA2B,WACjC,GAAiD,GAA7ChlD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAmB1O,KAAK8jD,YAAYn+C,OAAS,EAAG,CAEpF,GACImgD,GAAMK,EADNmhD,EAAU,EAEVC,GAAe,EACfC,GAAiB,CAErB,KAAKrhD,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACA,IAAdL,EAAKlI,MACP2pD,GAAe,EAGfC,GAAiB,EAEfF,EAAUxhD,EAAKhI,MAAMn4C,SACvB2hG,EAAUxhD,EAAKhI,MAAMn4C,QAM3B,IAAsB,GAAlB6hG,GAA0C,GAAhBD,EAC5B,KAAM,IAAI3jG,OAAM,wHAQhB5D,MAAKynG,mBAGiB,GAAlBD,IAC8C,WAA5CxnG,KAAK0hD,UAAUjB,mBAAmBG,OACpC5gD,KAAK0nG,iB
AAiBJ,GAGtBtnG,KAAK2nG,0BAAyB,GAKlC,IAAIC,GAAe5nG,KAAK6nG,kBAGxB7nG,MAAK8nG,uBAAuBF,GAG5B5nG,KAAK4P,UAYXhQ,EAAQkoG,uBAAyB,SAASF,GACxC,GAAIzhD,GAAQL,CAGZ,KAAK,GAAIlI,KAASgqD,GAChB,GAAIA,EAAa9hG,eAAe83C,GAE9B,IAAKuI,IAAUyhD,GAAahqD,GAAOZ,MAC7B4qD,EAAahqD,GAAOZ,MAAMl3C,eAAeqgD,KAC3CL,EAAO8hD,EAAahqD,GAAOZ,MAAMmJ,GACkB,MAA/CnmD,KAAK0hD,UAAUjB,mBAAmBvlB,WAAoE,MAA/Cl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UACvF4qB,EAAKqF,SACPrF,EAAK/zC,EAAI61F,EAAahqD,GAAOmqD,OAC7BjiD,EAAKqF,QAAS,EAEdy8C,EAAahqD,GAAOmqD,QAAUH,EAAahqD,GAAO+C,aAIhDmF,EAAKsF,SACPtF,EAAK9zC,EAAI41F,EAAahqD,GAAOmqD,OAC7BjiD,EAAKsF,QAAS,EAEdw8C,EAAahqD,GAAOmqD,QAAUH,EAAahqD,GAAO+C,aAGtD3gD,KAAKgoG,kBAAkBliD,EAAKhI,MAAMgI,EAAKzlD,GAAGunG,EAAa9hD,EAAKlI,OAOpE59C,MAAK6nD,cAUPjoD,EAAQioG,iBAAmB,WACzB,GACI1hD,GAAQL,EAAMlI,EADdgqD,IAKJ,KAAKzhD,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBL,EAAKqF,QAAS,EACdrF,EAAKsF,QAAS,EACqC,MAA/CprD,KAAK0hD,UAAUjB,mBAAmBvlB,WAAoE,MAA/Cl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UAC3F4qB,EAAK9zC,EAAIhS,KAAK0hD,UAAUjB,mBAAmBC,gBAAgBoF,EAAKlI,MAGhEkI,EAAK/zC,EAAI/R,KAAK0hD,UAAUjB,mBAAmBC,gBAAgBoF,EAAKlI,MAEjCp3C,SAA7BohG,EAAa9hD,EAAKlI,SACpBgqD,EAAa9hD,EAAKlI,QAAUquB,OAAQ,EAAGjvB,SAAW+qD,OAAO,EAAGpnD,YAAY,IAE1EinD,EAAa9hD,EAAKlI,OAAOquB,QAAU,EACnC27B,EAAa9hD,EAAKlI,OAAOZ,MAAMmJ,GAAUL,EAK7C,IAAImiD,GAAW,CACf,KAAKrqD,IAASgqD,GACRA,EAAa9hG,eAAe83C,IAC1BqqD,EAAWL,EAAahqD,GAAOquB,SACjCg8B,EAAWL,EAAahqD,GAAOquB,OAMrC,KAAKruB,IAASgqD,GACRA,EAAa9hG,eAAe83C,KAC9BgqD,EAAahqD,GAAO+C,aAAesnD,EAAW,GAAKjoG,KAAK0hD,UAAUjB,mBAAmBE,YACrFinD,EAAahqD,GAAO+C,aAAgBinD,EAAahqD,GAAOquB,OAAS,EACjE27B,EAAahqD,GAAOmqD,OAASH,EAAahqD,GAAO+C,YAAe,IAAOinD,EAAahqD,GAAOquB,OAAS,GAAK27B,EAAahqD,GAAO+C,YAIjI,OAAOinD,IAUThoG,EAAQ8nG,iBAAmB,SAASJ,GAClC,GAAInhD,GAAQL,CAGZ,KAAKK,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACdL,EAAKhI,MAAMn4C,QAAU2hG,IACvBxhD,EAAKlI,MAAQ,GAMnB,KAAKuI,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GACA,GAAdL,EAAKlI,OACP59C,KAAKkoG,UAAU,EAAEpiD,EAAKhI,MAAMgI,EAAKzlD,MAczCT,EAAQ+nG,yBAA2B,WACjC,GAAIxhD,GAAQL,EAAMqiD,EACd5H,EAAW,GAGf4H,GAAYnoG,KAAKg9C,MAAMh9C,KAAK8jD,YAAY,IACxCqkD,EAAUvqD,MAAQ2iD,EAClBvgG,KAAKooG,kBAAkB7H,EAAS4H,EAAUrqD,MAAMqqD,EAAU9nG,GAG1D,KAAK8lD,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBo6C,EAAWz6C,EAAKlI,MAAQ2iD,EAAWz6C,EAAKlI,MAAQ2iD,EAKpD,KAAKp6C,IAAUnmD,MAAKg9C,MACdh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BL,EAAO9lD,KAAKg9C,MAAMmJ,GAClBL,EAAKlI,OAAS2iD,IAepB3gG,EAAQ6nG,iBAAmB,WACzBznG,KAAK0hD,UAAUvC,WAAWzwC,SAAU,EACpC1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,EAC3C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAKmtE,2BACsC,GAAvCntE,KAAK0hD,UAAUZ,aAAapyC,UAC9B1O,KAAK0hD,UAAUZ,aAAaC,SAAU,GAExC/gD,KAAK0oD,wBAEL,IAAI8oB,GAASxxE,KAAK0hD,UAAUjB,kBAC5B+wB,GAAO9wB,gBAAkBx7C,KAAK2lB,IAAI2mD,EAAO9wB,kBACjB,MAApB8wB,EAAOt2C,WAAyC,MAApBs2C,EAAOt2C,aACrCs2C,EAAO9wB,iBAAmB,IAGJ,MAApB8wB,EAAOt2C,WAAyC,MAApBs2C,EAAOt2C,UACM,GAAvCl7B,KAAK0hD,UAAUZ,aAAapyC,UAC9B1O,KAAK0hD,UAAUZ,aAAah6C,KAAO,YAIM,GAAvC9G,KAAK0hD,UAAUZ,aAAapyC,UAC9B1O,KAAK0hD,UAAUZ,aAAah6C,KAAO,eAgBzClH,EAAQooG,kBAAoB,SAASlqD,EAAOuqD,EAAUT,EAAcU,GAClE,IAAK,GAAI9iG,GAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IAAK,CACrC,GAAI64F,GAAY,IAEdA,GADEvgD,EAAMt4C,GAAGouD,MAAQy0C,EACPvqD,EAAMt4C,GAAG4jB,KAGT00B,EAAMt4C,GAAG6jB,EAIvB,IAAIk/E,IAAY,CACmC,OAA/CvoG,KAAK0hD,UAAUjB,mBAAmBvlB,WAAoE,MAA/Cl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UACvFmjE,EAAUlzC,QAAUkzC,EAAUzgD,MAAQ0qD,IACxCjK,EAAUlzC,QAAS,EACnBkzC,EAAUtsF,EAAI61F,EAAavJ,EAAUzgD,OAAOmqD,OAC5CQ,GAAY,GAIVlK,EAAUjzC,QAAUizC
,EAAUzgD,MAAQ0qD,IACxCjK,EAAUjzC,QAAS,EACnBizC,EAAUrsF,EAAI41F,EAAavJ,EAAUzgD,OAAOmqD,OAC5CQ,GAAY,GAIC,GAAbA,IACFX,EAAavJ,EAAUzgD,OAAOmqD,QAAUH,EAAavJ,EAAUzgD,OAAO+C,YAClE09C,EAAUvgD,MAAMn4C,OAAS,GAC3B3F,KAAKgoG,kBAAkB3J,EAAUvgD,MAAMugD,EAAUh+F,GAAGunG,EAAavJ,EAAUzgD,UAenFh+C,EAAQsoG,UAAY,SAAStqD,EAAOE,EAAOuqD,GACzC,IAAK,GAAI7iG,GAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IAAK,CACrC,GAAI64F,GAAY,IAEdA,GADEvgD,EAAMt4C,GAAGouD,MAAQy0C,EACPvqD,EAAMt4C,GAAG4jB,KAGT00B,EAAMt4C,GAAG6jB,IAEA,IAAnBg1E,EAAUzgD,OAAeygD,EAAUzgD,MAAQA,KAC7CygD,EAAUzgD,MAAQA,EACdygD,EAAUvgD,MAAMn4C,OAAS,GAC3B3F,KAAKkoG,UAAUtqD,EAAM,EAAGygD,EAAUvgD,MAAOugD,EAAUh+F,OAe3DT,EAAQwoG,kBAAoB,SAASxqD,EAAOE,EAAOuqD,GACjDroG,KAAKg9C,MAAMqrD,GAAU/sC,qBAAsB,CAE3C,KAAK,GADD+iC,GAAWnjE,EACN11B,EAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IAChC01B,EAAY,EACR4iB,EAAMt4C,GAAGouD,MAAQy0C,GACnBhK,EAAYvgD,EAAMt4C,GAAG4jB,KACrB8R,EAAY,IAGZmjE,EAAYvgD,EAAMt4C,GAAG6jB,GAEA,IAAnBg1E,EAAUzgD,QACZygD,EAAUzgD,MAAQA,EAAQ1iB,EAI9B,KAAK,GAAI11B,GAAI,EAAGA,EAAIs4C,EAAMn4C,OAAQH,IACA64F,EAA5BvgD,EAAMt4C,GAAGouD,MAAQy0C,EAAuBvqD,EAAMt4C,GAAG4jB,KACnC00B,EAAMt4C,GAAG6jB,GAEvBg1E,EAAUvgD,MAAMn4C,OAAS,GAAK04F,EAAU/iC,uBAAwB,GAClEt7D,KAAKooG,kBAAkB/J,EAAUzgD,MAAOygD,EAAUvgD,MAAOugD,EAAUh+F,KAWzET,EAAQ4oG,cAAgB,WACtB,IAAK,GAAIriD,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BnmD,KAAKg9C,MAAMmJ,GAAQgF,QAAS,EAC5BnrD,KAAKg9C,MAAMmJ,GAAQiF,QAAS,KAQ9B,SAASvrD,EAAQD,EAASM,GAkgB9B,QAASuoG,KACPzoG,KAAK0hD,UAAUZ,aAAapyC,SAAW1O,KAAK0hD,UAAUZ,aAAapyC,OACnE,IAAIg6F,GAAqBn3F,SAASo3F,eAAe,qBACCD,GAAmBz7F,MAAMd,WAAhC,GAAvCnM,KAAK0hD,UAAUZ,aAAapyC,QAAwD,UACR,UAEhF1O,KAAK0oD,wBAAuB,GAO9B,QAASkgD,KACP,IAAK,GAAIziD,KAAUnmD,MAAK4jD,iBAClB5jD,KAAK4jD,iBAAiB99C,eAAeqgD,KACvCnmD,KAAK4jD,iBAAiBuC,GAAQsV,GAAK,EAAIz7D,KAAK4jD,iBAAiBuC,GAAQuV,GAAK,EAC1E17D,KAAK4jD,iBAAiBuC,GAAQoV,GAAK,EAAIv7D,KAAK4jD,iBAAiBuC,GAAQqV,GAAK,EAG7B,IAA7Cx7D,KAAK0hD,UAAUjB,mBAAmB/xC,SACpC1O,KAAKglD,2BACL6jD,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,8CAC7C6oG,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,0BAC7C6oG,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,0BAC7C6oG,EAAiBtoG,KAAKP,KAAM,aAAc,EAAG,wBAC7C6oG,EAAiBtoG,KAAKP,KAAM,eAAgB,EAAG,oBAG/CA,KAAK08F,kBAEP18F,KAAK8kD,QAAS,EACd9kD,KAAK4P,QAMP,QAASk5F,KACP,GAAIr6F,GAAU,gDACVs6F,KACAC,EAAez3F,SAASo3F,eAAe,wBACvCM,EAAe13F,SAASo3F,eAAe,uBAC3C,IAA4B,GAAxBK,EAAaE,QAAiB,CAMhC,GALIlpG,KAAK0hD,UAAUlD,QAAQC,UAAUE,uBAAyB3+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUE,uBAAwBoqD,EAAgB/gG,KAAK,0BAA4BhI,KAAK0hD,UAAUlD,QAAQC,UAAUE,uBAC3M3+C,KAAK0hD,UAAUlD,QAAQI,gBAAkB5+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUG,gBAAyCmqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQI,gBAC1L5+C,KAAK0hD,UAAUlD,QAAQK,cAAgB7+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUI,cAA2CkqD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQK,cACxL7+C,KAAK0hD,UAAUlD,QAAQM,gBAAkB9+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUK,gBAAyCiqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQM,gBAC1L9+C,KAAK0hD,UAAUlD,QAAQO,SAAW/+C,KAAKmpG,gBAAgB3qD,QAAQC,UAAUM,SAAgDgqD,EAAgB/gG,KAAK,YAAchI,KAAK0hD,UAAUlD,QAAQO,SACzJ,GAA1BgqD,EAAgBpjG,OAAa,CAC/B8I,EAAU,kBACVA,GAAW,wBACX,KAAK,GAAIjJ,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,KAETzO,KAAK0hD,UAAUZ,aAAapyC,SAAW1O,KAAKmpG,gBAAgBroD,aAAapyC,UAC7C,GAA1Bq6F,EAAgBpjG,OAAc8I,EAAU,kBACtCA,GAAW,KACjBA,GAAW,iBAAmBzO,KAAK0hD,UAAUZ,aAAapyC,SAE7C,iDAAXD,IACFA,GAAW,UAGV,IAA4B,GAAxBw6F,EAAaC,QAAiB,CAQrC,GAPAz6F,EAAU,kBACVA,GAAW,wCACPzO,KAAK0hD,UAAUlD,QAAQQ,UAAUC,cAAgBj/C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUC,cAAgB8pD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQQ,UAAUC,cACjLj/C,KAAK0hD,UAAUlD,QAAQI,gBAAkB5+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUJ,gBAAwBmqD,EAA
gB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQI,gBACzK5+C,KAAK0hD,UAAUlD,QAAQK,cAAgB7+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUH,cAA0BkqD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQK,cACvK7+C,KAAK0hD,UAAUlD,QAAQM,gBAAkB9+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUF,gBAAwBiqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQM,gBACzK9+C,KAAK0hD,UAAUlD,QAAQO,SAAW/+C,KAAKmpG,gBAAgB3qD,QAAQQ,UAAUD,SAA+BgqD,EAAgB/gG,KAAK,YAAchI,KAAK0hD,UAAUlD,QAAQO,SACxI,GAA1BgqD,EAAgBpjG,OAAa,CAC/B8I,GAAW,gBACX,KAAK,GAAIjJ,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,KAEiB,GAA1Bs6F,EAAgBpjG,SAAc8I,GAAW,KACzCzO,KAAK0hD,UAAUZ,cAAgB9gD,KAAKmpG,gBAAgBroD,eACtDryC,GAAW,mBAAqBzO,KAAK0hD,UAAUZ,cAEjDryC,GAAW,SAER,CAOH,GANAA,EAAU,kBACNzO,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,cAAgBj/C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBD,cAAgB8pD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,cACrNj/C,KAAK0hD,UAAUlD,QAAQI,gBAAkB5+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBN,gBAAwBmqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQI,gBACrL5+C,KAAK0hD,UAAUlD,QAAQK,cAAgB7+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBL,cAA0BkqD,EAAgB/gG,KAAK,iBAAmBhI,KAAK0hD,UAAUlD,QAAQK,cACnL7+C,KAAK0hD,UAAUlD,QAAQM,gBAAkB9+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBJ,gBAAwBiqD,EAAgB/gG,KAAK,mBAAqBhI,KAAK0hD,UAAUlD,QAAQM,gBACrL9+C,KAAK0hD,UAAUlD,QAAQO,SAAW/+C,KAAKmpG,gBAAgB3qD,QAAQU,sBAAsBH,SAA+BgqD,EAAgB/gG,KAAK,YAAchI,KAAK0hD,UAAUlD,QAAQO,SACpJ,GAA1BgqD,EAAgBpjG,OAAa,CAC/B8I,GAAW,oCACX,KAAK,GAAIjJ,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,MAOb,GALAA,GAAW,wBACXs6F,KACI/oG,KAAK0hD,UAAUjB,mBAAmBvlB,WAAal7B,KAAKmpG,gBAAgB1oD,mBAAmBvlB,WAAkC6tE,EAAgB/gG,KAAK,cAAgBhI,KAAK0hD,UAAUjB,mBAAmBvlB,WAChMh2B,KAAK2lB,IAAI7qB,KAAK0hD,UAAUjB,mBAAmBC,kBAAoB1gD,KAAKmpG,gBAAgB1oD,mBAAmBC,iBAAkBqoD,EAAgB/gG,KAAK,oBAAsBhI,KAAK0hD,UAAUjB,mBAAmBC,iBACtM1gD,KAAK0hD,UAAUjB,mBAAmBE,aAAe3gD,KAAKmpG,gBAAgB1oD,mBAAmBE,aAAgCooD,EAAgB/gG,KAAK,gBAAkBhI,KAAK0hD,UAAUjB,mBAAmBE,aACxK,GAA1BooD,EAAgBpjG,OAAa,CAC/B,IAAK,GAAIH,GAAI,EAAGA,EAAIujG,EAAgBpjG,OAAQH,IAC1CiJ,GAAWs6F,EAAgBvjG,GACvBA,EAAIujG,EAAgBpjG,OAAS,IAC/B8I,GAAW,KAGfA,IAAW,QAGXA,IAAW,eAEbA,IAAW,KAIbzO,KAAKopG,WAAWnlF,UAAYxV,EAO9B,QAAS46F,KACP,GAAIl0F,IAAO,iBAAkB,gBAAiB,iBAC1Cm0F,EAAc/3F,SAASg4F,cAAc,6CAA6CliG,MAClFmiG,EAAU,SAAWF,EAAc,SACnCG,EAAQl4F,SAASo3F,eAAea,EACpCC,GAAMx8F,MAAM26B,QAAU,OACtB,KAAK,GAAIpiC,GAAI,EAAGA,EAAI2P,EAAIxP,OAAQH,IAC1B2P,EAAI3P,IAAMgkG,IACZC,EAAQl4F,SAASo3F,eAAexzF,EAAI3P,IACpCikG,EAAMx8F,MAAM26B,QAAU,OAG1B5nC,MAAKwoG,gBACc,KAAfc,GACFtpG,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,GAErB,KAAf46F,EAC0C,GAA7CtpG,KAAK0hD,UAAUjB,mBAAmB/xC,UACpC1O,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,EAC3C1O,KAAK0hD,UAAUZ,aAAapyC,SAAU,EACtC1O,KAAKglD,6BAIPhlD,KAAK0hD,UAAUjB,mBAAmB/xC,SAAU,EAC5C1O,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SAAU,EACvD1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAU,GAE7C1O,KAAKmtE,0BACL,IAAIu7B,GAAqBn3F,SAASo3F,eAAe,qBACCD,GAAmBz7F,MAAMd,WAAhC,GAAvCnM,KAAK0hD,UAAUZ,aAAapyC,QAAwD,UACR,UAChF1O,KAAK8kD,QAAS,EACd9kD,KAAK4P,QAWP,QAASi5F,GAAkBxoG,EAAGgN,EAAIq8F,GAChC,GAAIC,GAAUtpG,EAAK,SACfupG,EAAar4F,SAASo3F,eAAetoG,GAAIgH,KAEzCpB,OAAMC,QAAQmH,IAChBkE,SAASo3F,eAAegB,GAAStiG,MAAQgG,EAAIzC,SAASg/F,IACtD5pG,KAAK6pG,yBAAyBH,EAAsBr8F,EAAIzC,SAASg/F,OAGjEr4F,SAASo3F,eAAegB,GAAStiG,MAAQuD,SAASyC,GAAOgY,WAAWukF,GACpE5pG,KAAK6pG,yBAAyBH,EAAuB9+F,SAASyC,GAAOgY,WAAWukF,MAGrD,gCAAzBF,GACuB,sCAAzBA,GACyB,kCAAzBA,IACA1pG,KAAKglD,2BAEPhlD,KAAK8
kD,QAAS,EACd9kD,KAAK4P,QA7sBP,GAAIjP,GAAOT,EAAoB,GAC3B4pG,EAAiB5pG,EAAoB,IACrC6pG,EAA4B7pG,EAAoB,IAChD8pG,EAAiB9pG,EAAoB,GAOzCN,GAAQqqG,iBAAmB,WACzBjqG,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SAAW1O,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,QAC7E1O,KAAKmtE,2BACLntE,KAAK8kD,QAAS,EACd9kD,KAAK4P,SASPhQ,EAAQutE,yBAA2B,WAEe,GAA5CntE,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,SACnC1O,KAAKktE,YAAY48B,GACjB9pG,KAAKktE,YAAY68B,GAEjB/pG,KAAK0hD,UAAUlD,QAAQI,eAAiB5+C,KAAK0hD,UAAUlD,QAAQC,UAAUG,eACzE5+C,KAAK0hD,UAAUlD,QAAQK,aAAe7+C,KAAK0hD,UAAUlD,QAAQC,UAAUI,aACvE7+C,KAAK0hD,UAAUlD,QAAQM,eAAiB9+C,KAAK0hD,UAAUlD,QAAQC,UAAUK,eACzE9+C,KAAK0hD,UAAUlD,QAAQO,QAAU/+C,KAAK0hD,UAAUlD,QAAQC,UAAUM,QAElE/+C,KAAK+sE,WAAWi9B,IAE+C,GAAxDhqG,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,SACpD1O,KAAKktE,YAAY88B,GACjBhqG,KAAKktE,YAAY48B,GAEjB9pG,KAAK0hD,UAAUlD,QAAQI,eAAiB5+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBN,eACrF5+C,KAAK0hD,UAAUlD,QAAQK,aAAe7+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBL,aACnF7+C,KAAK0hD,UAAUlD,QAAQM,eAAiB9+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBJ,eACrF9+C,KAAK0hD,UAAUlD,QAAQO,QAAU/+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBH,QAE9E/+C,KAAK+sE,WAAWg9B,KAGhB/pG,KAAKktE,YAAY88B,GACjBhqG,KAAKktE,YAAY68B,GACjB/pG,KAAKkqG,cAAgB1jG,OAErBxG,KAAK0hD,UAAUlD,QAAQI,eAAiB5+C,KAAK0hD,UAAUlD,QAAQQ,UAAUJ,eACzE5+C,KAAK0hD,UAAUlD,QAAQK,aAAe7+C,KAAK0hD,UAAUlD,QAAQQ,UAAUH,aACvE7+C,KAAK0hD,UAAUlD,QAAQM,eAAiB9+C,KAAK0hD,UAAUlD,QAAQQ,UAAUF,eACzE9+C,KAAK0hD,UAAUlD,QAAQO,QAAU/+C,KAAK0hD,UAAUlD,QAAQQ,UAAUD,QAElE/+C,KAAK+sE,WAAW+8B,KAUpBlqG,EAAQuqG,4BAA8B,WAEL,GAA3BnqG,KAAK8jD,YAAYn+C,OACnB3F,KAAKg9C,MAAMh9C,KAAK8jD,YAAY,IAAIsa,UAAU,EAAG,IAIzCp+D,KAAK8jD,YAAYn+C,OAAS3F,KAAK0hD,UAAUvC,WAAWE,kBAAyD,GAArCr/C,KAAK0hD,UAAUvC,WAAWzwC,SACpG1O,KAAKm8F,aAAan8F,KAAK0hD,UAAUvC,WAAWG,eAAe,GAI7Dt/C,KAAKoqG,qBAUTxqG,EAAQwqG,iBAAmB,WAKzBpqG,KAAKqqG,gCACLrqG,KAAKsqG,uBAEDtqG,KAAK0hD,UAAUlD,QAAQM,eAAiB,IACC,GAAvC9+C,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,QAC7E/gD,KAAKuqG,oCAGuD,GAAxDvqG,KAAK0hD,UAAUlD,QAAQU,sBAAsBxwC,QAC/C1O,KAAKwqG,qCAGLxqG,KAAKyqG,2BAeb7qG,EAAQyuD,wBAA0B,WAChC,GAA2C,GAAvCruD,KAAK0hD,UAAUZ,aAAapyC,SAA0D,GAAvC1O,KAAK0hD,UAAUZ,aAAaC,QAAiB,CAC9F/gD,KAAK4jD,oBACL5jD,KAAK6jD,yBAEL,KAAK,GAAIsC,KAAUnmD,MAAKg9C,MAClBh9C,KAAKg9C,MAAMl3C,eAAeqgD,KAC5BnmD,KAAK4jD,iBAAiBuC,GAAUnmD,KAAKg9C,MAAMmJ,GAG/C,IAAIogD,GAAevmG,KAAKkvD,QAAiB,QAAS,KAClD,KAAK,GAAIw7C,KAAiBnE,GACpBA,EAAazgG,eAAe4kG,KAC1B1qG,KAAK89C,MAAMh4C,eAAeygG,EAAamE,GAAez4C,cACxDjyD,KAAK4jD,iBAAiB8mD,GAAiBnE,EAAamE,GAGpDnE,EAAamE,GAAetsC,UAAU,EAAG,GAK/C,KAAK,GAAIlX,KAAOlnD,MAAK4jD,iBACf5jD,KAAK4jD,iBAAiB99C,eAAeohD,IACvClnD,KAAK6jD,uBAAuB77C,KAAKk/C,OAKrClnD,MAAK4jD,iBAAmB5jD,KAAKg9C,MAC7Bh9C,KAAK6jD,uBAAyB7jD,KAAK8jD,aAUvClkD,EAAQyqG,8BAAgC,WACtC,GAAIzrF,GAAIC,EAAI8G,EAAUmgC,EAAMtgD,EACxBw3C,EAAQh9C,KAAK4jD,iBACb+mD,EAAU3qG,KAAK0hD,UAAUlD,QAAQI,eACjCgsD,EAAe,CAEnB,KAAKplG,EAAI,EAAGA,EAAIxF,KAAK6jD,uBAAuBl+C,OAAQH,IAClDsgD,EAAO9I,EAAMh9C,KAAK6jD,uBAAuBr+C,IACzCsgD,EAAK/G,QAAU/+C,KAAK0hD,UAAUlD,QAAQO,QAEhB,WAAlB/+C,KAAK88F,WAAqC,GAAX6N,GACjC/rF,GAAMknC,EAAK/zC,EACX8M,GAAMinC,EAAK9zC,EACX2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpC+rF,EAA4B,GAAZjlF,EAAiB,EAAKglF,EAAUhlF,EAChDmgC,EAAKyV,GAAK38C,EAAKgsF,EACf9kD,EAAK0V,GAAK38C,EAAK+rF,IAGf9kD,EAAKyV,GAAK,EACVzV,EAAK0V,GAAK,IAahB57D,EAAQ6qG,uBAAyB,WAC/B,GAAII,GAAYn9C,EAAMV,EAClBpuC,EAAIC,EAAI08C,EAAIC,EAAIsvC,EAAanlF,EAC7Bm4B,EAAQ99C,KAAK89C,KAGjB,KAAKkP,IAAUlP,GACTA,EAAMh4C,eAAeknD,KACvBU,EAAO5P,EAAMkP,GACTU,EAAKC,WAEH3tD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKkG,OAAS5zD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKiG,UACzEk3C,EAAan9C,EAAKlP,QAAQK,aAE1BgsD,IAAen9C,EAAKrkC,GAAGgzC,YAAc3O,EAAKtkC,KAAKizC,YAAc,GAAKr8D,KAAK0hD,UAAUvC,WAAWY,
WAE5FnhC,EAAM8uC,EAAKtkC,KAAKrX,EAAI27C,EAAKrkC,GAAGtX,EAC5B8M,EAAM6uC,EAAKtkC,KAAKpX,EAAI07C,EAAKrkC,GAAGrX,EAC5B2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpB,GAAZ8G,IACFA,EAAW,KAIbmlF,EAAc9qG,KAAK0hD,UAAUlD,QAAQM,gBAAkB+rD,EAAallF,GAAYA,EAEhF41C,EAAK38C,EAAKksF,EACVtvC,EAAK38C,EAAKisF,EAEVp9C,EAAKtkC,KAAKmyC,IAAMA,EAChB7N,EAAKtkC,KAAKoyC,IAAMA,EAChB9N,EAAKrkC,GAAGkyC,IAAMA,EACd7N,EAAKrkC,GAAGmyC,IAAMA,KAexB57D,EAAQ2qG,kCAAoC,WAC1C,GAAIM,GAAYn9C,EAAMV,EAAQ+9C,EAC1BjtD,EAAQ99C,KAAK89C,KAGjB,KAAKkP,IAAUlP,GACb,GAAIA,EAAMh4C,eAAeknD,KACvBU,EAAO5P,EAAMkP,GACTU,EAAKC,WAEH3tD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKkG,OAAS5zD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKiG,SACzD,MAAZjG,EAAKuB,KAAa,CACpB,GAAI+7C,GAAQt9C,EAAKrkC,GACb4hF,EAAQv9C,EAAKuB,IACbi8C,EAAQx9C,EAAKtkC,IAEjByhF,GAAan9C,EAAKlP,QAAQK,aAE1BksD,EAAsBC,EAAM3uC,YAAc6uC,EAAM7uC,YAAc,EAG9DwuC,GAAcE,EAAsB/qG,KAAK0hD,UAAUvC,WAAWY,WAC9D//C,KAAKmrG,sBAAsBH,EAAOC,EAAO,GAAMJ,GAC/C7qG,KAAKmrG,sBAAsBF,EAAOC,EAAO,GAAML,KAiB3DjrG,EAAQurG,sBAAwB,SAAUH,EAAOC,EAAOJ,GACtD,GAAIjsF,GAAIC,EAAI08C,EAAIC,EAAIsvC,EAAanlF,CAEjC/G,GAAMosF,EAAMj5F,EAAIk5F,EAAMl5F,EACtB8M,EAAMmsF,EAAMh5F,EAAIi5F,EAAMj5F,EACtB2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpB,GAAZ8G,IACFA,EAAW,KAIbmlF,EAAc9qG,KAAK0hD,UAAUlD,QAAQM,gBAAkB+rD,EAAallF,GAAYA,EAEhF41C,EAAK38C,EAAKksF,EACVtvC,EAAK38C,EAAKisF,EAEVE,EAAMzvC,IAAMA,EACZyvC,EAAMxvC,IAAMA,EACZyvC,EAAM1vC,IAAMA,EACZ0vC,EAAMzvC,IAAMA,GAId57D,EAAQwqD,6BAA+B,WACrC,GAAkC5jD,SAA9BxG,KAAKorG,qBAAoC,CAC3C,KAAOprG,KAAKorG,qBAAqB1nF,iBAC/B1jB,KAAKorG,qBAAqBj6F,YAAYnR,KAAKorG,qBAAqBznF,WAGlE3jB,MAAKorG,qBAAqBvhG,WAAWsH,YAAYnR,KAAKorG,sBACtDprG,KAAKorG,qBAAuB5kG,SAQhC5G,EAAQwtE,0BAA4B,WAClC,GAAkC5mE,SAA9BxG,KAAKorG,qBAAoC,CAC3CprG,KAAKmpG,mBACLxoG,EAAK8F,WAAWzG,KAAKmpG,gBAAgBnpG,KAAK0hD,UAE1C,IAAI2pD,IAAgC,KAAM,KAAM,KAAM,KACtDrrG,MAAKorG,qBAAuB75F,SAASM,cAAc,OACnD7R,KAAKorG,qBAAqBvjG,UAAY,uBACtC7H,KAAKorG,qBAAqBnnF,UAAY,onBAW2E,GAAKjkB,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAyB,wGAA2G,GAAK3+C,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAyB,4JAGpP3+C,KAAK0hD,UAAUlD,QAAQC,UAAUG,eAAiB,wFAA0F5+C,KAAK0hD,UAAUlD,QAAQC,UAAUG,eAAiB,2JAG/L5+C,KAAK0hD,UAAUlD,QAAQC,UAAUI,aAAe,sFAAwF7+C,KAAK0hD,UAAUlD,QAAQC,UAAUI,aAAe,6JAGtL7+C,KAAK0hD,UAAUlD,QAAQC,UAAUK,eAAiB,0FAA4F9+C,KAAK0hD,UAAUlD,QAAQC,UAAUK,eAAiB,sJAGvM9+C,KAAK0hD,UAAUlD,QAAQC,UAAUM,QAAU,4FAA8F/+C,KAAK0hD,UAAUlD,QAAQC,UAAUM,QAAU,sPAM/K/+C,KAAK0hD,UAAUlD,QAAQQ,UAAUC,aAAe,kGAAoGj/C,KAAK0hD,UAAUlD,QAAQQ,UAAUC,aAAe,2JAGnMj/C,KAAK0hD,UAAUlD,QAAQQ,UAAUJ,eAAiB,uFAAyF5+C,KAAK0hD,UAAUlD,QAAQQ,UAAUJ,eAAiB,0JAG9L5+C,KAAK0hD,UAAUlD,QAAQQ,UAAUH,aAAe,qFAAuF7+C,KAAK0hD,UAAUlD,QAAQQ,UAAUH,aAAe,4JAGrL7+C,KAAK0hD,UAAUlD,QAAQQ,UAAUF,eAAiB,yFAA2F9+C,KAAK0hD,UAAUlD,QAAQQ,UAAUF,eAAiB,qJAGtM9+C,KAAK0hD,UAAUlD,QAAQQ,UAAUD,QAAU,2FAA6F/+C,KAAK0hD,UAAUlD,QAAQQ,UAAUD,QAAU,oQAM9K/+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,aAAe,kGAAoGj/C,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,aAAe,2JAG3Nj/C,KAAK0hD,UAAUlD,QAAQU,sBAAsBN,eAAiB,uFAAyF5+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBN,eAAiB,0JAGtN5+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBL,aAAe,qFAAuF7+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBL,aAAe,4JAG7M7+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBJ,eAAiB,yFAA2F9+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBJ,eAAiB,qJAG9N9+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBH,QAAU,2FAA6F/+C,KAAK0hD,UAAUlD,QAAQU,sBAAsBH,QAAU,uJAG3MssD,EAA6B1kG,QAAQ3G,KAAK0hD,UAAUjB,mBAAmBvlB,WAAa,0FAA4Fl7B,KAAK0hD,UAAUjB,mBAAmBvlB,UAAY,oKAGtNl7B,KAAK0hD,UAAUjB,mBAAmBC,gBAAkB,yFAA2F1gD,KAAK0hD,UAAUjB,mBAAmBC,gBAAkB,6JAGvM1gD,KAAK0hD,UAAUjB,mBAAmBE,YAAc,wFAA0F3gD,KAAK0hD,UAAUjB,mBAAmBE,YAAc,odAU9R3gD,KAAKyZ,iBAAiB6xF,cAAc15F,aAAa5R,KAAKorG,qBAAsBprG,KAAKyZ,kBACjFzZ,KAAKopG,WAAa73F,SAASM,cAAc,OACzC7R
,KAAKopG,WAAWn8F,MAAMswC,SAAW,OACjCv9C,KAAKopG,WAAWn8F,MAAMszD,WAAa,UACnCvgE,KAAKyZ,iBAAiB6xF,cAAc15F,aAAa5R,KAAKopG,WAAYppG,KAAKyZ,iBAEvE;GAAI8xF,EACJA,GAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,GAAI,2CACvEurG,EAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,EAAG,0BACtEurG,EAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,EAAG,0BACtEurG,EAAeh6F,SAASo3F,eAAe,eACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,cAAe,EAAG,wBACtEurG,EAAeh6F,SAASo3F,eAAe,iBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,gBAAiB,EAAG,mBAExEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,kCACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,wBACrEurG,EAAeh6F,SAASo3F,eAAe,gBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,eAAgB,EAAG,mBAEvEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,8CACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,0BACrEurG,EAAeh6F,SAASo3F,eAAe,cACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,aAAc,EAAG,wBACrEurG,EAAeh6F,SAASo3F,eAAe,gBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,eAAgB,EAAG,mBACvEurG,EAAeh6F,SAASo3F,eAAe,qBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,oBAAqBqrG,EAA8B,gCACvGE,EAAeh6F,SAASo3F,eAAe,kBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,iBAAkB,EAAG,sCACzEurG,EAAeh6F,SAASo3F,eAAe,iBACvC4C,EAAa1iF,SAAWggF,EAAiB/zE,KAAK90B,KAAM,gBAAiB,EAAG,iCAExE,IAAIgpG,GAAez3F,SAASo3F,eAAe,wBACvCM,EAAe13F,SAASo3F,eAAe,wBACvC6C,EAAej6F,SAASo3F,eAAe,uBAC3CM,GAAaC,SAAU,EACnBlpG,KAAK0hD,UAAUlD,QAAQC,UAAU/vC,UACnCs6F,EAAaE,SAAU,GAErBlpG,KAAK0hD,UAAUjB,mBAAmB/xC,UACpC88F,EAAatC,SAAU,EAGzB,IAAIR,GAAqBn3F,SAASo3F,eAAe,sBAC7C8C,EAAwBl6F,SAASo3F,eAAe,yBAChD+C,EAAwBn6F,SAASo3F,eAAe,wBAEpDD,GAAmB12E,QAAUy2E,EAAwB3zE,KAAK90B,MAC1DyrG,EAAsBz5E,QAAU42E,EAAqB9zE,KAAK90B,MAC1D0rG,EAAsB15E,QAAU82E,EAAqBh0E,KAAK90B,MAExD0oG,EAAmBz7F,MAAMd,WADQ,GAA/BnM,KAAK0hD,UAAUZ,cAA8D,GAAtC9gD,KAAK0hD,UAAUiqD,oBAClB,UAGA,UAIxCtC,EAAqBtxF,MAAM/X,MAE3BgpG,EAAangF,SAAWwgF,EAAqBv0E,KAAK90B,MAClDipG,EAAapgF,SAAWwgF,EAAqBv0E,KAAK90B,MAClDwrG,EAAa3iF,SAAWwgF,EAAqBv0E,KAAK90B,QAWtDJ,EAAQiqG,yBAA2B,SAAUH,EAAuBriG,GAClE,GAAIukG,GAAYlC,EAAsB3hG,MAAM,IACpB,IAApB6jG,EAAUjmG,OACZ3F,KAAK0hD,UAAUkqD,EAAU,IAAMvkG,EAEJ,GAApBukG,EAAUjmG,OACjB3F,KAAK0hD,UAAUkqD,EAAU,IAAIA,EAAU,IAAMvkG,EAElB,GAApBukG,EAAUjmG,SACjB3F,KAAK0hD,UAAUkqD,EAAU,IAAIA,EAAU,IAAIA,EAAU,IAAMvkG,KA6N3D,SAASxH,GAEb,QAASgsG,GAAeC,GACvB,KAAM,IAAIloG,OAAM,uBAAyBkoG,EAAM,MAEhDD,EAAez+F,KAAO,WAAa,UACnCy+F,EAAeE,QAAUF,EACzBhsG,EAAOD,QAAUisG,EACjBA,EAAexrG,GAAK,IAKhB,SAASR,EAAQD,GAQrBA,EAAQ0qG,qBAAuB,WAC7B,GAAI1rF,GAAIC,EAAW8G,EAAU41C,EAAIC,EAAIuvC,EACnCiB,EAAgBhB,EAAOC,EAAOzlG,EAAGomB,EAE/BoxB,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBAGnBooD,EAAS,GAAK,EACd7lG,EAAI,EAAI,EAGR64C,EAAej/C,KAAK0hD,UAAUlD,QAAQQ,UAAUC,aAChDitD,EAAkBjtD,CAItB,KAAKz5C,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAS,EAAGH,IAEtC,IADAwlG,EAAQhuD,EAAM8G,EAAYt+C,IACrBomB,EAAIpmB,EAAI,EAAGomB,EAAIk4B,EAAYn+C,OAAQimB,IAAK,CAC3Cq/E,EAAQjuD,EAAM8G,EAAYl4B,IAC1Bm/E,EAAsBC,EAAM3uC,YAAc4uC,EAAM5uC,YAAc,EAE9Dz9C,EAAKqsF,EAAMl5F,EAAIi5F,EAAMj5F,EACrB8M,EAAKosF,EAAMj5F,EAAIg5F,EAAMh5F,EACrB2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAGpB,GAAZ8G,IACFA,EAAW,GAAIzgB,KAAKE,SACpBwZ,EAAK+G,GAGPumF
,EAA0C,GAAvBnB,EAA4B9rD,EAAgBA,GAAgB,EAAI8rD,EAAsB/qG,KAAK0hD,UAAUvC,WAAWW,sBACnI,IAAIv6C,GAAI0mG,EAASC,CACF,GAAIA,EAAfvmF,IAEAqmF,EADa,GAAME,EAAjBvmF,EACe,EAGApgB,EAAIogB,EAAWvf,EAIlC4lG,GAA0C,GAAvBjB,EAA4B,EAAI,EAAIA,EAAsB/qG,KAAK0hD,UAAUvC,WAAWU,mBACvGmsD,GAAkC9mG,KAAKwH,IAAIiZ,EAAS,IAAKumF,GAEzD3wC,EAAK38C,EAAKotF,EACVxwC,EAAK38C,EAAKmtF,EACVhB,EAAMzvC,IAAMA,EACZyvC,EAAMxvC,IAAMA,EACZyvC,EAAM1vC,IAAMA,EACZ0vC,EAAMzvC,IAAMA,MAUhB,SAAS37D,EAAQD,GAQrBA,EAAQ0qG,qBAAuB,WAC7B,GAAI1rF,GAAIC,EAAI8G,EAAU41C,EAAIC,EACxBwwC,EAAgBhB,EAAOC,EAAOzlG,EAAGomB,EAE/BoxB,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBAGnB5E,EAAej/C,KAAK0hD,UAAUlD,QAAQU,sBAAsBD,YAIhE,KAAKz5C,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAS,EAAGH,IAEtC,IADAwlG,EAAQhuD,EAAM8G,EAAYt+C,IACrBomB,EAAIpmB,EAAI,EAAGomB,EAAIk4B,EAAYn+C,OAAQimB,IAItC,GAHAq/E,EAAQjuD,EAAM8G,EAAYl4B,IAGtBo/E,EAAMptD,OAASqtD,EAAMrtD,MAAO,CAE9Bh/B,EAAKqsF,EAAMl5F,EAAIi5F,EAAMj5F,EACrB8M,EAAKosF,EAAMj5F,EAAIg5F,EAAMh5F,EACrB2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,EAGpC,IAAIstF,GAAY,GAEdH,GADa/sD,EAAXt5B,GACgBzgB,KAAK4uB,IAAIq4E,EAAUxmF,EAAS,GAAKzgB,KAAK4uB,IAAIq4E,EAAUltD,EAAa,GAGlE,EAGD,GAAZt5B,EACFA,EAAW,IAGXqmF,GAAkCrmF,EAEpC41C,EAAK38C,EAAKotF,EACVxwC,EAAK38C,EAAKmtF,EAEVhB,EAAMzvC,IAAMA,EACZyvC,EAAMxvC,IAAMA,EACZyvC,EAAM1vC,IAAMA,EACZ0vC,EAAMzvC,IAAMA,IAYtB57D,EAAQ4qG,mCAAqC,WAS3C,IAAK,GARDK,GAAYn9C,EAAMV,EAClBpuC,EAAIC,EAAI08C,EAAIC,EAAIsvC,EAAanlF,EAC7Bm4B,EAAQ99C,KAAK89C,MAEbd,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBAGdr+C,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CAC3C,GAAIwlG,GAAQhuD,EAAM8G,EAAYt+C,GAC9BwlG,GAAMoB,SAAW,EACjBpB,EAAMqB,SAAW,EAKnB,IAAKr/C,IAAUlP,GACb,GAAIA,EAAMh4C,eAAeknD,KACvBU,EAAO5P,EAAMkP,GACTU,EAAKC,WAEH3tD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKkG,OAAS5zD,KAAKg9C,MAAMl3C,eAAe4nD,EAAKiG,SAqBzE,GApBAk3C,EAAan9C,EAAKlP,QAAQK,aAE1BgsD,IAAen9C,EAAKrkC,GAAGgzC,YAAc3O,EAAKtkC,KAAKizC,YAAc,GAAKr8D,KAAK0hD,UAAUvC,WAAWY,WAE5FnhC,EAAM8uC,EAAKtkC,KAAKrX,EAAI27C,EAAKrkC,GAAGtX,EAC5B8M,EAAM6uC,EAAKtkC,KAAKpX,EAAI07C,EAAKrkC,GAAGrX,EAC5B2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAEpB,GAAZ8G,IACFA,EAAW,KAIbmlF,EAAc9qG,KAAK0hD,UAAUlD,QAAQM,gBAAkB+rD,EAAallF,GAAYA,EAEhF41C,EAAK38C,EAAKksF,EACVtvC,EAAK38C,EAAKisF,EAINp9C,EAAKrkC,GAAGu0B,OAAS8P,EAAKtkC,KAAKw0B,MAC7B8P,EAAKrkC,GAAG+iF,UAAY7wC,EACpB7N,EAAKrkC,GAAGgjF,UAAY7wC,EACpB9N,EAAKtkC,KAAKgjF,UAAY7wC,EACtB7N,EAAKtkC,KAAKijF,UAAY7wC,MAEnB,CACH,GAAI7U,GAAS,EACb+G,GAAKrkC,GAAGkyC,IAAM5U,EAAO4U,EACrB7N,EAAKrkC,GAAGmyC,IAAM7U,EAAO6U,EACrB9N,EAAKtkC,KAAKmyC,IAAM5U,EAAO4U,EACvB7N,EAAKtkC,KAAKoyC,IAAM7U,EAAO6U,EAQjC,GACI4wC,GAAUC,EADVvB,EAAc,CAElB,KAAKtlG,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CACvC,GAAIsgD,GAAO9I,EAAM8G,EAAYt+C,GAC7B4mG,GAAWlnG,KAAK4G,IAAIg/F,EAAY5lG,KAAKwH,KAAKo+F,EAAYhlD,EAAKsmD,WAC3DC,EAAWnnG,KAAK4G,IAAIg/F,EAAY5lG,KAAKwH,KAAKo+F,EAAYhlD,EAAKumD,WAE3DvmD,EAAKyV,IAAM6wC,EACXtmD,EAAK0V,IAAM6wC,EAIb,GAAIC,GAAU,EACVC,EAAU,CACd,KAAK/mG,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CACvC,GAAIsgD,GAAO9I,EAAM8G,EAAYt+C,GAC7B8mG,IAAWxmD,EAAKyV,GAChBgxC,GAAWzmD,EAAK0V,GAElB,GAAIgxC,GAAeF,EAAUxoD,EAAYn+C,OACrC8mG,EAAeF,EAAUzoD,EAAYn+C,MAEzC,KAAKH,EAAI,EAAGA,EAAIs+C,EAAYn+C,OAAQH,IAAK,CACvC,GAAIsgD,GAAO9I,EAAM8G,EAAYt+C,GAC7BsgD,GAAKyV,IAAMixC,EACX1mD,EAAK0V,IAAMixC,KAOX,SAAS5sG,EAAQD,GAQrBA,EAAQ0qG,qBAAuB,WAC7B,GAA8D,GAA1DtqG,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAA4B,CAC/D,GAAImH,GACA9I,EAAQh9C,KAAK4jD,iBACbE,EAAc9jD,KAAK6jD,uBACnB6oD,EAAY5oD,EAAYn+C,MAE5B3F,MAAK2sG,mBAAmB3vD,EAAM8G,EAK9B,KAAK,GAHDomD,GAAgBlqG,KAAKkqG,cAGhB1kG,EAAI,EAAOknG,EAAJlnG,EAAeA,IAC7BsgD,EAAO9I,EAAM8G,EAAYt+C,IACrBsgD,EAAKr3C,QAAQwuC,KAAO,IAEtBj9C,KAAK4sG,sBAAsB
1C,EAAcxqG,KAAKmtG,SAASC,GAAGhnD,GAC1D9lD,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASE,GAAGjnD,GAC1D9lD,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASG,GAAGlnD,GAC1D9lD,KAAK4sG,sBAAsB1C,EAAcxqG,KAAKmtG,SAASI,GAAGnnD,MAelElmD,EAAQgtG,sBAAwB,SAASM,EAAapnD,GAEpD,GAAIonD,EAAaC,cAAgB,EAAG,CAClC,GAAIvuF,GAAGC,EAAG8G,CAUV,IAPA/G,EAAKsuF,EAAaE,aAAar7F,EAAI+zC,EAAK/zC,EACxC8M,EAAKquF,EAAaE,aAAap7F,EAAI8zC,EAAK9zC,EACxC2T,EAAWzgB,KAAKyqB,KAAK/Q,EAAKA,EAAKC,EAAKA,GAKhC8G,EAAWunF,EAAaG,SAAWrtG,KAAK0hD,UAAUlD,QAAQC,UAAUC,cAAe,CAErE,GAAZ/4B,IACFA,EAAW,GAAIzgB,KAAKE,SACpBwZ,EAAK+G,EAEP,IAAIilF,GAAe5qG,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAwBuuD,EAAajwD,KAAO6I,EAAKr3C,QAAQwuC,MAAQt3B,EAAWA,EAAWA,GACvI41C,EAAK38C,EAAKgsF,EACVpvC,EAAK38C,EAAK+rF,CACd9kD,GAAKyV,IAAMA,EACXzV,EAAK0V,IAAMA,MAIX,IAAkC,GAA9B0xC,EAAaC,cACfntG,KAAK4sG,sBAAsBM,EAAaL,SAASC,GAAGhnD,GACpD9lD,KAAK4sG,sBAAsBM,EAAaL,SAASE,GAAGjnD,GACpD9lD,KAAK4sG,sBAAsBM,EAAaL,SAASG,GAAGlnD,GACpD9lD,KAAK4sG,sBAAsBM,EAAaL,SAASI,GAAGnnD,OAGpD,IAAIonD,EAAaL,SAASn6F,KAAKrS,IAAMylD,EAAKzlD,GAAI,CAE5B,GAAZslB,IACFA,EAAW,GAAIzgB,KAAKE,SACpBwZ,EAAK+G,EAEP,IAAIilF,GAAe5qG,KAAK0hD,UAAUlD,QAAQC,UAAUE,sBAAwBuuD,EAAajwD,KAAO6I,EAAKr3C,QAAQwuC,MAAQt3B,EAAWA,EAAWA,GACvI41C,EAAK38C,EAAKgsF,EACVpvC,EAAK38C,EAAK+rF,CACd9kD,GAAKyV,IAAMA,EACXzV,EAAK0V,IAAMA,KAcrB57D,EAAQ+sG,mBAAqB,SAAS3vD,EAAM8G,GAU1C,IAAK,GATDgC,GACA4mD,EAAY5oD,EAAYn+C,OAExBsgD,EAAO/hD,OAAOopG,UAChBvnD,EAAO7hD,OAAOopG,UACdpnD,GAAOhiD,OAAOopG,UACdtnD,GAAO9hD,OAAOopG,UAGP9nG,EAAI,EAAOknG,EAAJlnG,EAAeA,IAAK,CAClC,GAAIuM,GAAIirC,EAAM8G,EAAYt+C,IAAIuM,EAC1BC,EAAIgrC,EAAM8G,EAAYt+C,IAAIwM,CAC1BgrC,GAAM8G,EAAYt+C,IAAIiJ,QAAQwuC,KAAO,IAC/BgJ,EAAJl0C,IAAYk0C,EAAOl0C,GACnBA,EAAIm0C,IAAQA,EAAOn0C,GACfg0C,EAAJ/zC,IAAY+zC,EAAO/zC,GACnBA,EAAIg0C,IAAQA,EAAOh0C,IAI3B,GAAIu7F,GAAWroG,KAAK2lB,IAAIq7B,EAAOD,GAAQ/gD,KAAK2lB,IAAIm7B,EAAOD,EACnDwnD,GAAW,GAAIxnD,GAAQ,GAAMwnD,EAAUvnD,GAAQ,GAAMunD,IACtCtnD,GAAQ,GAAMsnD,EAAUrnD,GAAQ,GAAMqnD,EAGzD,IAAIC,GAAkB,KAClBC,EAAWvoG,KAAKwH,IAAI8gG,EAAgBtoG,KAAK2lB,IAAIq7B,EAAOD,IACpDynD,EAAe,GAAMD,EACrBxuC,EAAU,IAAOhZ,EAAOC,GAAOgZ,EAAU,IAAOnZ,EAAOC,GAGvDkkD,GACFxqG,MACE0tG,cAAer7F,EAAE,EAAGC,EAAE,GACtBirC,KAAK,EACLxnB,OACEwwB,KAAMgZ,EAAQyuC,EAAaxnD,KAAK+Y,EAAQyuC,EACxC3nD,KAAMmZ,EAAQwuC,EAAa1nD,KAAKkZ,EAAQwuC,GAE1Cr7F,KAAMo7F,EACNJ,SAAU,EAAII,EACdZ,UAAYn6F,KAAK,MACjBgpC,SAAU,EACVkC,MAAO,EACPuvD,cAAe,GAMnB,KAHAntG,KAAK2tG,aAAazD,EAAcxqG,MAG3B8F,EAAI,EAAOknG,EAAJlnG,EAAeA,IACzBsgD,EAAO9I,EAAM8G,EAAYt+C,IACrBsgD,EAAKr3C,QAAQwuC,KAAO,GACtBj9C,KAAK4tG,aAAa1D,EAAcxqG,KAAKomD,EAKzC9lD,MAAKkqG,cAAgBA,GAWvBtqG,EAAQiuG,kBAAoB,SAASX,EAAcpnD,GACjD,GAAIgoD,GAAYZ,EAAajwD,KAAO6I,EAAKr3C,QAAQwuC,KAC7C8wD,EAAe,EAAED,CAErBZ,GAAaE,aAAar7F,EAAIm7F,EAAaE,aAAar7F,EAAIm7F,EAAajwD,KAAO6I,EAAK/zC,EAAI+zC,EAAKr3C,QAAQwuC,KACtGiwD,EAAaE,aAAar7F,GAAKg8F,EAE/Bb,EAAaE,aAAap7F,EAAIk7F,EAAaE,aAAap7F,EAAIk7F,EAAajwD,KAAO6I,EAAK9zC,EAAI8zC,EAAKr3C,QAAQwuC,KACtGiwD,EAAaE,aAAap7F,GAAK+7F,EAE/Bb,EAAajwD,KAAO6wD,CACpB,IAAIE,GAAc9oG,KAAKwH,IAAIxH,KAAKwH,IAAIo5C,EAAKtzC,OAAOszC,EAAKr6B,QAAQq6B,EAAKvzC,MAClE26F,GAAaxxD,SAAYwxD,EAAaxxD,SAAWsyD,EAAeA,EAAcd,EAAaxxD,UAa7F97C,EAAQguG,aAAe,SAASV,EAAapnD,EAAKmoD,IAC1B,GAAlBA,GAA6CznG,SAAnBynG,IAE5BjuG,KAAK6tG,kBAAkBX,EAAapnD,GAGlConD,EAAaL,SAASC,GAAGr3E,MAAMywB,KAAOJ,EAAK/zC,EACzCm7F,EAAaL,SAASC,GAAGr3E,MAAMuwB,KAAOF,EAAK9zC,EAC7ChS,KAAKkuG,eAAehB,EAAapnD,EAAK,MAGtC9lD,KAAKkuG,eAAehB,EAAapnD,EAAK,MAIpConD,EAAaL,SAASC,GAAGr3E,MAAMuwB,KAAOF,EAAK9zC,EAC7ChS,KAAKkuG,eAAehB,EAAapnD,EAAK,MAGtC9lD,KAAKkuG,eAAehB,EAAapnD,EAAK,OAc5ClmD,EAAQsuG,eAAiB,SAAShB,EAAapnD,EAAKqoD,GAClD,OAAQjB,EAAaL,SAASsB,GAAQhB,eACpC,IAAK,GACHD
,EAAaL,SAASsB,GAAQtB,SAASn6F,KAAOozC,EAC9ConD,EAAaL,SAASsB,GAAQhB,cAAgB,EAC9CntG,KAAK6tG,kBAAkBX,EAAaL,SAASsB,GAAQroD,EACrD,MACF,KAAK,GAGConD,EAAaL,SAASsB,GAAQtB,SAASn6F,KAAKX,GAAK+zC,EAAK/zC,GACtDm7F,EAAaL,SAASsB,GAAQtB,SAASn6F,KAAKV,GAAK8zC,EAAK9zC,GACxD8zC,EAAK/zC,GAAK7M,KAAKE,SACf0gD,EAAK9zC,GAAK9M,KAAKE,WAGfpF,KAAK2tG,aAAaT,EAAaL,SAASsB,IACxCnuG,KAAK4tG,aAAaV,EAAaL,SAASsB,GAAQroD,GAElD,MACF,KAAK,GACH9lD,KAAK4tG,aAAaV,EAAaL,SAASsB,GAAQroD,KAatDlmD,EAAQ+tG,aAAe,SAAST,GAE9B,GAAIkB,GAAgB,IACc,IAA9BlB,EAAaC,gBACfiB,EAAgBlB,EAAaL,SAASn6F,KACtCw6F,EAAajwD,KAAO,EAAGiwD,EAAaE,aAAar7F,EAAI,EAAGm7F,EAAaE,aAAap7F,EAAI,GAExFk7F,EAAaC,cAAgB,EAC7BD,EAAaL,SAASn6F,KAAO,KAC7B1S,KAAKquG,cAAcnB,EAAa,MAChCltG,KAAKquG,cAAcnB,EAAa,MAChCltG,KAAKquG,cAAcnB,EAAa,MAChCltG,KAAKquG,cAAcnB,EAAa,MAEX,MAAjBkB,GACFpuG,KAAK4tG,aAAaV,EAAakB,IAenCxuG,EAAQyuG,cAAgB,SAASnB,EAAciB,GAC7C,GAAIloD,GAAKC,EAAKH,EAAKC,EACfsoD,EAAY,GAAMpB,EAAa76F,IACnC,QAAQ87F,GACN,IAAK,KACHloD,EAAOinD,EAAaz3E,MAAMwwB,KAC1BC,EAAOgnD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCvoD,EAAOmnD,EAAaz3E,MAAMswB,KAC1BC,EAAOknD,EAAaz3E,MAAMswB,KAAOuoD,CACjC,MACF,KAAK,KACHroD,EAAOinD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCpoD,EAAOgnD,EAAaz3E,MAAMywB,KAC1BH,EAAOmnD,EAAaz3E,MAAMswB,KAC1BC,EAAOknD,EAAaz3E,MAAMswB,KAAOuoD,CACjC,MACF,KAAK,KACHroD,EAAOinD,EAAaz3E,MAAMwwB,KAC1BC,EAAOgnD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCvoD,EAAOmnD,EAAaz3E,MAAMswB,KAAOuoD,EACjCtoD,EAAOknD,EAAaz3E,MAAMuwB,IAC1B,MACF,KAAK,KACHC,EAAOinD,EAAaz3E,MAAMwwB,KAAOqoD,EACjCpoD,EAAOgnD,EAAaz3E,MAAMywB,KAC1BH,EAAOmnD,EAAaz3E,MAAMswB,KAAOuoD,EACjCtoD,EAAOknD,EAAaz3E,MAAMuwB,KAK9BknD,EAAaL,SAASsB,IACpBf,cAAcr7F,EAAE,EAAEC,EAAE,GACpBirC,KAAK,EACLxnB,OAAOwwB,KAAKA,EAAKC,KAAKA,EAAKH,KAAKA,EAAKC,KAAKA,GAC1C3zC,KAAM,GAAM66F,EAAa76F,KACzBg7F,SAAU,EAAIH,EAAaG,SAC3BR,UAAWn6F,KAAK,MAChBgpC,SAAU,EACVkC,MAAOsvD,EAAatvD,MAAM,EAC1BuvD,cAAe,IAYnBvtG,EAAQ2uG,UAAY,SAASxnF,EAAI5b,GACJ3E,SAAvBxG,KAAKkqG,gBAEPnjF,EAAIO,UAAY,EAEhBtnB,KAAKwuG,YAAYxuG,KAAKkqG,cAAcxqG,KAAKqnB,EAAI5b,KAajDvL,EAAQ4uG,YAAc,SAASC,EAAO1nF,EAAI5b,GAC1B3E,SAAV2E,IACFA,EAAQ,WAGkB,GAAxBsjG,EAAOtB,gBACTntG,KAAKwuG,YAAYC,EAAO5B,SAASC,GAAG/lF,GACpC/mB,KAAKwuG,YAAYC,EAAO5B,SAASE,GAAGhmF,GACpC/mB,KAAKwuG,YAAYC,EAAO5B,SAASI,GAAGlmF,GACpC/mB,KAAKwuG,YAAYC,EAAO5B,SAASG,GAAGjmF,IAEtCA,EAAIY,YAAcxc,EAClB4b,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIe,OAAO2mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIlH,SAEJkH,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIe,OAAO2mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIlH,SAEJkH,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMywB,KAAKuoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIe,OAAO2mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIlH,SAEJkH,EAAIa,YACJb,EAAIc,OAAO4mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMuwB,MAC1Cj/B,EAAIe,OAAO2mF,EAAOh5E,MAAMwwB,KAAKwoD,EAAOh5E,MAAMswB,MAC1Ch/B,EAAIlH,WAaF,SAAShgB,GAEbA,EAAOD,QAAU,SAASC,GAQzB,MAPIA,GAAO6uG,kBACV7uG,EAAOqwE,UAAY,aACnBrwE,EAAO8uG,SAEP9uG,EAAOgtG,YACPhtG,EAAO6uG,gBAAkB,GAEnB7uG,IAMJ,SAASA,EAAQD,IAEO,SAASgvG,GAA0B/uG,EAAOD,QAAUgvG,IAEnDruG,KAAKX"} \ No newline at end of file diff --git a/core/src/main/resources/org/apache/spark/ui/static/vis.min.js b/core/src/main/resources/org/apache/spark/ui/static/vis.min.js index 4af2c818c27a6..2b3b1d60463f7 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/vis.min.js +++ b/core/src/main/resources/org/apache/spark/ui/static/vis.min.js @@ -36,4 +36,3 @@ return!e&&this._longDateFormat[t.toUpperCase()]&&(e=this._longDateFormat[t.toUpp },e.time,this);else 
if(t.eventType&ke)return pi;return mi},reset:function(){clearTimeout(this._timer)},emit:function(t){this.state===pi&&(t&&t.eventType&ke?this.manager.emit(this.options.event+"up",t):(this._input.timeStamp=ve(),this.manager.emit(this.options.event,this._input)))}}),u(ne,ee,{defaults:{event:"rotate",threshold:0,pointers:2},getTouchAction:function(){return[ni]},attrTest:function(t){return this._super.attrTest.call(this,t)&&(Math.abs(t.rotation)>this.options.threshold||this.state&di)}}),u(re,ee,{defaults:{event:"swipe",threshold:10,velocity:.65,direction:Re|Fe,pointers:1},getTouchAction:function(){return ie.prototype.getTouchAction.call(this)},attrTest:function(t){var e,i=this.options.direction;return i&(Re|Fe)?e=t.velocity:i&Re?e=t.velocityX:i&Fe&&(e=t.velocityY),this._super.attrTest.call(this,t)&&i&t.direction&&t.distance>this.options.threshold&&ge(e)>this.options.velocity&&t.eventType&ke},emit:function(t){var e=J(t.direction);e&&this.manager.emit(this.options.event+e,t),this.manager.emit(this.options.event,t)}}),u(ae,K,{defaults:{event:"tap",pointers:1,taps:1,interval:300,time:250,threshold:2,posThreshold:10},getTouchAction:function(){return[oi]},process:function(t){var e=this.options,i=t.pointers.length===e.pointers,s=t.distancet&&s>o;)o%3==0?(this.forceAggregateHubs(!0),this.normalizeClusterLevels()):this.increaseClusterLevel(),i=this.nodeIndices.length,o+=1;o>0&&1==e&&this.repositionNodes(),this._updateCalculationNodes()},e.openCluster=function(t){var e=this.moving;if(t.clusterSize>this.constants.clustering.sectorThreshold&&this._nodeInActiveArea(t)&&("default"!=this._sector()||1!=this.nodeIndices.length)){this._addSector(t);for(var i=0;this.nodeIndices.lengthi;)this.decreaseClusterLevel(),i+=1}else this._expandClusterNode(t,!1,!0),this._updateNodeIndexList(),this._updateDynamicEdges(),this._updateCalculationNodes(),this.updateLabels();this.moving!=e&&this.start()},e.updateClustersDefault=function(){1==this.constants.clustering.enabled&&this.updateClusters(0,!1,!1)},e.increaseClusterLevel=function(){this.updateClusters(-1,!1,!0)},e.decreaseClusterLevel=function(){this.updateClusters(1,!1,!0)},e.updateClusters=function(t,e,i,s){var o=this.moving,n=this.nodeIndices.length;this.previousScale>this.scale&&0==t&&this._collapseSector(),this.previousScale>this.scale||-1==t?this._formClusters(i):(this.previousScalethis.scale||-1==t)&&(this._aggregateHubs(i),this._updateNodeIndexList()),(this.previousScale>this.scale||-1==t)&&(this.handleChains(),this._updateNodeIndexList()),this.previousScale=this.scale,this._updateDynamicEdges(),this.updateLabels(),this.nodeIndices.lengththis.constants.clustering.chainThreshold&&this._reduceAmountOfChains(1-this.constants.clustering.chainThreshold/t)},e._aggregateHubs=function(t){this._getHubSize(),this._formClustersByHub(t,!1)},e.forceAggregateHubs=function(t){var e=this.moving,i=this.nodeIndices.length;this._aggregateHubs(!0),this._updateNodeIndexList(),this._updateDynamicEdges(),this.updateLabels(),this.nodeIndices.length!=i&&(this.clusterSession+=1),(0==t||void 0===t)&&this.moving!=e&&this.start()},e._openClustersBySize=function(){for(var t in this.nodes)if(this.nodes.hasOwnProperty(t)){var e=this.nodes[t];1==e.inView()&&(e.width*this.scale>this.constants.clustering.screenSizeThreshold*this.frame.canvas.clientWidth||e.height*this.scale>this.constants.clustering.screenSizeThreshold*this.frame.canvas.clientHeight)&&this.openCluster(e)}},e._openClusters=function(t,e){for(var i=0;i1&&(t.clusterSizei)){var 
r=n.from,a=n.to;n.to.options.mass>n.from.options.mass&&(r=n.to,a=n.from),1==a.dynamicEdgesLength?this._addToCluster(r,a,!1):1==r.dynamicEdgesLength&&this._addToCluster(a,r,!1)}}},e._forceClustersByZoom=function(){for(var t in this.nodes)if(this.nodes.hasOwnProperty(t)){var e=this.nodes[t];if(1==e.dynamicEdgesLength&&0!=e.dynamicEdges.length){var i=e.dynamicEdges[0],s=i.toId==e.id?this.nodes[i.fromId]:this.nodes[i.toId];e.id!=s.id&&(s.options.mass>e.options.mass?this._addToCluster(s,e,!0):this._addToCluster(e,s,!0))}}},e._clusterToSmallestNeighbour=function(t){for(var e=-1,i=null,s=0;so.clusterSessions.length&&(e=o.clusterSessions.length,i=o)}null!=o&&void 0!==this.nodes[o.id]&&this._addToCluster(o,t,!0)},e._formClustersByHub=function(t,e){for(var i in this.nodes)this.nodes.hasOwnProperty(i)&&this._formClusterFromHub(this.nodes[i],t,e)},e._formClusterFromHub=function(t,e,i,s){if(void 0===s&&(s=0),t.dynamicEdgesLength>=this.hubThreshold&&0==i||t.dynamicEdgesLength==this.hubThreshold&&1==i){for(var o,n,r,a=this.constants.clustering.clusterEdgeThreshold/this.scale,h=!1,d=[],l=t.dynamicEdges.length,c=0;l>c;c++)d.push(t.dynamicEdges[c].id);if(0==e)for(h=!1,c=0;l>c;c++){var p=this.edges[d[c]];if(void 0!==p&&p.connected&&p.toId!=p.fromId&&(o=p.to.x-p.from.x,n=p.to.y-p.from.y,r=Math.sqrt(o*o+n*n),a>r)){h=!0;break}}if(!e&&h||e)for(c=0;l>c;c++)if(p=this.edges[d[c]],void 0!==p){var u=this.nodes[p.fromId==t.id?p.toId:p.fromId];u.dynamicEdges.length<=this.hubThreshold+s&&u.id!=t.id&&this._addToCluster(t,u,e)}}},e._addToCluster=function(t,e,i){t.containedNodes[e.id]=e;for(var s=0;s1)for(var s=0;s1&&(e.label="[".concat(String(e.clusterSize),"]"))}for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(e=this.nodes[t],1==e.clusterSize&&(e.label=void 0!==e.originalLabel?e.originalLabel:String(e.id)))},e.normalizeClusterLevels=function(){var t,e=0,i=1e9,s=0;for(t in this.nodes)this.nodes.hasOwnProperty(t)&&(s=this.nodes[t].clusterSessions.length,s>e&&(e=s),i>s&&(i=s));if(e-i>this.constants.clustering.clusterLevelDifference){var o=this.nodeIndices.length,n=e-this.constants.clustering.clusterLevelDifference;for(t in this.nodes)this.nodes.hasOwnProperty(t)&&this.nodes[t].clusterSessions.lengths&&(s=n.dynamicEdgesLength),t+=n.dynamicEdgesLength,e+=Math.pow(n.dynamicEdgesLength,2),i+=1}t/=i,e/=i;var r=e-Math.pow(t,2),a=Math.sqrt(r);this.hubThreshold=Math.floor(t+2*a),this.hubThreshold>s&&(this.hubThreshold=s)},e._reduceAmountOfChains=function(t){this.hubThreshold=2;var e=Math.floor(this.nodeIndices.length*t);for(var i in this.nodes)this.nodes.hasOwnProperty(i)&&2==this.nodes[i].dynamicEdgesLength&&this.nodes[i].dynamicEdges.length>=2&&e>0&&(this._formClusterFromHub(this.nodes[i],!0,!0,1),e-=1)},e._getChainFraction=function(){var t=0,e=0;for(var i in this.nodes)this.nodes.hasOwnProperty(i)&&(2==this.nodes[i].dynamicEdgesLength&&this.nodes[i].dynamicEdges.length>=2&&(t+=1),e+=1);return t/e}},function(t,e,i){var s=i(1),o=i(40);e._putDataInSector=function(){this.sectors.active[this._sector()].nodes=this.nodes,this.sectors.active[this._sector()].edges=this.edges,this.sectors.active[this._sector()].nodeIndices=this.nodeIndices},e._switchToSector=function(t,e){void 
0===e||"active"==e?this._switchToActiveSector(t):this._switchToFrozenSector(t)},e._switchToActiveSector=function(t){this.nodeIndices=this.sectors.active[t].nodeIndices,this.nodes=this.sectors.active[t].nodes,this.edges=this.sectors.active[t].edges},e._switchToSupportSector=function(){this.nodeIndices=this.sectors.support.nodeIndices,this.nodes=this.sectors.support.nodes,this.edges=this.sectors.support.edges},e._switchToFrozenSector=function(t){this.nodeIndices=this.sectors.frozen[t].nodeIndices,this.nodes=this.sectors.frozen[t].nodes,this.edges=this.sectors.frozen[t].edges},e._loadLatestSector=function(){this._switchToSector(this._sector())},e._sector=function(){return this.activeSector[this.activeSector.length-1]},e._previousSector=function(){if(this.activeSector.length>1)return this.activeSector[this.activeSector.length-2];throw new TypeError("there are not enough sectors in the this.activeSector array.")},e._setActiveSector=function(t){this.activeSector.push(t)},e._forgetLastSector=function(){this.activeSector.pop()},e._createNewSector=function(t){this.sectors.active[t]={nodes:{},edges:{},nodeIndices:[],formationScale:this.scale,drawingNode:void 0},this.sectors.active[t].drawingNode=new o({id:t,color:{background:"#eaefef",border:"495c5e"}},{},{},this.constants),this.sectors.active[t].drawingNode.clusterSize=2},e._deleteActiveSector=function(t){delete this.sectors.active[t]},e._deleteFrozenSector=function(t){delete this.sectors.frozen[t]},e._freezeSector=function(t){this.sectors.frozen[t]=this.sectors.active[t],this._deleteActiveSector(t)},e._activateSector=function(t){this.sectors.active[t]=this.sectors.frozen[t],this._deleteFrozenSector(t)},e._mergeThisWithFrozen=function(t){for(var e in this.nodes)this.nodes.hasOwnProperty(e)&&(this.sectors.frozen[t].nodes[e]=this.nodes[e]);for(var i in this.edges)this.edges.hasOwnProperty(i)&&(this.sectors.frozen[t].edges[i]=this.edges[i]);for(var s=0;s1?this[t](o[0],o[1]):this[t](e))}return this._loadLatestSector(),i},e._doInSupportSector=function(t,e){var i=!1;if(void 0===e)this._switchToSupportSector(),i=this[t]();else{this._switchToSupportSector();var s=Array.prototype.splice.call(arguments,1);i=s.length>1?this[t](s[0],s[1]):this[t](e)}return this._loadLatestSector(),i},e._doInAllFrozenSectors=function(t,e){if(void 0===e)for(var i in this.sectors.frozen)this.sectors.frozen.hasOwnProperty(i)&&(this._switchToFrozenSector(i),this[t]());else for(var i in this.sectors.frozen)if(this.sectors.frozen.hasOwnProperty(i)){this._switchToFrozenSector(i);var s=Array.prototype.splice.call(arguments,1);s.length>1?this[t](s[0],s[1]):this[t](e)}this._loadLatestSector()},e._doInAllSectors=function(t,e){var i=Array.prototype.splice.call(arguments,1);void 0===e?(this._doInAllActiveSectors(t),this._doInAllFrozenSectors(t)):i.length>1?(this._doInAllActiveSectors(t,i[0],i[1]),this._doInAllFrozenSectors(t,i[0],i[1])):(this._doInAllActiveSectors(t,e),this._doInAllFrozenSectors(t,e))},e._clearNodeIndexList=function(){var t=this._sector();this.sectors.active[t].nodeIndices=[],this.nodeIndices=this.sectors.active[t].nodeIndices},e._drawSectorNodes=function(t,e){var i,s=1e9,o=-1e9,n=1e9,r=-1e9;for(var a in this.sectors[e])if(this.sectors[e].hasOwnProperty(a)&&void 0!==this.sectors[e][a].drawingNode){this._switchToSector(a,e),s=1e9,o=-1e9,n=1e9,r=-1e9;for(var h in 
this.nodes)this.nodes.hasOwnProperty(h)&&(i=this.nodes[h],i.resize(t),n>i.x-.5*i.width&&(n=i.x-.5*i.width),ri.y-.5*i.height&&(s=i.y-.5*i.height),o0?this.nodes[i[i.length-1]]:null},e._getEdgesOverlappingWith=function(t,e){var i=this.edges;for(var s in i)i.hasOwnProperty(s)&&i[s].isOverlappingWith(t)&&e.push(s)},e._getAllEdgesOverlappingWith=function(t){var e=[];return this._doInAllActiveSectors("_getEdgesOverlappingWith",t,e),e},e._getEdgeAt=function(t){var e=this._pointerToPositionObject(t),i=this._getAllEdgesOverlappingWith(e);return i.length>0?this.edges[i[i.length-1]]:null},e._addToSelection=function(t){t instanceof s?this.selectionObj.nodes[t.id]=t:this.selectionObj.edges[t.id]=t},e._addToHover=function(t){t instanceof s?this.hoverObj.nodes[t.id]=t:this.hoverObj.edges[t.id]=t},e._removeFromSelection=function(t){t instanceof s?delete this.selectionObj.nodes[t.id]:delete this.selectionObj.edges[t.id]},e._unselectAll=function(t){void 0===t&&(t=!1);for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&this.selectionObj.nodes[e].unselect();for(var i in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(i)&&this.selectionObj.edges[i].unselect();this.selectionObj={nodes:{},edges:{}},0==t&&this.emit("select",this.getSelection())},e._unselectClusters=function(t){void 0===t&&(t=!1);for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&this.selectionObj.nodes[e].clusterSize>1&&(this.selectionObj.nodes[e].unselect(),this._removeFromSelection(this.selectionObj.nodes[e]));0==t&&this.emit("select",this.getSelection())},e._getSelectedNodeCount=function(){var t=0;for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&(t+=1);return t},e._getSelectedNode=function(){for(var t in this.selectionObj.nodes)if(this.selectionObj.nodes.hasOwnProperty(t))return this.selectionObj.nodes[t];return null},e._getSelectedEdge=function(){for(var t in this.selectionObj.edges)if(this.selectionObj.edges.hasOwnProperty(t))return this.selectionObj.edges[t];return null},e._getSelectedEdgeCount=function(){var t=0;for(var e in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(e)&&(t+=1);return t},e._getSelectedObjectCount=function(){var t=0;for(var e in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(e)&&(t+=1);for(var i in this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(i)&&(t+=1);return t},e._selectionIsEmpty=function(){for(var t in this.selectionObj.nodes)if(this.selectionObj.nodes.hasOwnProperty(t))return!1;for(var e in this.selectionObj.edges)if(this.selectionObj.edges.hasOwnProperty(e))return!1;return!0},e._clusterInSelection=function(){for(var t in this.selectionObj.nodes)if(this.selectionObj.nodes.hasOwnProperty(t)&&this.selectionObj.nodes[t].clusterSize>1)return!0;return!1},e._selectConnectedEdges=function(t){for(var e=0;ei;i++){o=t[i];var n=this.nodes[o];if(!n)throw new RangeError('Node with id "'+o+'" not found');this._selectObject(n,!0,!0,e,!0)}this.redraw()},e.selectEdges=function(t){var e,i,s;if(!t||void 0==t.length)throw"Selection must be an array with ids";for(this._unselectAll(!0),e=0,i=t.length;i>e;e++){s=t[e];var o=this.edges[s];if(!o)throw new RangeError('Edge with id "'+s+'" not found');this._selectObject(o,!0,!0,!1,!0)}this.redraw()},e._updateSelection=function(){for(var t in this.selectionObj.nodes)this.selectionObj.nodes.hasOwnProperty(t)&&(this.nodes.hasOwnProperty(t)||delete this.selectionObj.nodes[t]);for(var e in 
this.selectionObj.edges)this.selectionObj.edges.hasOwnProperty(e)&&(this.edges.hasOwnProperty(e)||delete this.selectionObj.edges[e])}},function(t,e,i){var s=i(1),o=i(40),n=i(37);e._clearManipulatorBar=function(){this._recursiveDOMDelete(this.manipulationDiv),this.manipulationDOM={},this._manipulationReleaseOverload=function(){},delete this.sectors.support.nodes.targetNode,delete this.sectors.support.nodes.targetViaNode,this.controlNodesActive=!1,this.freezeSimulation=!1},e._restoreOverloadedFunctions=function(){for(var t in this.cachedFunctions)this.cachedFunctions.hasOwnProperty(t)&&(this[t]=this.cachedFunctions[t],delete this.cachedFunctions[t])},e._toggleEditMode=function(){this.editMode=!this.editMode;var t=this.manipulationDiv,e=this.closeDiv,i=this.editModeDiv;1==this.editMode?(t.style.display="block",e.style.display="block",i.style.display="none",e.onclick=this._toggleEditMode.bind(this)):(t.style.display="none",e.style.display="none",i.style.display="block",e.onclick=null),this._createManipulatorBar()},e._createManipulatorBar=function(){this.boundFunction&&this.off("select",this.boundFunction);var t=this.constants.locales[this.constants.locale];if(void 0!==this.edgeBeingEdited&&(this.edgeBeingEdited._disableControlNodes(),this.edgeBeingEdited=void 0,this.selectedControlNode=null,this.controlNodesActive=!1,this._redraw()),this._restoreOverloadedFunctions(),this.freezeSimulation=!1,this.blockConnectingEdgeSelection=!1,this.forceAppendSelection=!1,this.manipulationDOM={},1==this.editMode){for(;this.manipulationDiv.hasChildNodes();)this.manipulationDiv.removeChild(this.manipulationDiv.firstChild);this.manipulationDOM.addNodeSpan=document.createElement("span"),this.manipulationDOM.addNodeSpan.className="network-manipulationUI add",this.manipulationDOM.addNodeLabelSpan=document.createElement("span"),this.manipulationDOM.addNodeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.addNodeLabelSpan.innerHTML=t.addNode,this.manipulationDOM.addNodeSpan.appendChild(this.manipulationDOM.addNodeLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.addEdgeSpan=document.createElement("span"),this.manipulationDOM.addEdgeSpan.className="network-manipulationUI connect",this.manipulationDOM.addEdgeLabelSpan=document.createElement("span"),this.manipulationDOM.addEdgeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.addEdgeLabelSpan.innerHTML=t.addEdge,this.manipulationDOM.addEdgeSpan.appendChild(this.manipulationDOM.addEdgeLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.addNodeSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.addEdgeSpan),1==this._getSelectedNodeCount()&&this.triggerFunctions.edit?(this.manipulationDOM.seperatorLineDiv2=document.createElement("div"),this.manipulationDOM.seperatorLineDiv2.className="network-seperatorLine",this.manipulationDOM.editNodeSpan=document.createElement("span"),this.manipulationDOM.editNodeSpan.className="network-manipulationUI 
edit",this.manipulationDOM.editNodeLabelSpan=document.createElement("span"),this.manipulationDOM.editNodeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.editNodeLabelSpan.innerHTML=t.editNode,this.manipulationDOM.editNodeSpan.appendChild(this.manipulationDOM.editNodeLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv2),this.manipulationDiv.appendChild(this.manipulationDOM.editNodeSpan)):1==this._getSelectedEdgeCount()&&0==this._getSelectedNodeCount()&&(this.manipulationDOM.seperatorLineDiv3=document.createElement("div"),this.manipulationDOM.seperatorLineDiv3.className="network-seperatorLine",this.manipulationDOM.editEdgeSpan=document.createElement("span"),this.manipulationDOM.editEdgeSpan.className="network-manipulationUI edit",this.manipulationDOM.editEdgeLabelSpan=document.createElement("span"),this.manipulationDOM.editEdgeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.editEdgeLabelSpan.innerHTML=t.editEdge,this.manipulationDOM.editEdgeSpan.appendChild(this.manipulationDOM.editEdgeLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv3),this.manipulationDiv.appendChild(this.manipulationDOM.editEdgeSpan)),0==this._selectionIsEmpty()&&(this.manipulationDOM.seperatorLineDiv4=document.createElement("div"),this.manipulationDOM.seperatorLineDiv4.className="network-seperatorLine",this.manipulationDOM.deleteSpan=document.createElement("span"),this.manipulationDOM.deleteSpan.className="network-manipulationUI delete",this.manipulationDOM.deleteLabelSpan=document.createElement("span"),this.manipulationDOM.deleteLabelSpan.className="network-manipulationLabel",this.manipulationDOM.deleteLabelSpan.innerHTML=t.del,this.manipulationDOM.deleteSpan.appendChild(this.manipulationDOM.deleteLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv4),this.manipulationDiv.appendChild(this.manipulationDOM.deleteSpan)),this.manipulationDOM.addNodeSpan.onclick=this._createAddNodeToolbar.bind(this),this.manipulationDOM.addEdgeSpan.onclick=this._createAddEdgeToolbar.bind(this),1==this._getSelectedNodeCount()&&this.triggerFunctions.edit?this.manipulationDOM.editNodeSpan.onclick=this._editNode.bind(this):1==this._getSelectedEdgeCount()&&0==this._getSelectedNodeCount()&&(this.manipulationDOM.editEdgeSpan.onclick=this._createEditEdgeToolbar.bind(this)),0==this._selectionIsEmpty()&&(this.manipulationDOM.deleteSpan.onclick=this._deleteSelected.bind(this)),this.closeDiv.onclick=this._toggleEditMode.bind(this); var e=this;this.boundFunction=e._createManipulatorBar,this.on("select",this.boundFunction)}else{for(;this.editModeDiv.hasChildNodes();)this.editModeDiv.removeChild(this.editModeDiv.firstChild);this.manipulationDOM.editModeSpan=document.createElement("span"),this.manipulationDOM.editModeSpan.className="network-manipulationUI edit editmode",this.manipulationDOM.editModeLabelSpan=document.createElement("span"),this.manipulationDOM.editModeLabelSpan.className="network-manipulationLabel",this.manipulationDOM.editModeLabelSpan.innerHTML=t.edit,this.manipulationDOM.editModeSpan.appendChild(this.manipulationDOM.editModeLabelSpan),this.editModeDiv.appendChild(this.manipulationDOM.editModeSpan),this.manipulationDOM.editModeSpan.onclick=this._toggleEditMode.bind(this)}},e._createAddNodeToolbar=function(){this._clearManipulatorBar(),this.boundFunction&&this.off("select",this.boundFunction);var 
t=this.constants.locales[this.constants.locale];this.manipulationDOM={},this.manipulationDOM.backSpan=document.createElement("span"),this.manipulationDOM.backSpan.className="network-manipulationUI back",this.manipulationDOM.backLabelSpan=document.createElement("span"),this.manipulationDOM.backLabelSpan.className="network-manipulationLabel",this.manipulationDOM.backLabelSpan.innerHTML=t.back,this.manipulationDOM.backSpan.appendChild(this.manipulationDOM.backLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.descriptionSpan=document.createElement("span"),this.manipulationDOM.descriptionSpan.className="network-manipulationUI none",this.manipulationDOM.descriptionLabelSpan=document.createElement("span"),this.manipulationDOM.descriptionLabelSpan.className="network-manipulationLabel",this.manipulationDOM.descriptionLabelSpan.innerHTML=t.addDescription,this.manipulationDOM.descriptionSpan.appendChild(this.manipulationDOM.descriptionLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.backSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.descriptionSpan),this.manipulationDOM.backSpan.onclick=this._createManipulatorBar.bind(this);var e=this;this.boundFunction=e._addNode,this.on("select",this.boundFunction)},e._createAddEdgeToolbar=function(){this._clearManipulatorBar(),this._unselectAll(!0),this.freezeSimulation=!0,this.boundFunction&&this.off("select",this.boundFunction);var t=this.constants.locales[this.constants.locale];this._unselectAll(),this.forceAppendSelection=!1,this.blockConnectingEdgeSelection=!0,this.manipulationDOM={},this.manipulationDOM.backSpan=document.createElement("span"),this.manipulationDOM.backSpan.className="network-manipulationUI back",this.manipulationDOM.backLabelSpan=document.createElement("span"),this.manipulationDOM.backLabelSpan.className="network-manipulationLabel",this.manipulationDOM.backLabelSpan.innerHTML=t.back,this.manipulationDOM.backSpan.appendChild(this.manipulationDOM.backLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.descriptionSpan=document.createElement("span"),this.manipulationDOM.descriptionSpan.className="network-manipulationUI none",this.manipulationDOM.descriptionLabelSpan=document.createElement("span"),this.manipulationDOM.descriptionLabelSpan.className="network-manipulationLabel",this.manipulationDOM.descriptionLabelSpan.innerHTML=t.edgeDescription,this.manipulationDOM.descriptionSpan.appendChild(this.manipulationDOM.descriptionLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.backSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.descriptionSpan),this.manipulationDOM.backSpan.onclick=this._createManipulatorBar.bind(this);var 
e=this;this.boundFunction=e._handleConnect,this.on("select",this.boundFunction),this.cachedFunctions._handleTouch=this._handleTouch,this.cachedFunctions._manipulationReleaseOverload=this._manipulationReleaseOverload,this.cachedFunctions._handleDragStart=this._handleDragStart,this.cachedFunctions._handleDragEnd=this._handleDragEnd,this._handleTouch=this._handleConnect,this._manipulationReleaseOverload=function(){},this._handleDragStart=function(){},this._handleDragEnd=this._finishConnect,this._redraw()},e._createEditEdgeToolbar=function(){this._clearManipulatorBar(),this.controlNodesActive=!0,this.boundFunction&&this.off("select",this.boundFunction),this.edgeBeingEdited=this._getSelectedEdge(),this.edgeBeingEdited._enableControlNodes();var t=this.constants.locales[this.constants.locale];this.manipulationDOM={},this.manipulationDOM.backSpan=document.createElement("span"),this.manipulationDOM.backSpan.className="network-manipulationUI back",this.manipulationDOM.backLabelSpan=document.createElement("span"),this.manipulationDOM.backLabelSpan.className="network-manipulationLabel",this.manipulationDOM.backLabelSpan.innerHTML=t.back,this.manipulationDOM.backSpan.appendChild(this.manipulationDOM.backLabelSpan),this.manipulationDOM.seperatorLineDiv1=document.createElement("div"),this.manipulationDOM.seperatorLineDiv1.className="network-seperatorLine",this.manipulationDOM.descriptionSpan=document.createElement("span"),this.manipulationDOM.descriptionSpan.className="network-manipulationUI none",this.manipulationDOM.descriptionLabelSpan=document.createElement("span"),this.manipulationDOM.descriptionLabelSpan.className="network-manipulationLabel",this.manipulationDOM.descriptionLabelSpan.innerHTML=t.editEdgeDescription,this.manipulationDOM.descriptionSpan.appendChild(this.manipulationDOM.descriptionLabelSpan),this.manipulationDiv.appendChild(this.manipulationDOM.backSpan),this.manipulationDiv.appendChild(this.manipulationDOM.seperatorLineDiv1),this.manipulationDiv.appendChild(this.manipulationDOM.descriptionSpan),this.manipulationDOM.backSpan.onclick=this._createManipulatorBar.bind(this),this.cachedFunctions._handleTouch=this._handleTouch,this.cachedFunctions._manipulationReleaseOverload=this._manipulationReleaseOverload,this.cachedFunctions._handleTap=this._handleTap,this.cachedFunctions._handleDragStart=this._handleDragStart,this.cachedFunctions._handleOnDrag=this._handleOnDrag,this._handleTouch=this._selectControlNode,this._handleTap=function(){},this._handleOnDrag=this._controlNodeDrag,this._handleDragStart=function(){},this._manipulationReleaseOverload=this._releaseControlNode,this._redraw()},e._selectControlNode=function(t){this.edgeBeingEdited.controlNodes.from.unselect(),this.edgeBeingEdited.controlNodes.to.unselect(),this.selectedControlNode=this.edgeBeingEdited._getSelectedControlNode(this._XconvertDOMtoCanvas(t.x),this._YconvertDOMtoCanvas(t.y)),null!==this.selectedControlNode&&(this.selectedControlNode.select(),this.freezeSimulation=!0),this._redraw()},e._controlNodeDrag=function(t){var e=this._getPointer(t.center);null!==this.selectedControlNode&&void 0!==this.selectedControlNode&&(this.selectedControlNode.x=this._XconvertDOMtoCanvas(e.x),this.selectedControlNode.y=this._YconvertDOMtoCanvas(e.y)),this._redraw()},e._releaseControlNode=function(t){var 
e=this._getNodeAt(t);null!==e?(1==this.edgeBeingEdited.controlNodes.from.selected&&(this.edgeBeingEdited._restoreControlNodes(),this._editEdge(e.id,this.edgeBeingEdited.to.id),this.edgeBeingEdited.controlNodes.from.unselect()),1==this.edgeBeingEdited.controlNodes.to.selected&&(this.edgeBeingEdited._restoreControlNodes(),this._editEdge(this.edgeBeingEdited.from.id,e.id),this.edgeBeingEdited.controlNodes.to.unselect())):this.edgeBeingEdited._restoreControlNodes(),this.freezeSimulation=!1,this._redraw()},e._handleConnect=function(t){if(0==this._getSelectedNodeCount()){var e=this._getNodeAt(t);if(null!=e)if(e.clusterSize>1)alert(this.constants.locales[this.constants.locale].createEdgeError);else{this._selectObject(e,!1);var i=this.sectors.support.nodes;i.targetNode=new o({id:"targetNode"},{},{},this.constants);var s=i.targetNode;s.x=e.x,s.y=e.y,this.edges.connectionEdge=new n({id:"connectionEdge",from:e.id,to:s.id},this,this.constants);var r=this.edges.connectionEdge;r.from=e,r.connected=!0,r.options.smoothCurves={enabled:!0,dynamic:!1,type:"continuous",roundness:.5},r.selected=!0,r.to=s,this.cachedFunctions._handleOnDrag=this._handleOnDrag,this._handleOnDrag=function(t){var e=this._getPointer(t.center),i=this.edges.connectionEdge;i.to.x=this._XconvertDOMtoCanvas(e.x),i.to.y=this._YconvertDOMtoCanvas(e.y)},this.moving=!0,this.start()}}},e._finishConnect=function(t){if(1==this._getSelectedNodeCount()){var e=this._getPointer(t.center);this._handleOnDrag=this.cachedFunctions._handleOnDrag,delete this.cachedFunctions._handleOnDrag;var i=this.edges.connectionEdge.fromId;delete this.edges.connectionEdge,delete this.sectors.support.nodes.targetNode,delete this.sectors.support.nodes.targetViaNode;var s=this._getNodeAt(e);null!=s&&(s.clusterSize>1?alert(this.constants.locales[this.constants.locale].createEdgeError):(this._createEdge(i,s.id),this._createManipulatorBar())),this._unselectAll()}},e._addNode=function(){if(this._selectionIsEmpty()&&1==this.editMode){var t=this._pointerToPositionObject(this.pointerPosition),e={id:s.randomUUID(),x:t.left,y:t.top,label:"new",allowedToMoveX:!0,allowedToMoveY:!0};if(this.triggerFunctions.add){if(2!=this.triggerFunctions.add.length)throw new Error("The function for add does not support two arguments (data,callback)");var i=this;this.triggerFunctions.add(e,function(t){i.nodesData.add(t),i._createManipulatorBar(),i.moving=!0,i.start()})}else this.nodesData.add(e),this._createManipulatorBar(),this.moving=!0,this.start()}},e._createEdge=function(t,e){if(1==this.editMode){var i={from:t,to:e};if(this.triggerFunctions.connect){if(2!=this.triggerFunctions.connect.length)throw new Error("The function for connect does not support two arguments (data,callback)");var s=this;this.triggerFunctions.connect(i,function(t){s.edgesData.add(t),s.moving=!0,s.start()})}else this.edgesData.add(i),this.moving=!0,this.start()}},e._editEdge=function(t,e){if(1==this.editMode){var i={id:this.edgeBeingEdited.id,from:t,to:e};if(this.triggerFunctions.editEdge){if(2!=this.triggerFunctions.editEdge.length)throw new Error("The function for edit does not support two arguments (data, callback)");var s=this;this.triggerFunctions.editEdge(i,function(t){s.edgesData.update(t),s.moving=!0,s.start()})}else this.edgesData.update(i),this.moving=!0,this.start()}},e._editNode=function(){if(!this.triggerFunctions.edit||1!=this.editMode)throw new Error("No edit function has been bound to this button");var 
t=this._getSelectedNode(),e={id:t.id,label:t.label,group:t.options.group,shape:t.options.shape,color:{background:t.options.color.background,border:t.options.color.border,highlight:{background:t.options.color.highlight.background,border:t.options.color.highlight.border}}};if(2!=this.triggerFunctions.edit.length)throw new Error("The function for edit does not support two arguments (data, callback)");var i=this;this.triggerFunctions.edit(e,function(t){i.nodesData.update(t),i._createManipulatorBar(),i.moving=!0,i.start()})},e._deleteSelected=function(){if(!this._selectionIsEmpty()&&1==this.editMode)if(this._clusterInSelection())alert(this.constants.locales[this.constants.locale].deleteClusterError);else{var t=this.getSelectedNodes(),e=this.getSelectedEdges();if(this.triggerFunctions.del){var i=this,s={nodes:t,edges:e};if(2!=this.triggerFunctions.del.length)throw new Error("The function for delete does not support two arguments (data, callback)");this.triggerFunctions.del(s,function(t){i.edgesData.remove(t.edges),i.nodesData.remove(t.nodes),i._unselectAll(),i.moving=!0,i.start()})}else this.edgesData.remove(e),this.nodesData.remove(t),this._unselectAll(),this.moving=!0,this.start()}}},function(t,e,i){var s=(i(1),i(47)),o=i(45);e._cleanNavigation=function(){if(0!=this.navigationHammers.existing.length){for(var t=0;t0){var t,e,i=0,s=!1,o=!1;for(e in this.nodes)this.nodes.hasOwnProperty(e)&&(t=this.nodes[e],-1!=t.level?s=!0:o=!0,is&&(n.xFixed=!1,n.x=i[n.level].minPos,r=!0):n.yFixed&&n.level>s&&(n.yFixed=!1,n.y=i[n.level].minPos,r=!0),1==r&&(i[n.level].minPos+=i[n.level].nodeSpacing,n.edges.length>1&&this._placeBranchNodes(n.edges,n.id,i,n.level))}},e._setLevel=function(t,e,i){for(var s=0;st)&&(o.level=t,o.edges.length>1&&this._setLevel(t+1,o.edges,o.id))}},e._setLevelDirected=function(t,e,i){this.nodes[i].hierarchyEnumerated=!0;for(var s,o,n=0;n1&&s.hierarchyEnumerated===!1&&this._setLevelDirected(s.level,s.edges,s.id)},e._restoreNodes=function(){for(var t in this.nodes)this.nodes.hasOwnProperty(t)&&(this.nodes[t].xFixed=!1,this.nodes[t].yFixed=!1)}},function(t,e,i){function s(){this.constants.smoothCurves.enabled=!this.constants.smoothCurves.enabled;var t=document.getElementById("graph_toggleSmooth");t.style.background=1==this.constants.smoothCurves.enabled?"#A4FF56":"#FF8532",this._configureSmoothCurves(!1)}function o(){for(var t in this.calculationNodes)this.calculationNodes.hasOwnProperty(t)&&(this.calculationNodes[t].vx=0,this.calculationNodes[t].vy=0,this.calculationNodes[t].fx=0,this.calculationNodes[t].fy=0);1==this.constants.hierarchicalLayout.enabled?(this._setupHierarchicalLayout(),a.call(this,"graph_H_nd",1,"physics_hierarchicalRepulsion_nodeDistance"),a.call(this,"graph_H_cg",1,"physics_centralGravity"),a.call(this,"graph_H_sc",1,"physics_springConstant"),a.call(this,"graph_H_sl",1,"physics_springLength"),a.call(this,"graph_H_damp",1,"physics_damping")):this.repositionNodes(),this.moving=!0,this.start()}function n(){var t="No options are required, default values used.",e=[],i=document.getElementById("graph_physicsMethod1"),s=document.getElementById("graph_physicsMethod2");if(1==i.checked){if(this.constants.physics.barnesHut.gravitationalConstant!=this.backupConstants.physics.barnesHut.gravitationalConstant&&e.push("gravitationalConstant: "+this.constants.physics.barnesHut.gravitationalConstant),this.constants.physics.centralGravity!=this.backupConstants.physics.barnesHut.centralGravity&&e.push("centralGravity: 
"+this.constants.physics.centralGravity),this.constants.physics.springLength!=this.backupConstants.physics.barnesHut.springLength&&e.push("springLength: "+this.constants.physics.springLength),this.constants.physics.springConstant!=this.backupConstants.physics.barnesHut.springConstant&&e.push("springConstant: "+this.constants.physics.springConstant),this.constants.physics.damping!=this.backupConstants.physics.barnesHut.damping&&e.push("damping: "+this.constants.physics.damping),0!=e.length){t="var options = {",t+="physics: {barnesHut: {";for(var o=0;othis.constants.clustering.clusterThreshold&&1==this.constants.clustering.enabled&&this.clusterToFit(this.constants.clustering.reduceToNodes,!1),this._calculateForces())},e._calculateForces=function(){this._calculateGravitationalForces(),this._calculateNodeForces(),this.constants.physics.springConstant>0&&(1==this.constants.smoothCurves.enabled&&1==this.constants.smoothCurves.dynamic?this._calculateSpringForcesWithSupport():1==this.constants.physics.hierarchicalRepulsion.enabled?this._calculateHierarchicalSpringForces():this._calculateSpringForces())},e._updateCalculationNodes=function(){if(1==this.constants.smoothCurves.enabled&&1==this.constants.smoothCurves.dynamic){this.calculationNodes={},this.calculationNodeIndices=[];for(var t in this.nodes)this.nodes.hasOwnProperty(t)&&(this.calculationNodes[t]=this.nodes[t]);var e=this.sectors.support.nodes;for(var i in e)e.hasOwnProperty(i)&&(this.edges.hasOwnProperty(e[i].parentEdgeId)?this.calculationNodes[i]=e[i]:e[i]._setForce(0,0));for(var s in this.calculationNodes)this.calculationNodes.hasOwnProperty(s)&&this.calculationNodeIndices.push(s)}else this.calculationNodes=this.nodes,this.calculationNodeIndices=this.nodeIndices},e._calculateGravitationalForces=function(){var t,e,i,s,o,n=this.calculationNodes,r=this.constants.physics.centralGravity,a=0;for(o=0;oSimulation Mode:Barnes HutRepulsionHierarchical
Options:
',this.containerElement.parentElement.insertBefore(this.physicsConfiguration,this.containerElement),this.optionsDiv=document.createElement("div"),this.optionsDiv.style.fontSize="14px",this.optionsDiv.style.fontFamily="verdana",this.containerElement.parentElement.insertBefore(this.optionsDiv,this.containerElement); var e;e=document.getElementById("graph_BH_gc"),e.onchange=a.bind(this,"graph_BH_gc",-1,"physics_barnesHut_gravitationalConstant"),e=document.getElementById("graph_BH_cg"),e.onchange=a.bind(this,"graph_BH_cg",1,"physics_centralGravity"),e=document.getElementById("graph_BH_sc"),e.onchange=a.bind(this,"graph_BH_sc",1,"physics_springConstant"),e=document.getElementById("graph_BH_sl"),e.onchange=a.bind(this,"graph_BH_sl",1,"physics_springLength"),e=document.getElementById("graph_BH_damp"),e.onchange=a.bind(this,"graph_BH_damp",1,"physics_damping"),e=document.getElementById("graph_R_nd"),e.onchange=a.bind(this,"graph_R_nd",1,"physics_repulsion_nodeDistance"),e=document.getElementById("graph_R_cg"),e.onchange=a.bind(this,"graph_R_cg",1,"physics_centralGravity"),e=document.getElementById("graph_R_sc"),e.onchange=a.bind(this,"graph_R_sc",1,"physics_springConstant"),e=document.getElementById("graph_R_sl"),e.onchange=a.bind(this,"graph_R_sl",1,"physics_springLength"),e=document.getElementById("graph_R_damp"),e.onchange=a.bind(this,"graph_R_damp",1,"physics_damping"),e=document.getElementById("graph_H_nd"),e.onchange=a.bind(this,"graph_H_nd",1,"physics_hierarchicalRepulsion_nodeDistance"),e=document.getElementById("graph_H_cg"),e.onchange=a.bind(this,"graph_H_cg",1,"physics_centralGravity"),e=document.getElementById("graph_H_sc"),e.onchange=a.bind(this,"graph_H_sc",1,"physics_springConstant"),e=document.getElementById("graph_H_sl"),e.onchange=a.bind(this,"graph_H_sl",1,"physics_springLength"),e=document.getElementById("graph_H_damp"),e.onchange=a.bind(this,"graph_H_damp",1,"physics_damping"),e=document.getElementById("graph_H_direction"),e.onchange=a.bind(this,"graph_H_direction",t,"hierarchicalLayout_direction"),e=document.getElementById("graph_H_levsep"),e.onchange=a.bind(this,"graph_H_levsep",1,"hierarchicalLayout_levelSeparation"),e=document.getElementById("graph_H_nspac"),e.onchange=a.bind(this,"graph_H_nspac",1,"hierarchicalLayout_nodeSpacing");var i=document.getElementById("graph_physicsMethod1"),d=document.getElementById("graph_physicsMethod2"),l=document.getElementById("graph_physicsMethod3");d.checked=!0,this.constants.physics.barnesHut.enabled&&(i.checked=!0),this.constants.hierarchicalLayout.enabled&&(l.checked=!0);var c=document.getElementById("graph_toggleSmooth"),p=document.getElementById("graph_repositionNodes"),u=document.getElementById("graph_generateOptions");c.onclick=s.bind(this),p.onclick=o.bind(this),u.onclick=n.bind(this),c.style.background=1==this.constants.smoothCurves&&0==this.constants.dynamicSmoothCurves?"#A4FF56":"#FF8532",r.apply(this),i.onchange=r.bind(this),d.onchange=r.bind(this),l.onchange=r.bind(this)}},e._overWriteGraphConstants=function(t,e){var i=t.split("_");1==i.length?this.constants[i[0]]=e:2==i.length?this.constants[i[0]][i[1]]=e:3==i.length&&(this.constants[i[0]][i[1]][i[2]]=e)}},function(t){function e(t){throw new Error("Cannot find module '"+t+"'.")}e.keys=function(){return[]},e.resolve=e,t.exports=e,e.id=68},function(t,e){e._calculateNodeForces=function(){var 
t,e,i,s,o,n,r,a,h,d,l,c=this.calculationNodes,p=this.calculationNodeIndices,u=-2/3,m=4/3,f=this.constants.physics.repulsion.nodeDistance,g=f;for(d=0;di&&(r=.5*g>i?1:v*i+m,r*=0==n?1:1+n*this.constants.clustering.forceAmplification,r/=Math.max(i,.01*g),s=t*r,o=e*r,a.fx-=s,a.fy-=o,h.fx+=s,h.fy+=o)}}},function(t,e){e._calculateNodeForces=function(){var t,e,i,s,o,n,r,a,h,d,l=this.calculationNodes,c=this.calculationNodeIndices,p=this.constants.physics.hierarchicalRepulsion.nodeDistance;for(h=0;hi?-Math.pow(u*i,2)+Math.pow(u*p,2):0,0==i?i=.01:n/=i,s=t*n,o=e*n,r.fx-=s,r.fy-=o,a.fx+=s,a.fy+=o}},e._calculateHierarchicalSpringForces=function(){for(var t,e,i,s,o,n,r,a,h,d=this.edges,l=this.calculationNodes,c=this.calculationNodeIndices,p=0;pn;n++)t=e[i[n]],t.options.mass>0&&(this._getForceContribution(o.root.children.NW,t),this._getForceContribution(o.root.children.NE,t),this._getForceContribution(o.root.children.SW,t),this._getForceContribution(o.root.children.SE,t))}},e._getForceContribution=function(t,e){if(t.childrenCount>0){var i,s,o;if(i=t.centerOfMass.x-e.x,s=t.centerOfMass.y-e.y,o=Math.sqrt(i*i+s*s),o*t.calcSize>this.constants.physics.barnesHut.thetaInverted){0==o&&(o=.1*Math.random(),i=o);var n=this.constants.physics.barnesHut.gravitationalConstant*t.mass*e.options.mass/(o*o*o),r=i*n,a=s*n;e.fx+=r,e.fy+=a}else if(4==t.childrenCount)this._getForceContribution(t.children.NW,e),this._getForceContribution(t.children.NE,e),this._getForceContribution(t.children.SW,e),this._getForceContribution(t.children.SE,e);else if(t.children.data.id!=e.id){0==o&&(o=.5*Math.random(),i=o);var n=this.constants.physics.barnesHut.gravitationalConstant*t.mass*e.options.mass/(o*o*o),r=i*n,a=s*n;e.fx+=r,e.fy+=a}}},e._formBarnesHutTree=function(t,e){for(var i,s=e.length,o=Number.MAX_VALUE,n=Number.MAX_VALUE,r=-Number.MAX_VALUE,a=-Number.MAX_VALUE,h=0;s>h;h++){var d=t[e[h]].x,l=t[e[h]].y;t[e[h]].options.mass>0&&(o>d&&(o=d),d>r&&(r=d),n>l&&(n=l),l>a&&(a=l))}var c=Math.abs(r-o)-Math.abs(a-n);c>0?(n-=.5*c,a+=.5*c):(o+=.5*c,r-=.5*c);var p=1e-5,u=Math.max(p,Math.abs(r-o)),m=.5*u,f=.5*(o+r),g=.5*(n+a),v={root:{centerOfMass:{x:0,y:0},mass:0,range:{minX:f-m,maxX:f+m,minY:g-m,maxY:g+m},size:u,calcSize:1/u,children:{data:null},maxWidth:0,level:0,childrenCount:4}};for(this._splitBranch(v.root),h=0;s>h;h++)i=t[e[h]],i.options.mass>0&&this._placeInTree(v.root,i);this.barnesHutTree=v},e._updateBranchMass=function(t,e){var i=t.mass+e.options.mass,s=1/i;t.centerOfMass.x=t.centerOfMass.x*t.mass+e.x*e.options.mass,t.centerOfMass.x*=s,t.centerOfMass.y=t.centerOfMass.y*t.mass+e.y*e.options.mass,t.centerOfMass.y*=s,t.mass=i;var o=Math.max(Math.max(e.height,e.radius),e.width);t.maxWidth=t.maxWidthe.x?t.children.NW.range.maxY>e.y?this._placeInRegion(t,e,"NW"):this._placeInRegion(t,e,"SW"):t.children.NW.range.maxY>e.y?this._placeInRegion(t,e,"NE"):this._placeInRegion(t,e,"SE")},e._placeInRegion=function(t,e,i){switch(t.children[i].childrenCount){case 0:t.children[i].children.data=e,t.children[i].childrenCount=1,this._updateBranchMass(t.children[i],e);break;case 1:t.children[i].children.data.x==e.x&&t.children[i].children.data.y==e.y?(e.x+=Math.random(),e.y+=Math.random()):(this._splitBranch(t.children[i]),this._placeInTree(t.children[i],e));break;case 4:this._placeInTree(t.children[i],e)}},e._splitBranch=function(t){var 
e=null;1==t.childrenCount&&(e=t.children.data,t.mass=0,t.centerOfMass.x=0,t.centerOfMass.y=0),t.childrenCount=4,t.children.data=null,this._insertRegion(t,"NW"),this._insertRegion(t,"NE"),this._insertRegion(t,"SW"),this._insertRegion(t,"SE"),null!=e&&this._placeInTree(t,e)},e._insertRegion=function(t,e){var i,s,o,n,r=.5*t.size;switch(e){case"NW":i=t.range.minX,s=t.range.minX+r,o=t.range.minY,n=t.range.minY+r;break;case"NE":i=t.range.minX+r,s=t.range.maxX,o=t.range.minY,n=t.range.minY+r;break;case"SW":i=t.range.minX,s=t.range.minX+r,o=t.range.minY+r,n=t.range.maxY;break;case"SE":i=t.range.minX+r,s=t.range.maxX,o=t.range.minY+r,n=t.range.maxY}t.children[e]={centerOfMass:{x:0,y:0},mass:0,range:{minX:i,maxX:s,minY:o,maxY:n},size:.5*t.size,calcSize:2*t.calcSize,children:{data:null},maxWidth:0,level:t.level+1,childrenCount:0}},e._drawTree=function(t,e){void 0!==this.barnesHutTree&&(t.lineWidth=1,this._drawBranch(this.barnesHutTree.root,t,e))},e._drawBranch=function(t,e,i){void 0===i&&(i="#FF0000"),4==t.childrenCount&&(this._drawBranch(t.children.NW,e),this._drawBranch(t.children.NE,e),this._drawBranch(t.children.SE,e),this._drawBranch(t.children.SW,e)),e.strokeStyle=i,e.beginPath(),e.moveTo(t.range.minX,t.range.minY),e.lineTo(t.range.maxX,t.range.minY),e.stroke(),e.beginPath(),e.moveTo(t.range.maxX,t.range.minY),e.lineTo(t.range.maxX,t.range.maxY),e.stroke(),e.beginPath(),e.moveTo(t.range.maxX,t.range.maxY),e.lineTo(t.range.minX,t.range.maxY),e.stroke(),e.beginPath(),e.moveTo(t.range.minX,t.range.maxY),e.lineTo(t.range.minX,t.range.minY),e.stroke()}},function(t){t.exports=function(t){return t.webpackPolyfill||(t.deprecate=function(){},t.paths=[],t.children=[],t.webpackPolyfill=1),t}},function(t,e){(function(e){t.exports=e}).call(e,{})}])}); -//# sourceMappingURL=vis.map diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index 4910744d1d790..e7c1d475d4e52 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -106,14 +106,18 @@ span.rest-uri { } pre { - font-size: 0.8em; + font-size: 12px; + line-height: 18px; + padding: 6px; + margin: 0; + border-radius: 3px; } .stage-details { max-height: 100px; overflow-y: auto; margin: 0; - transition: max-height 0.5s ease-out, padding 0.5s ease-out; + transition: max-height 0.25s ease-out, padding 0.25s ease-out; } .stage-details.collapsed { @@ -135,7 +139,7 @@ pre { max-height: 300px; overflow-y: auto; margin: 0; - transition: max-height 0.5s ease-out, padding 0.5s ease-out; + transition: max-height 0.25s ease-out, padding 0.25s ease-out; } .stacktrace-details.collapsed { @@ -145,7 +149,7 @@ pre { border: none; } -span.expand-additional-metrics { +span.expand-additional-metrics, span.expand-dag-viz { cursor: pointer; } @@ -158,7 +162,7 @@ span.additional-metric-title { } .tooltip { - font-weight: normal; + font-weight: normal; } .arrow-open { @@ -166,9 +170,9 @@ span.additional-metric-title { height: 0; border-left: 5px solid transparent; border-right: 5px solid transparent; - border-top: 5px solid black; - float: left; - margin-top: 6px; + border-top: 5px solid #08c; + display: inline-block; + margin-bottom: 2px; } .arrow-closed { @@ -176,8 +180,10 @@ span.additional-metric-title { height: 0; border-top: 5px solid transparent; border-bottom: 5px solid transparent; - border-left: 5px solid black; + border-left: 5px solid #08c; display: inline-block; + margin-left: 2px; + margin-right: 
3px; } .version { @@ -196,3 +202,17 @@ span.additional-metric-title { .serialization_time, .getting_result_time { display: none; } + +.accordion-inner { + background: #f5f5f5; +} + +.accordion-inner pre { + border: 0; + padding: 0; + background: none; +} + +a.expandbutton { + cursor: pointer; +} diff --git a/core/src/main/scala/org/apache/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala index 330df1d59a9b1..5a8d17bd99933 100644 --- a/core/src/main/scala/org/apache/spark/Accumulators.scala +++ b/core/src/main/scala/org/apache/spark/Accumulators.scala @@ -228,7 +228,7 @@ GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Serializa * @tparam T result type */ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T], name: Option[String]) - extends Accumulable[T,T](initialValue, param, name) { + extends Accumulable[T, T](initialValue, param, name) { def this(initialValue: T, param: AccumulatorParam[T]) = this(initialValue, param, None) } diff --git a/core/src/main/scala/org/apache/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala index af9765d313e9e..ceeb58075d345 100644 --- a/core/src/main/scala/org/apache/spark/Aggregator.scala +++ b/core/src/main/scala/org/apache/spark/Aggregator.scala @@ -34,8 +34,8 @@ case class Aggregator[K, V, C] ( mergeValue: (C, V) => C, mergeCombiners: (C, C) => C) { - // When spilling is enabled sorting will happen externally, but not necessarily with an - // ExternalSorter. + // When spilling is enabled sorting will happen externally, but not necessarily with an + // ExternalSorter. private val isSpillEnabled = SparkEnv.get.conf.getBoolean("spark.shuffle.spill", true) @deprecated("use combineValuesByKey with TaskContext argument", "0.9.0") @@ -45,7 +45,7 @@ case class Aggregator[K, V, C] ( def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]], context: TaskContext): Iterator[(K, C)] = { if (!isSpillEnabled) { - val combiners = new AppendOnlyMap[K,C] + val combiners = new AppendOnlyMap[K, C] var kv: Product2[K, V] = null val update = (hadValue: Boolean, oldValue: C) => { if (hadValue) mergeValue(oldValue, kv._2) else createCombiner(kv._2) @@ -76,7 +76,7 @@ case class Aggregator[K, V, C] ( : Iterator[(K, C)] = { if (!isSpillEnabled) { - val combiners = new AppendOnlyMap[K,C] + val combiners = new AppendOnlyMap[K, C] var kc: Product2[K, C] = null val update = (hadValue: Boolean, oldValue: C) => { if (hadValue) mergeCombiners(oldValue, kc._2) else kc._2 diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 228d9149df2a2..9514604752640 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -21,7 +21,10 @@ import java.util.concurrent.TimeUnit import scala.collection.mutable +import com.codahale.metrics.{Gauge, MetricRegistry} + import org.apache.spark.scheduler._ +import org.apache.spark.metrics.source.Source import org.apache.spark.util.{ThreadUtils, Clock, SystemClock, Utils} /** @@ -88,7 +91,7 @@ private[spark] class ExecutorAllocationManager( // How long there must be backlogged tasks for before an addition is triggered (seconds) private val schedulerBacklogTimeoutS = conf.getTimeAsSeconds( - "spark.dynamicAllocation.schedulerBacklogTimeout", "5s") + "spark.dynamicAllocation.schedulerBacklogTimeout", "1s") // Same as above, but used only after 
`schedulerBacklogTimeoutS` is exceeded private val sustainedSchedulerBacklogTimeoutS = conf.getTimeAsSeconds( @@ -96,7 +99,7 @@ private[spark] class ExecutorAllocationManager( // How long an executor must be idle for before it is removed (seconds) private val executorIdleTimeoutS = conf.getTimeAsSeconds( - "spark.dynamicAllocation.executorIdleTimeout", "600s") + "spark.dynamicAllocation.executorIdleTimeout", "60s") // During testing, the methods to actually kill and add executors are mocked out private val testing = conf.getBoolean("spark.dynamicAllocation.testing", false) @@ -144,6 +147,9 @@ private[spark] class ExecutorAllocationManager( private val executor = ThreadUtils.newDaemonSingleThreadScheduledExecutor("spark-dynamic-executor-allocation") + // Metric source for ExecutorAllocationManager to expose internal status to MetricsSystem. + val executorAllocationManagerSource = new ExecutorAllocationManagerSource + /** * Verify that the settings specified through the config are valid. * If not, throw an appropriate exception. @@ -262,6 +268,8 @@ private[spark] class ExecutorAllocationManager( numExecutorsTarget = math.max(maxNeeded, minNumExecutors) client.requestTotalExecutors(numExecutorsTarget) numExecutorsToAdd = 1 + logInfo(s"Lowering target number of executors to $numExecutorsTarget because " + + s"not all requests are actually needed (previously $oldNumExecutorsTarget)") numExecutorsTarget - oldNumExecutorsTarget } else if (addTime != NOT_SET && now >= addTime) { val delta = addExecutors(maxNeeded) @@ -286,9 +294,8 @@ private[spark] class ExecutorAllocationManager( private def addExecutors(maxNumExecutorsNeeded: Int): Int = { // Do not request more executors if it would put our target over the upper bound if (numExecutorsTarget >= maxNumExecutors) { - val numExecutorsPending = numExecutorsTarget - executorIds.size - logDebug(s"Not adding executors because there are already ${executorIds.size} registered " + - s"and ${numExecutorsPending} pending executor(s) (limit $maxNumExecutors)") + logDebug(s"Not adding executors because our current target total " + + s"is already $numExecutorsTarget (limit $maxNumExecutors)") numExecutorsToAdd = 1 return 0 } @@ -304,10 +311,19 @@ private[spark] class ExecutorAllocationManager( // Ensure that our target fits within configured bounds: numExecutorsTarget = math.max(math.min(numExecutorsTarget, maxNumExecutors), minNumExecutors) + val delta = numExecutorsTarget - oldNumExecutorsTarget + + // If our target has not changed, do not send a message + // to the cluster manager and reset our exponential growth + if (delta == 0) { + numExecutorsToAdd = 1 + return 0 + } + val addRequestAcknowledged = testing || client.requestTotalExecutors(numExecutorsTarget) if (addRequestAcknowledged) { - val delta = numExecutorsTarget - oldNumExecutorsTarget - logInfo(s"Requesting $delta new executor(s) because tasks are backlogged" + + val executorsString = "executor" + { if (delta > 1) "s" else "" } + logInfo(s"Requesting $delta new $executorsString because tasks are backlogged" + s" (new desired total will be $numExecutorsTarget)") numExecutorsToAdd = if (delta == numExecutorsToAdd) { numExecutorsToAdd * 2 @@ -414,7 +430,7 @@ private[spark] class ExecutorAllocationManager( * This resets all variables used for adding executors. 
*/ private def onSchedulerQueueEmpty(): Unit = synchronized { - logDebug(s"Clearing timer to add executors because there are no more pending tasks") + logDebug("Clearing timer to add executors because there are no more pending tasks") addTime = NOT_SET numExecutorsToAdd = 1 } @@ -579,6 +595,29 @@ private[spark] class ExecutorAllocationManager( } } + /** + * Metric source for ExecutorAllocationManager to expose its internal executor allocation + * status to MetricsSystem. + * Note: These metrics heavily rely on the internal implementation of + * ExecutorAllocationManager, metrics or value of metrics will be changed when internal + * implementation is changed, so these metrics are not stable across Spark version. + */ + private[spark] class ExecutorAllocationManagerSource extends Source { + val sourceName = "ExecutorAllocationManager" + val metricRegistry = new MetricRegistry() + + private def registerGauge[T](name: String, value: => T, defaultValue: T): Unit = { + metricRegistry.register(MetricRegistry.name("executors", name), new Gauge[T] { + override def getValue: T = synchronized { Option(value).getOrElse(defaultValue) } + }) + } + + registerGauge("numberExecutorsToAdd", numExecutorsToAdd, 0) + registerGauge("numberExecutorsPendingToRemove", executorsPendingToRemove.size, 0) + registerGauge("numberAllExecutors", executorIds.size, 0) + registerGauge("numberTargetExecutors", numExecutorsTarget, 0) + registerGauge("numberMaxNeededExecutors", maxNumExecutorsNeeded(), 0) + } } private object ExecutorAllocationManager { diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala index 91f9ef8ce7185..48792a958130c 100644 --- a/core/src/main/scala/org/apache/spark/FutureAction.scala +++ b/core/src/main/scala/org/apache/spark/FutureAction.scala @@ -150,7 +150,7 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: } override def isCompleted: Boolean = jobWaiter.jobFinished - + override def isCancelled: Boolean = _cancelled override def value: Option[Try[T]] = { diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index f2b024ff6cb67..6909015ff66e6 100644 --- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -29,7 +29,7 @@ import org.apache.spark.util.{ThreadUtils, Utils} /** * A heartbeat from executors to the driver. This is a shared message used by several internal - * components to convey liveness or execution information for in-progress tasks. It will also + * components to convey liveness or execution information for in-progress tasks. It will also * expire the hosts that have not heartbeated for more than spark.network.timeout. 
*/ private[spark] case class Heartbeat( @@ -43,8 +43,8 @@ private[spark] case class Heartbeat( */ private[spark] case object TaskSchedulerIsSet -private[spark] case object ExpireDeadHosts - +private[spark] case object ExpireDeadHosts + private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean) /** @@ -62,18 +62,18 @@ private[spark] class HeartbeatReceiver(sc: SparkContext) // "spark.network.timeout" uses "seconds", while `spark.storage.blockManagerSlaveTimeoutMs` uses // "milliseconds" - private val slaveTimeoutMs = + private val slaveTimeoutMs = sc.conf.getTimeAsMs("spark.storage.blockManagerSlaveTimeoutMs", "120s") - private val executorTimeoutMs = + private val executorTimeoutMs = sc.conf.getTimeAsSeconds("spark.network.timeout", s"${slaveTimeoutMs}ms") * 1000 - + // "spark.network.timeoutInterval" uses "seconds", while // "spark.storage.blockManagerTimeoutIntervalMs" uses "milliseconds" - private val timeoutIntervalMs = + private val timeoutIntervalMs = sc.conf.getTimeAsMs("spark.storage.blockManagerTimeoutIntervalMs", "60s") - private val checkTimeoutIntervalMs = + private val checkTimeoutIntervalMs = sc.conf.getTimeAsSeconds("spark.network.timeoutInterval", s"${timeoutIntervalMs}ms") * 1000 - + private var timeoutCheckingTask: ScheduledFuture[_] = null // "eventLoopThread" is used to run some pretty fast actions. The actions running in it should not @@ -140,7 +140,7 @@ private[spark] class HeartbeatReceiver(sc: SparkContext) } } } - + override def onStop(): Unit = { if (timeoutCheckingTask != null) { timeoutCheckingTask.cancel(true) diff --git a/core/src/main/scala/org/apache/spark/HttpFileServer.scala b/core/src/main/scala/org/apache/spark/HttpFileServer.scala index 7e706bcc42f04..7cf7bc0dc6810 100644 --- a/core/src/main/scala/org/apache/spark/HttpFileServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpFileServer.scala @@ -50,8 +50,8 @@ private[spark] class HttpFileServer( def stop() { httpServer.stop() - - // If we only stop sc, but the driver process still run as a services then we need to delete + + // If we only stop sc, but the driver process still run as a services then we need to delete // the tmp dir, if not, it will create too many tmp dirs try { Utils.deleteRecursively(baseDir) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 16072283edbe9..018422827e1c8 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -367,7 +367,11 @@ private[spark] object MapOutputTracker extends Logging { // Opposite of serializeMapStatuses. def deserializeMapStatuses(bytes: Array[Byte]): Array[MapStatus] = { val objIn = new ObjectInputStream(new GZIPInputStream(new ByteArrayInputStream(bytes))) - objIn.readObject().asInstanceOf[Array[MapStatus]] + Utils.tryWithSafeFinally { + objIn.readObject().asInstanceOf[Array[MapStatus]] + } { + objIn.close() + } } // Convert an array of MapStatuses to locations and sizes for a given reduce ID. 
If diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index b8d244408bc5b..82889bcd30988 100644 --- a/core/src/main/scala/org/apache/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -103,7 +103,7 @@ class HashPartitioner(partitions: Int) extends Partitioner { */ class RangePartitioner[K : Ordering : ClassTag, V]( @transient partitions: Int, - @transient rdd: RDD[_ <: Product2[K,V]], + @transient rdd: RDD[_ <: Product2[K, V]], private var ascending: Boolean = true) extends Partitioner { @@ -185,7 +185,7 @@ class RangePartitioner[K : Ordering : ClassTag, V]( } override def equals(other: Any): Boolean = other match { - case r: RangePartitioner[_,_] => + case r: RangePartitioner[_, _] => r.rangeBounds.sameElements(rangeBounds) && r.ascending == ascending case _ => false @@ -249,7 +249,7 @@ private[spark] object RangePartitioner { * @param sampleSizePerPartition max sample size per partition * @return (total number of items, an array of (partitionId, number of items, sample)) */ - def sketch[K:ClassTag]( + def sketch[K : ClassTag]( rdd: RDD[K], sampleSizePerPartition: Int): (Long, Array[(Int, Int, Array[K])]) = { val shift = rdd.id @@ -272,7 +272,7 @@ private[spark] object RangePartitioner { * @param partitions number of partitions * @return selected bounds */ - def determineBounds[K:Ordering:ClassTag]( + def determineBounds[K : Ordering : ClassTag]( candidates: ArrayBuffer[(K, Float)], partitions: Int): Array[K] = { val ordering = implicitly[Ordering[K]] diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index a8fc90ad2050e..46d72841dccce 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -227,7 +227,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { def getSizeAsBytes(key: String, defaultValue: String): Long = { Utils.byteStringAsBytes(get(key, defaultValue)) } - + /** * Get a size parameter as Kibibytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then Kibibytes are assumed. @@ -244,7 +244,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { def getSizeAsKb(key: String, defaultValue: String): Long = { Utils.byteStringAsKb(get(key, defaultValue)) } - + /** * Get a size parameter as Mebibytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then Mebibytes are assumed. @@ -261,7 +261,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { def getSizeAsMb(key: String, defaultValue: String): Long = { Utils.byteStringAsMb(get(key, defaultValue)) } - + /** * Get a size parameter as Gibibytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then Gibibytes are assumed. @@ -278,7 +278,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging { def getSizeAsGb(key: String, defaultValue: String): Long = { Utils.byteStringAsGb(get(key, defaultValue)) } - + /** Get a parameter as an Option */ def getOption(key: String): Option[String] = { Option(settings.get(key)).orElse(getDeprecatedConfig(key, this)) @@ -480,8 +480,8 @@ private[spark] object SparkConf extends Logging { "spark.kryoserializer.buffer.mb was previously specified as '0.064'. Fractional values " + "are no longer accepted. 
To specify the equivalent now, one may use '64k'.") ) - - Map(configs.map { cfg => (cfg.key -> cfg) }:_*) + + Map(configs.map { cfg => (cfg.key -> cfg) } : _*) } /** @@ -508,8 +508,8 @@ private[spark] object SparkConf extends Logging { "spark.reducer.maxSizeInFlight" -> Seq( AlternateConfig("spark.reducer.maxMbInFlight", "1.4")), "spark.kryoserializer.buffer" -> - Seq(AlternateConfig("spark.kryoserializer.buffer.mb", "1.4", - translation = s => s"${s.toDouble * 1000}k")), + Seq(AlternateConfig("spark.kryoserializer.buffer.mb", "1.4", + translation = s => s"${(s.toDouble * 1000).toInt}k")), "spark.kryoserializer.buffer.max" -> Seq( AlternateConfig("spark.kryoserializer.buffer.max.mb", "1.4")), "spark.shuffle.file.buffer" -> Seq( diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 4ef90546a2452..b4c0d4c2f5456 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -371,6 +371,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli throw new SparkException("An application name must be set in your configuration") } + // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster + // yarn-standalone is deprecated, but still supported + if ((master == "yarn-cluster" || master == "yarn-standalone") && + !_conf.contains("spark.yarn.app.id")) { + throw new SparkException("Detected yarn-cluster mode, but isn't running on a cluster. " + + "Deployment to YARN is not supported directly by SparkContext. Please use spark-submit.") + } + if (_conf.getBoolean("spark.logConf", false)) { logInfo("Spark configuration:\n" + _conf.toDebugString) } @@ -381,7 +389,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER) - _jars =_conf.getOption("spark.jars").map(_.split(",")).map(_.filter(_.size != 0)).toSeq.flatten + _jars = _conf.getOption("spark.jars").map(_.split(",")).map(_.filter(_.size != 0)).toSeq.flatten _files = _conf.getOption("spark.files").map(_.split(",")).map(_.filter(_.size != 0)) .toSeq.flatten @@ -407,15 +415,17 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli if (master == "yarn-client") System.setProperty("SPARK_YARN_MODE", "true") + // "_jobProgressListener" should be set up before creating SparkEnv because when creating + // "SparkEnv", some messages will be posted to "listenerBus" and we should not miss them. 
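// Illustrative sketch, not part of this patch, of what the translation tweak above changes:
// a legacy fractional-megabyte value is now rendered as a whole number of kibibytes, which
// the newer size parsers accept ("64k") instead of a fractional string ("64.0k").
val translate: String => String = s => s"${(s.toDouble * 1000).toInt}k"
assert(translate("0.064") == "64k")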
+ _jobProgressListener = new JobProgressListener(_conf) + listenerBus.addListener(jobProgressListener) + // Create the Spark execution environment (cache, map output tracker, etc) _env = createSparkEnv(_conf, isLocal, listenerBus) SparkEnv.set(_env) _metadataCleaner = new MetadataCleaner(MetadataCleanerType.SPARK_CONTEXT, this.cleanup, _conf) - _jobProgressListener = new JobProgressListener(_conf) - listenerBus.addListener(jobProgressListener) - _statusTracker = new SparkStatusTracker(this) _progressBar = @@ -428,7 +438,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _ui = if (conf.getBoolean("spark.ui.enabled", true)) { Some(SparkUI.createLiveUI(this, _conf, listenerBus, _jobProgressListener, - _env.securityManager,appName)) + _env.securityManager, appName, startTime = startTime)) } else { // For tests, do not enable the UI None @@ -537,6 +547,9 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli _taskScheduler.postStartHook() _env.metricsSystem.registerSource(new DAGSchedulerSource(dagScheduler)) _env.metricsSystem.registerSource(new BlockManagerSource(_env.blockManager)) + _executorAllocationManager.foreach { e => + _env.metricsSystem.registerSource(e.executorAllocationManagerSource) + } // Make sure the context is stopped if the user forgets about it. This avoids leaving // unfinished event logs around after the JVM exits cleanly. It doesn't help if the JVM @@ -659,6 +672,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli setLocalProperty(SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL, null) } + /** + * Execute a block of code in a scope such that all new RDDs created in this body will + * be part of the same scope. For more detail, see {{org.apache.spark.rdd.RDDOperationScope}}. + * + * Note: Return statements are NOT allowed in the given body. + */ + private[spark] def withScope[U](body: => U): U = RDDOperationScope.withScope[U](this)(body) + // Methods for creating RDDs /** Distribute a local Scala collection to form an RDD. @@ -669,23 +690,99 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * @note avoid using `parallelize(Seq())` to create an empty `RDD`. Consider `emptyRDD` for an * RDD with no partitions, or `parallelize(Seq[T]())` for an RDD of `T` with empty partitions. */ - def parallelize[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { + def parallelize[T: ClassTag]( + seq: Seq[T], + numSlices: Int = defaultParallelism): RDD[T] = withScope { assertNotStopped() new ParallelCollectionRDD[T](this, seq, numSlices, Map[Int, Seq[String]]()) } + /** + * Creates a new RDD[Long] containing elements from `start` to `end`(exclusive), increased by + * `step` every element. + * + * @note if we need to cache this RDD, we should make sure each partition does not exceed limit. + * + * @param start the start value. + * @param end the end value. + * @param step the incremental step + * @param numSlices the partition number of the new RDD. 
+ * @return + */ + def range( + start: Long, + end: Long, + step: Long = 1, + numSlices: Int = defaultParallelism): RDD[Long] = withScope { + assertNotStopped() + // when step is 0, range will run infinitely + require(step != 0, "step cannot be 0") + val numElements: BigInt = { + val safeStart = BigInt(start) + val safeEnd = BigInt(end) + if ((safeEnd - safeStart) % step == 0 || safeEnd > safeStart ^ step > 0) { + (safeEnd - safeStart) / step + } else { + // the remainder has the same sign with range, could add 1 more + (safeEnd - safeStart) / step + 1 + } + } + parallelize(0 until numSlices, numSlices).mapPartitionsWithIndex((i, _) => { + val partitionStart = (i * numElements) / numSlices * step + start + val partitionEnd = (((i + 1) * numElements) / numSlices) * step + start + def getSafeMargin(bi: BigInt): Long = + if (bi.isValidLong) { + bi.toLong + } else if (bi > 0) { + Long.MaxValue + } else { + Long.MinValue + } + val safePartitionStart = getSafeMargin(partitionStart) + val safePartitionEnd = getSafeMargin(partitionEnd) + + new Iterator[Long] { + private[this] var number: Long = safePartitionStart + private[this] var overflow: Boolean = false + + override def hasNext = + if (!overflow) { + if (step > 0) { + number < safePartitionEnd + } else { + number > safePartitionEnd + } + } else false + + override def next() = { + val ret = number + number += step + if (number < ret ^ step < 0) { + // we have Long.MaxValue + Long.MaxValue < Long.MaxValue + // and Long.MinValue + Long.MinValue > Long.MinValue, so iff the step causes a step + // back, we are pretty sure that we have an overflow. + overflow = true + } + ret + } + } + }) + } + /** Distribute a local Scala collection to form an RDD. * * This method is identical to `parallelize`. */ - def makeRDD[T: ClassTag](seq: Seq[T], numSlices: Int = defaultParallelism): RDD[T] = { + def makeRDD[T: ClassTag]( + seq: Seq[T], + numSlices: Int = defaultParallelism): RDD[T] = withScope { parallelize(seq, numSlices) } /** Distribute a local Scala collection to form an RDD, with one or more * location preferences (hostnames of Spark nodes) for each object. * Create a new partition for each collection item. */ - def makeRDD[T: ClassTag](seq: Seq[(T, Seq[String])]): RDD[T] = { + def makeRDD[T: ClassTag](seq: Seq[(T, Seq[String])]): RDD[T] = withScope { assertNotStopped() val indexToPrefs = seq.zipWithIndex.map(t => (t._2, t._1._2)).toMap new ParallelCollectionRDD[T](this, seq.map(_._1), seq.size, indexToPrefs) @@ -695,10 +792,12 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * Read a text file from HDFS, a local file system (available on all nodes), or any * Hadoop-supported file system URI, and return it as an RDD of Strings. */ - def textFile(path: String, minPartitions: Int = defaultMinPartitions): RDD[String] = { + def textFile( + path: String, + minPartitions: Int = defaultMinPartitions): RDD[String] = withScope { assertNotStopped() hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], - minPartitions).map(pair => pair._2.toString).setName(path) + minPartitions).map(pair => pair._2.toString) } /** @@ -728,8 +827,9 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * * @param minPartitions A suggestion value of the minimal splitting number for input data. 
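// Illustrative usage, not part of this patch, of the new SparkContext.range method defined
// above; `sc` is assumed to be an existing SparkContext. The end bound is exclusive, the
// step may be negative, and a zero step is rejected up front.
val evens = sc.range(0L, 10L, step = 2)       // 0, 2, 4, 6, 8
val countdown = sc.range(5L, 0L, step = -1)   // 5, 4, 3, 2, 1
assert(evens.count() == 5 && countdown.count() == 5)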
*/ - def wholeTextFiles(path: String, minPartitions: Int = defaultMinPartitions): - RDD[(String, String)] = { + def wholeTextFiles( + path: String, + minPartitions: Int = defaultMinPartitions): RDD[(String, String)] = withScope { assertNotStopped() val job = new NewHadoopJob(hadoopConfiguration) // Use setInputPaths so that wholeTextFiles aligns with hadoopFile/textFile in taking @@ -776,8 +876,9 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * @note Small files are preferred; very large files may cause bad performance. */ @Experimental - def binaryFiles(path: String, minPartitions: Int = defaultMinPartitions): - RDD[(String, PortableDataStream)] = { + def binaryFiles( + path: String, + minPartitions: Int = defaultMinPartitions): RDD[(String, PortableDataStream)] = withScope { assertNotStopped() val job = new NewHadoopJob(hadoopConfiguration) // Use setInputPaths so that binaryFiles aligns with hadoopFile/textFile in taking @@ -806,15 +907,17 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * @return An RDD of data with values, represented as byte arrays */ @Experimental - def binaryRecords(path: String, recordLength: Int, conf: Configuration = hadoopConfiguration) - : RDD[Array[Byte]] = { + def binaryRecords( + path: String, + recordLength: Int, + conf: Configuration = hadoopConfiguration): RDD[Array[Byte]] = withScope { assertNotStopped() conf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, recordLength) val br = newAPIHadoopFile[LongWritable, BytesWritable, FixedLengthBinaryInputFormat](path, classOf[FixedLengthBinaryInputFormat], classOf[LongWritable], classOf[BytesWritable], - conf=conf) + conf = conf) val data = br.map { case (k, v) => val bytes = v.getBytes assert(bytes.length == recordLength, "Byte array does not have correct length") @@ -848,8 +951,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli inputFormatClass: Class[_ <: InputFormat[K, V]], keyClass: Class[K], valueClass: Class[V], - minPartitions: Int = defaultMinPartitions - ): RDD[(K, V)] = { + minPartitions: Int = defaultMinPartitions): RDD[(K, V)] = withScope { assertNotStopped() // Add necessary security credentials to the JobConf before broadcasting it. SparkHadoopUtil.get.addCredentials(conf) @@ -869,8 +971,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli inputFormatClass: Class[_ <: InputFormat[K, V]], keyClass: Class[K], valueClass: Class[V], - minPartitions: Int = defaultMinPartitions - ): RDD[(K, V)] = { + minPartitions: Int = defaultMinPartitions): RDD[(K, V)] = withScope { assertNotStopped() // A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it. val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration)) @@ -901,7 +1002,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli */ def hadoopFile[K, V, F <: InputFormat[K, V]] (path: String, minPartitions: Int) - (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = { + (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = withScope { hadoopFile(path, fm.runtimeClass.asInstanceOf[Class[F]], km.runtimeClass.asInstanceOf[Class[K]], @@ -924,13 +1025,14 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * copy them using a `map` function. 
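// Illustrative sketch, not part of this patch, of the caveat repeated in the Scaladoc above:
// Hadoop RecordReaders reuse the same Writable instances, so copy values out with a map
// before caching. `sc` is assumed to be an existing SparkContext and the path is hypothetical.
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat
val lines = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///logs/app")
  .map { case (_, text) => text.toString }   // materialise a copy of the reused Text
  .cache()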
*/ def hadoopFile[K, V, F <: InputFormat[K, V]](path: String) - (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = + (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = withScope { hadoopFile[K, V, F](path, defaultMinPartitions) + } /** Get an RDD for a Hadoop file with an arbitrary new API InputFormat. */ def newAPIHadoopFile[K, V, F <: NewInputFormat[K, V]] (path: String) - (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = { + (implicit km: ClassTag[K], vm: ClassTag[V], fm: ClassTag[F]): RDD[(K, V)] = withScope { newAPIHadoopFile( path, fm.runtimeClass.asInstanceOf[Class[F]], @@ -953,7 +1055,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli fClass: Class[F], kClass: Class[K], vClass: Class[V], - conf: Configuration = hadoopConfiguration): RDD[(K, V)] = { + conf: Configuration = hadoopConfiguration): RDD[(K, V)] = withScope { assertNotStopped() // The call to new NewHadoopJob automatically adds security credentials to conf, // so we don't need to explicitly add them ourselves @@ -987,7 +1089,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli conf: Configuration = hadoopConfiguration, fClass: Class[F], kClass: Class[K], - vClass: Class[V]): RDD[(K, V)] = { + vClass: Class[V]): RDD[(K, V)] = withScope { assertNotStopped() // Add necessary security credentials to the JobConf. Required to access secure HDFS. val jconf = new JobConf(conf) @@ -1007,7 +1109,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli keyClass: Class[K], valueClass: Class[V], minPartitions: Int - ): RDD[(K, V)] = { + ): RDD[(K, V)] = withScope { assertNotStopped() val inputFormatClass = classOf[SequenceFileInputFormat[K, V]] hadoopFile(path, inputFormatClass, keyClass, valueClass, minPartitions) @@ -1021,7 +1123,10 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * If you plan to directly cache, sort, or aggregate Hadoop writable objects, you should first * copy them using a `map` function. 
* */ - def sequenceFile[K, V](path: String, keyClass: Class[K], valueClass: Class[V]): RDD[(K, V)] = { + def sequenceFile[K, V]( + path: String, + keyClass: Class[K], + valueClass: Class[V]): RDD[(K, V)] = withScope { assertNotStopped() sequenceFile(path, keyClass, valueClass, defaultMinPartitions) } @@ -1051,16 +1156,17 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli def sequenceFile[K, V] (path: String, minPartitions: Int = defaultMinPartitions) (implicit km: ClassTag[K], vm: ClassTag[V], - kcf: () => WritableConverter[K], vcf: () => WritableConverter[V]) - : RDD[(K, V)] = { - assertNotStopped() - val kc = kcf() - val vc = vcf() - val format = classOf[SequenceFileInputFormat[Writable, Writable]] - val writables = hadoopFile(path, format, + kcf: () => WritableConverter[K], vcf: () => WritableConverter[V]): RDD[(K, V)] = { + withScope { + assertNotStopped() + val kc = clean(kcf)() + val vc = clean(vcf)() + val format = classOf[SequenceFileInputFormat[Writable, Writable]] + val writables = hadoopFile(path, format, kc.writableClass(km).asInstanceOf[Class[Writable]], vc.writableClass(vm).asInstanceOf[Class[Writable]], minPartitions) - writables.map { case (k, v) => (kc.convert(k), vc.convert(v)) } + writables.map { case (k, v) => (kc.convert(k), vc.convert(v)) } + } } /** @@ -1073,21 +1179,18 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli */ def objectFile[T: ClassTag]( path: String, - minPartitions: Int = defaultMinPartitions - ): RDD[T] = { + minPartitions: Int = defaultMinPartitions): RDD[T] = withScope { assertNotStopped() sequenceFile(path, classOf[NullWritable], classOf[BytesWritable], minPartitions) .flatMap(x => Utils.deserialize[Array[T]](x._2.getBytes, Utils.getContextOrSparkClassLoader)) } - protected[spark] def checkpointFile[T: ClassTag]( - path: String - ): RDD[T] = { + protected[spark] def checkpointFile[T: ClassTag](path: String): RDD[T] = withScope { new CheckpointRDD[T](this, path) } /** Build the union of a list of RDDs. */ - def union[T: ClassTag](rdds: Seq[RDD[T]]): RDD[T] = { + def union[T: ClassTag](rdds: Seq[RDD[T]]): RDD[T] = withScope { val partitioners = rdds.flatMap(_.partitioner).toSet if (rdds.forall(_.partitioner.isDefined) && partitioners.size == 1) { new PartitionerAwareUnionRDD(this, rdds) @@ -1097,8 +1200,9 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli } /** Build the union of a list of RDDs passed as variable-length arguments. */ - def union[T: ClassTag](first: RDD[T], rest: RDD[T]*): RDD[T] = + def union[T: ClassTag](first: RDD[T], rest: RDD[T]*): RDD[T] = withScope { union(Seq(first) ++ rest) + } /** Get an RDD that has no partitions or elements. 
*/ def emptyRDD[T: ClassTag]: EmptyRDD[T] = new EmptyRDD[T](this) @@ -1163,7 +1267,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli */ def accumulableCollection[R <% Growable[T] with TraversableOnce[T] with Serializable: ClassTag, T] (initialValue: R): Accumulable[R, T] = { - val param = new GrowableAccumulableParam[R,T] + val param = new GrowableAccumulableParam[R, T] val acc = new Accumulable(initialValue, param) cleaner.foreach(_.registerAccumulatorForCleanup(acc)) acc @@ -1212,7 +1316,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli val uri = new URI(path) val schemeCorrectedPath = uri.getScheme match { case null | "local" => new File(path).getCanonicalFile.toURI.toString - case _ => path + case _ => path } val hadoopPath = new Path(schemeCorrectedPath) @@ -1657,7 +1761,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli partitions: Seq[Int], allowLocal: Boolean ): Array[U] = { - runJob(rdd, (context: TaskContext, iter: Iterator[T]) => func(iter), partitions, allowLocal) + val cleanedFunc = clean(func) + runJob(rdd, (ctx: TaskContext, it: Iterator[T]) => cleanedFunc(it), partitions, allowLocal) } /** @@ -1711,7 +1816,8 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli val callSite = getCallSite logInfo("Starting job: " + callSite.shortForm) val start = System.nanoTime - val result = dagScheduler.runApproximateJob(rdd, func, evaluator, callSite, timeout, + val cleanedFunc = clean(func) + val result = dagScheduler.runApproximateJob(rdd, cleanedFunc, evaluator, callSite, timeout, localProperties.get) logInfo( "Job finished: " + callSite.shortForm + ", took " + (System.nanoTime - start) / 1e9 + " s") @@ -1778,7 +1884,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli * * @param f the closure to clean * @param checkSerializable whether or not to immediately check f for serializability - * @throws SparkException if checkSerializable is set but f is not + * @throws SparkException if checkSerializable is set but f is not * serializable */ private[spark] def clean[F <: AnyRef](f: F, checkSerializable: Boolean = true): F = { @@ -2060,10 +2166,10 @@ object SparkContext extends Logging { } private[spark] val SPARK_JOB_DESCRIPTION = "spark.job.description" - private[spark] val SPARK_JOB_GROUP_ID = "spark.jobGroup.id" - private[spark] val SPARK_JOB_INTERRUPT_ON_CANCEL = "spark.job.interruptOnCancel" + private[spark] val RDD_SCOPE_KEY = "spark.rdd.scope" + private[spark] val RDD_SCOPE_NO_OVERRIDE_KEY = "spark.rdd.scope.noOverride" /** * Executor id for the driver. 
In earlier versions of Spark, this was ``, but this was diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 0c4d28f786edd..a185954089528 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -298,7 +298,7 @@ object SparkEnv extends Logging { } } - val mapOutputTracker = if (isDriver) { + val mapOutputTracker = if (isDriver) { new MapOutputTrackerMaster(conf) } else { new MapOutputTrackerWorker(conf) @@ -313,7 +313,8 @@ object SparkEnv extends Logging { // Let the user specify short names for shuffle managers val shortShuffleMgrNames = Map( "hash" -> "org.apache.spark.shuffle.hash.HashShuffleManager", - "sort" -> "org.apache.spark.shuffle.sort.SortShuffleManager") + "sort" -> "org.apache.spark.shuffle.sort.SortShuffleManager", + "tungsten-sort" -> "org.apache.spark.shuffle.unsafe.UnsafeShuffleManager") val shuffleMgrName = conf.get("spark.shuffle.manager", "sort") val shuffleMgrClass = shortShuffleMgrNames.getOrElse(shuffleMgrName.toLowerCase, shuffleMgrName) val shuffleManager = instantiateClass[ShuffleManager](shuffleMgrClass) @@ -347,7 +348,7 @@ object SparkEnv extends Logging { val fileServerPort = conf.getInt("spark.fileserver.port", 0) val server = new HttpFileServer(conf, securityManager, fileServerPort) server.initialize() - conf.set("spark.fileserver.uri", server.serverUri) + conf.set("spark.fileserver.uri", server.serverUri) server } else { null @@ -378,7 +379,7 @@ object SparkEnv extends Logging { } val outputCommitCoordinator = mockOutputCommitCoordinator.getOrElse { - new OutputCommitCoordinator(conf) + new OutputCommitCoordinator(conf, isDriver) } val outputCommitCoordinatorRef = registerOrLookupEndpoint("OutputCommitCoordinator", new OutputCommitCoordinatorEndpoint(rpcEnv, outputCommitCoordinator)) diff --git a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index 2ec42d3aea169..59ac82ccec53b 100644 --- a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -50,8 +50,8 @@ class SparkHadoopWriter(@transient jobConf: JobConf) private var jID: SerializableWritable[JobID] = null private var taID: SerializableWritable[TaskAttemptID] = null - @transient private var writer: RecordWriter[AnyRef,AnyRef] = null - @transient private var format: OutputFormat[AnyRef,AnyRef] = null + @transient private var writer: RecordWriter[AnyRef, AnyRef] = null + @transient private var format: OutputFormat[AnyRef, AnyRef] = null @transient private var committer: OutputCommitter = null @transient private var jobContext: JobContext = null @transient private var taskContext: TaskAttemptContext = null @@ -114,10 +114,10 @@ class SparkHadoopWriter(@transient jobConf: JobConf) // ********* Private Functions ********* - private def getOutputFormat(): OutputFormat[AnyRef,AnyRef] = { + private def getOutputFormat(): OutputFormat[AnyRef, AnyRef] = { if (format == null) { format = conf.value.getOutputFormat() - .asInstanceOf[OutputFormat[AnyRef,AnyRef]] + .asInstanceOf[OutputFormat[AnyRef, AnyRef]] } format } @@ -138,7 +138,7 @@ class SparkHadoopWriter(@transient jobConf: JobConf) private def getTaskContext(): TaskAttemptContext = { if (taskContext == null) { - taskContext = newTaskAttemptContext(conf.value, taID.value) + taskContext = newTaskAttemptContext(conf.value, taID.value) } taskContext } diff --git 
a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 398ca41e16151..a1ebbecf93b7b 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -51,7 +51,7 @@ private[spark] object TestUtils { classpathUrls: Seq[URL] = Seq()): URL = { val tempDir = Utils.createTempDir() val files1 = for (name <- classNames) yield { - createCompiledClass(name, tempDir, toStringValue, classpathUrls = classpathUrls) + createCompiledClass(name, tempDir, toStringValue, classpathUrls = classpathUrls) } val files2 = for ((childName, baseName) <- classNamesWithBase) yield { createCompiledClass(childName, tempDir, toStringValue, baseName, classpathUrls) @@ -105,23 +105,18 @@ private[spark] object TestUtils { URI.create(s"string:///${name.replace(".", "/")}${SOURCE.extension}") } - private class JavaSourceFromString(val name: String, val code: String) + private[spark] class JavaSourceFromString(val name: String, val code: String) extends SimpleJavaFileObject(createURI(name), SOURCE) { override def getCharContent(ignoreEncodingErrors: Boolean): String = code } - /** Creates a compiled class with the given name. Class file will be placed in destDir. */ + /** Creates a compiled class with the source file. Class file will be placed in destDir. */ def createCompiledClass( className: String, destDir: File, - toStringValue: String = "", - baseClass: String = null, - classpathUrls: Seq[URL] = Seq()): File = { + sourceFile: JavaSourceFromString, + classpathUrls: Seq[URL]): File = { val compiler = ToolProvider.getSystemJavaCompiler - val extendsText = Option(baseClass).map { c => s" extends ${c}" }.getOrElse("") - val sourceFile = new JavaSourceFromString(className, - "public class " + className + extendsText + " implements java.io.Serializable {" + - " @Override public String toString() { return \"" + toStringValue + "\"; }}") // Calling this outputs a class file in pwd. It's easier to just rename the file than // build a custom FileManager that controls the output location. @@ -144,4 +139,18 @@ private[spark] object TestUtils { assert(out.exists(), "Destination file not moved: " + out.getAbsolutePath()) out } + + /** Creates a compiled class with the given name. Class file will be placed in destDir. */ + def createCompiledClass( + className: String, + destDir: File, + toStringValue: String = "", + baseClass: String = null, + classpathUrls: Seq[URL] = Seq()): File = { + val extendsText = Option(baseClass).map { c => s" extends ${c}" }.getOrElse("") + val sourceFile = new JavaSourceFromString(className, + "public class " + className + extendsText + " implements java.io.Serializable {" + + " @Override public String toString() { return \"" + toStringValue + "\"; }}") + createCompiledClass(className, destDir, sourceFile, classpathUrls) + } } diff --git a/core/src/main/scala/org/apache/spark/annotation/Private.java b/core/src/main/scala/org/apache/spark/annotation/Private.java new file mode 100644 index 0000000000000..9082fcf0c84bc --- /dev/null +++ b/core/src/main/scala/org/apache/spark/annotation/Private.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.annotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * A class that is considered private to the internals of Spark -- there is a high-likelihood + * they will be changed in future versions of Spark. + * + * This should be used only when the standard Scala / Java means of protecting classes are + * insufficient. In particular, Java has no equivalent of private[spark], so we use this annotation + * in its place. + * + * NOTE: If there exists a Scaladoc comment that immediately precedes this annotation, the first + * line of the comment must be ":: Private ::" with no trailing blank line. This is because + * of the known issue that Scaladoc displays only either the annotation or the comment, whichever + * comes first. + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.TYPE, ElementType.FIELD, ElementType.METHOD, ElementType.PARAMETER, + ElementType.CONSTRUCTOR, ElementType.LOCAL_VARIABLE, ElementType.PACKAGE}) +public @interface Private {} diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 61af867b11b9c..a650df605b92e 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -137,7 +137,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) */ def sample(withReplacement: Boolean, fraction: JDouble): JavaDoubleRDD = sample(withReplacement, fraction, Utils.random.nextLong) - + /** * Return a sampled subset of this RDD. */ diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index db4e996feb31c..ed312770ee131 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -101,7 +101,7 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) /** * Return a sampled subset of this RDD. - * + * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] @@ -109,10 +109,10 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T]) */ def sample(withReplacement: Boolean, fraction: Double): JavaRDD[T] = sample(withReplacement, fraction, Utils.random.nextLong) - + /** * Return a sampled subset of this RDD. 
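// Illustrative sketch, not part of this patch, of how the new @Private annotation added
// above is intended to be used; the class below is hypothetical. Per the Javadoc, a
// preceding Scaladoc comment should start with ":: Private ::".
/** :: Private :: An internal helper that must remain public for Java interoperability. */
@org.apache.spark.annotation.Private
class InternalShuffleHelper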
- * + * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 8bf0627fc420d..7b92f618dc571 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -60,7 +60,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { @deprecated("Use partitions() instead.", "1.1.0") def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq) - + /** Set of partitions in this RDD. */ def partitions: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq) @@ -96,7 +96,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { def mapPartitionsWithIndex[R]( f: JFunction2[jl.Integer, java.util.Iterator[T], java.util.Iterator[R]], preservesPartitioning: Boolean = false): JavaRDD[R] = - new JavaRDD(rdd.mapPartitionsWithIndex(((a,b) => f(a,asJavaIterator(b))), + new JavaRDD(rdd.mapPartitionsWithIndex(((a, b) => f(a, asJavaIterator(b))), preservesPartitioning)(fakeClassTag))(fakeClassTag) /** @@ -386,9 +386,16 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Aggregate the elements of each partition, and then the results for all the partitions, using a - * given associative function and a neutral "zero value". The function op(t1, t2) is allowed to - * modify t1 and return it as its result value to avoid object allocation; however, it should not - * modify t2. + * given associative and commutative function and a neutral "zero value". The function + * op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object + * allocation; however, it should not modify t2. + * + * This behaves somewhat differently from fold operations implemented for non-distributed + * collections in functional languages like Scala. This fold operation may be applied to + * partitions individually, and then fold those results into the final result, rather than + * apply the fold to each element sequentially in some defined ordering. For functions + * that are not commutative, the result may differ from that of a fold applied to a + * non-distributed collection. 
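// Illustrative sketch, not part of this patch, of the caveat documented above (shown with
// the Scala API; `sc` is assumed to be an existing SparkContext): fold combines per-partition
// results, so only functions that are both associative and commutative are guaranteed to
// match a sequential fold.
val total = sc.parallelize(1 to 4, 2).fold(0)(_ + _)   // 10, regardless of partitioning
// A non-commutative function such as string concatenation may give a result that differs
// from folding the same elements sequentially.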
*/ def fold(zeroValue: T)(f: JFunction2[T, T, T]): T = rdd.fold(zeroValue)(f) @@ -485,9 +492,9 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { new java.util.ArrayList(arr) } - def takeSample(withReplacement: Boolean, num: Int): JList[T] = + def takeSample(withReplacement: Boolean, num: Int): JList[T] = takeSample(withReplacement, num, Utils.random.nextLong) - + def takeSample(withReplacement: Boolean, num: Int, seed: Long): JList[T] = { import scala.collection.JavaConversions._ val arr: java.util.Collection[T] = rdd.takeSample(withReplacement, num, seed).toSeq diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 7409dc2d866f6..55a37f8c944b2 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -47,6 +47,7 @@ private[spark] class PythonRDD( pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]]) extends RDD[Array[Byte]](parent) { @@ -210,6 +211,8 @@ private[spark] class PythonRDD( val dataOut = new DataOutputStream(stream) // Partition index dataOut.writeInt(split.index) + // Python version of driver + PythonRDD.writeUTF(pythonVer, dataOut) // sparkFilesDir PythonRDD.writeUTF(SparkFiles.getRootDirectory, dataOut) // Python includes (*.zip and *.egg files) @@ -720,7 +723,7 @@ private[spark] object PythonRDD extends Logging { val converted = convertRDD(rdd, keyConverterClass, valueConverterClass, new JavaToWritableConverter) val fc = Utils.classForName(outputFormatClass).asInstanceOf[Class[F]] - converted.saveAsHadoopFile(path, kc, vc, fc, new JobConf(mergedConf), codec=codec) + converted.saveAsHadoopFile(path, kc, vc, fc, new JobConf(mergedConf), codec = codec) } /** @@ -794,10 +797,10 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort: val bufferSize = SparkEnv.get.conf.getInt("spark.buffer.size", 65536) - /** + /** * We try to reuse a single Socket to transfer accumulator updates, as they are all added * by the DAGScheduler's single-threaded actor anyway. - */ + */ @transient var socket: Socket = _ def openSocket(): Socket = synchronized { @@ -840,6 +843,7 @@ private class PythonAccumulatorParam(@transient serverHost: String, serverPort: * An Wrapper for Python Broadcast, which is written into disk by Python. It also will * write the data into disk after deserialization, then Python can read it from disks. 
*/ +// scalastyle:off no.finalize private[spark] class PythonBroadcast(@transient var path: String) extends Serializable { /** @@ -881,3 +885,4 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial } } } +// scalastyle:on no.finalize diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index acbaba6791850..90dacaeb93429 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -31,7 +31,7 @@ private[spark] object PythonUtils { def sparkPythonPath: String = { val pythonPath = new ArrayBuffer[String] for (sparkHome <- sys.env.get("SPARK_HOME")) { - pythonPath += Seq(sparkHome, "python").mkString(File.separator) + pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator) pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.8.2.1-src.zip").mkString(File.separator) } pythonPath ++= SparkContext.jarOfObject(this) @@ -50,7 +50,21 @@ private[spark] object PythonUtils { /** * Convert list of T into seq of T (for calling API with varargs) */ - def toSeq[T](cols: JList[T]): Seq[T] = { - cols.toList.toSeq + def toSeq[T](vs: JList[T]): Seq[T] = { + vs.toList.toSeq + } + + /** + * Convert list of T into array of T (for calling API with array) + */ + def toArray[T](vs: JList[T]): Array[T] = { + vs.toArray().asInstanceOf[Array[T]] + } + + /** + * Convert java map of K, V into Map of K, V (for calling API with varargs) + */ + def toScalaMap[K, V](jm: java.util.Map[K, V]): Map[K, V] = { + jm.toMap } } diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 257491e90dd66..1f1debcf84ad4 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -56,16 +56,13 @@ private[spark] object SerDeUtil extends Logging { // {'\0', 0, 0, 0} /* Sentinel */ // }; // TODO: support Py_UNICODE with 2 bytes - // FIXME: unpickle array of float is wrong in Pyrolite, so we reverse the - // machine code for float/double here to workaround it. 
- // we should fix this after Pyrolite fix them val machineCodes: Map[Char, Int] = if (ByteOrder.nativeOrder().equals(ByteOrder.BIG_ENDIAN)) { Map('c' -> 1, 'B' -> 0, 'b' -> 1, 'H' -> 3, 'h' -> 5, 'I' -> 7, 'i' -> 9, - 'L' -> 11, 'l' -> 13, 'f' -> 14, 'd' -> 16, 'u' -> 21 + 'L' -> 11, 'l' -> 13, 'f' -> 15, 'd' -> 17, 'u' -> 21 ) } else { Map('c' -> 1, 'B' -> 0, 'b' -> 1, 'H' -> 2, 'h' -> 4, 'I' -> 6, 'i' -> 8, - 'L' -> 10, 'l' -> 12, 'f' -> 15, 'd' -> 17, 'u' -> 20 + 'L' -> 10, 'l' -> 12, 'f' -> 14, 'd' -> 16, 'u' -> 20 ) } override def construct(args: Array[Object]): Object = { diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala index 3a2c94bd9d875..d24c650d37bb0 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackend.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackend.scala @@ -18,7 +18,7 @@ package org.apache.spark.api.r import java.io.{DataOutputStream, File, FileOutputStream, IOException} -import java.net.{InetSocketAddress, ServerSocket} +import java.net.{InetAddress, InetSocketAddress, ServerSocket} import java.util.concurrent.TimeUnit import io.netty.bootstrap.ServerBootstrap @@ -44,11 +44,11 @@ private[spark] class RBackend { bossGroup = new NioEventLoopGroup(2) val workerGroup = bossGroup val handler = new RBackendHandler(this) - + bootstrap = new ServerBootstrap() .group(bossGroup, workerGroup) .channel(classOf[NioServerSocketChannel]) - + bootstrap.childHandler(new ChannelInitializer[SocketChannel]() { def initChannel(ch: SocketChannel): Unit = { ch.pipeline() @@ -65,7 +65,7 @@ private[spark] class RBackend { } }) - channelFuture = bootstrap.bind(new InetSocketAddress(0)) + channelFuture = bootstrap.bind(new InetSocketAddress("localhost", 0)) channelFuture.syncUninterruptibly() channelFuture.channel().localAddress().asInstanceOf[InetSocketAddress].getPort() } @@ -101,7 +101,7 @@ private[spark] object RBackend extends Logging { try { // bind to random port val boundPort = sparkRBackend.init() - val serverSocket = new ServerSocket(0, 1) + val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) val listenPort = serverSocket.getLocalPort() // tell the R process via temporary file diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala index 0075d963711f1..2e86984c66b3a 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala @@ -77,7 +77,7 @@ private[r] class RBackendHandler(server: RBackend) val reply = bos.toByteArray ctx.write(reply) } - + override def channelReadComplete(ctx: ChannelHandlerContext): Unit = { ctx.flush() } @@ -124,7 +124,7 @@ private[r] class RBackendHandler(server: RBackend) } throw new Exception(s"No matched method found for $cls.$methodName") } - val ret = methods.head.invoke(obj, args:_*) + val ret = methods.head.invoke(obj, args : _*) // Write status bit writeInt(dos, 0) @@ -135,7 +135,7 @@ private[r] class RBackendHandler(server: RBackend) matchMethod(numArgs, args, x.getParameterTypes) }.head - val obj = ctor.newInstance(args:_*) + val obj = ctor.newInstance(args : _*) writeInt(dos, 0) writeObject(dos, obj.asInstanceOf[AnyRef]) diff --git a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala index 6fea5e1144f2f..4dfa7325934ff 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRDD.scala +++ 
b/core/src/main/scala/org/apache/spark/api/r/RRDD.scala @@ -18,7 +18,7 @@ package org.apache.spark.api.r import java.io._ -import java.net.ServerSocket +import java.net.{InetAddress, ServerSocket} import java.util.{Map => JMap} import scala.collection.JavaConversions._ @@ -55,7 +55,7 @@ private abstract class BaseRRDD[T: ClassTag, U: ClassTag]( val parentIterator = firstParent[T].iterator(partition, context) // we expect two connections - val serverSocket = new ServerSocket(0, 2) + val serverSocket = new ServerSocket(0, 2, InetAddress.getByName("localhost")) val listenPort = serverSocket.getLocalPort() // The stdout/stderr is shared by multiple tasks, because we use one daemon @@ -309,7 +309,7 @@ private class StringRRDD[T: ClassTag]( } private object SpecialLengths { - val TIMING_DATA = -1 + val TIMING_DATA = -1 } private[r] class BufferedStreamThread( @@ -355,7 +355,6 @@ private[r] object RRDD { val sparkConf = new SparkConf().setAppName(appName) .setSparkHome(sparkHome) - .setJars(jars) // Override `master` if we have a user-specified value if (master != "") { @@ -373,7 +372,11 @@ private[r] object RRDD { sparkConf.setExecutorEnv(name.asInstanceOf[String], value.asInstanceOf[String]) } - new JavaSparkContext(sparkConf) + val jsc = new JavaSparkContext(sparkConf) + jars.foreach { jar => + jsc.addJar(jar) + } + jsc } /** @@ -414,7 +417,7 @@ private[r] object RRDD { synchronized { if (daemonChannel == null) { // we expect one connections - val serverSocket = new ServerSocket(0, 1) + val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) val daemonPort = serverSocket.getLocalPort errThread = createRProcess(rLibDir, daemonPort, "daemon.R") // the socket used to send out the input of task diff --git a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala index 371dfe454d1a2..f8e3f1a79082e 100644 --- a/core/src/main/scala/org/apache/spark/api/r/SerDe.scala +++ b/core/src/main/scala/org/apache/spark/api/r/SerDe.scala @@ -157,9 +157,11 @@ private[spark] object SerDe { val keysLen = readInt(in) val keys = (0 until keysLen).map(_ => readTypedObject(in, keysType)) - val valuesType = readObjectType(in) val valuesLen = readInt(in) - val values = (0 until valuesLen).map(_ => readTypedObject(in, valuesType)) + val values = (0 until valuesLen).map(_ => { + val valueType = readObjectType(in) + readTypedObject(in, valueType) + }) mapAsJavaMap(keys.zip(values).toMap) } else { new java.util.HashMap[Object, Object]() diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala index 4457c75e8b0fc..b69af639f7862 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala @@ -125,7 +125,7 @@ private[broadcast] object HttpBroadcast extends Logging { securityManager = securityMgr if (isDriver) { createServer(conf) - conf.set("spark.httpBroadcast.uri", serverUri) + conf.set("spark.httpBroadcast.uri", serverUri) } serverUri = conf.get("spark.httpBroadcast.uri") cleaner = new MetadataCleaner(MetadataCleanerType.HTTP_BROADCAST, cleanup, conf) @@ -187,7 +187,7 @@ private[broadcast] object HttpBroadcast extends Logging { } private def read[T: ClassTag](id: Long): T = { - logDebug("broadcast read server: " + serverUri + " id: broadcast-" + id) + logDebug("broadcast read server: " + serverUri + " id: broadcast-" + id) val url = serverUri + "/" + 
BroadcastBlockId(id).name var uc: URLConnection = null diff --git a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala index c048b78910f38..b4edb6109e839 100644 --- a/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala +++ b/core/src/main/scala/org/apache/spark/deploy/FaultToleranceTest.scala @@ -65,7 +65,7 @@ private object FaultToleranceTest extends App with Logging { private val workers = ListBuffer[TestWorkerInfo]() private var sc: SparkContext = _ - private val zk = SparkCuratorUtil.newClient(conf) + private val zk = SparkCuratorUtil.newClient(conf) private var numPassed = 0 private var numFailed = 0 diff --git a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index f0e77c2ba982b..0550f00a172ab 100644 --- a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -43,15 +43,21 @@ class LocalSparkCluster( private val localHostname = Utils.localHostName() private val masterActorSystems = ArrayBuffer[ActorSystem]() private val workerActorSystems = ArrayBuffer[ActorSystem]() + // exposed for testing + var masterWebUIPort = -1 def start(): Array[String] = { logInfo("Starting a local Spark cluster with " + numWorkers + " workers.") // Disable REST server on Master in this mode unless otherwise specified - val _conf = conf.clone().setIfMissing("spark.master.rest.enabled", "false") + val _conf = conf.clone() + .setIfMissing("spark.master.rest.enabled", "false") + .set("spark.shuffle.service.enabled", "false") /* Start the Master */ - val (masterSystem, masterPort, _, _) = Master.startSystemAndActor(localHostname, 0, 0, _conf) + val (masterSystem, masterPort, webUiPort, _) = + Master.startSystemAndActor(localHostname, 0, 0, _conf) + masterWebUIPort = webUiPort masterActorSystems += masterSystem val masterUrl = "spark://" + Utils.localHostNameForURI() + ":" + masterPort val masters = Array(masterUrl) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 53e18c4bcec23..c2ed43a5397d6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -18,9 +18,11 @@ package org.apache.spark.deploy import java.net.URI +import java.io.File import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ +import scala.util.Try import org.apache.spark.api.python.PythonUtils import org.apache.spark.util.{RedirectThread, Utils} @@ -81,16 +83,13 @@ object PythonRunner { throw new IllegalArgumentException("Launching Python applications through " + s"spark-submit is currently only supported for local files: $path") } - val windows = Utils.isWindows || testWindows - var formattedPath = if (windows) Utils.formatWindowsPath(path) else path - - // Strip the URI scheme from the path - formattedPath = - new URI(formattedPath).getScheme match { - case null => formattedPath - case Utils.windowsDrive(d) if windows => formattedPath - case _ => new URI(formattedPath).getPath - } + // get path when scheme is file. 
+ val uri = Try(new URI(path)).getOrElse(new File(path).toURI) + var formattedPath = uri.getScheme match { + case null => path + case "file" | "local" => uri.getPath + case _ => null + } // Guard against malformed paths potentially throwing NPE if (formattedPath == null) { @@ -99,7 +98,9 @@ object PythonRunner { // In Windows, the drive should not be prefixed with "/" // For instance, python does not understand "/C:/path/to/sheep.py" - formattedPath = if (windows) formattedPath.stripPrefix("/") else formattedPath + if (Utils.isWindows && formattedPath.matches("/[a-zA-Z]:/.*")) { + formattedPath = formattedPath.stripPrefix("/") + } formattedPath } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index b563034457a91..7fa75ac8c2b54 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -22,22 +22,22 @@ import java.lang.reflect.Method import java.security.PrivilegedExceptionAction import java.util.{Arrays, Comparator} +import scala.collection.JavaConversions._ +import scala.concurrent.duration._ +import scala.language.postfixOps + import com.google.common.primitives.Longs import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.fs.FileSystem.Statistics +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.security.{Credentials, UserGroupInformation} -import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils - -import scala.collection.JavaConversions._ -import scala.concurrent.duration._ -import scala.language.postfixOps +import org.apache.spark.{Logging, SparkConf, SparkException} /** * :: DeveloperApi :: @@ -199,13 +199,43 @@ class SparkHadoopUtil extends Logging { * that file. */ def listLeafStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { - def recurse(path: Path): Array[FileStatus] = { - val (directories, leaves) = fs.listStatus(path).partition(_.isDir) - leaves ++ directories.flatMap(f => listLeafStatuses(fs, f.getPath)) + listLeafStatuses(fs, fs.getFileStatus(basePath)) + } + + /** + * Get [[FileStatus]] objects for all leaf children (files) under the given base path. If the + * given path points to a file, return a single-element collection containing [[FileStatus]] of + * that file. 
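// Illustrative usage, not part of this patch, of the leaf-listing helpers around here; the
// Hadoop configuration and base path are assumptions supplied by the caller.
import org.apache.hadoop.fs.{FileSystem, Path}
val basePath = new Path("/data/warehouse/events")   // hypothetical location
val fs = basePath.getFileSystem(new org.apache.hadoop.conf.Configuration())
val leafFiles = SparkHadoopUtil.get.listLeafStatuses(fs, basePath)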
+ */ + def listLeafStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { + def recurse(status: FileStatus): Seq[FileStatus] = { + val (directories, leaves) = fs.listStatus(status.getPath).partition(_.isDir) + leaves ++ directories.flatMap(f => listLeafStatuses(fs, f)) } - val baseStatus = fs.getFileStatus(basePath) - if (baseStatus.isDir) recurse(basePath) else Array(baseStatus) + if (baseStatus.isDir) recurse(baseStatus) else Seq(baseStatus) + } + + def listLeafDirStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { + listLeafDirStatuses(fs, fs.getFileStatus(basePath)) + } + + def listLeafDirStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { + def recurse(status: FileStatus): Seq[FileStatus] = { + val (directories, files) = fs.listStatus(status.getPath).partition(_.isDir) + val leaves = if (directories.isEmpty) Seq(status) else Seq.empty[FileStatus] + leaves ++ directories.flatMap(dir => listLeafDirStatuses(fs, dir)) + } + + assert(baseStatus.isDir) + recurse(baseStatus) + } + + def globPath(pattern: Path): Seq[Path] = { + val fs = pattern.getFileSystem(conf) + Option(fs.globStatus(pattern)).map { statuses => + statuses.map(_.getPath.makeQualified(fs.getUri, fs.getWorkingDirectory)).toSeq + }.getOrElse(Seq.empty[Path]) } /** diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8a0327984e195..48098c9d64f61 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -332,6 +332,47 @@ object SparkSubmit { } } + // In yarn mode for a python app, add pyspark archives to files + // that can be distributed with the job + if (args.isPython && clusterManager == YARN) { + var pyArchives: String = null + val pyArchivesEnvOpt = sys.env.get("PYSPARK_ARCHIVES_PATH") + if (pyArchivesEnvOpt.isDefined) { + pyArchives = pyArchivesEnvOpt.get + } else { + if (!sys.env.contains("SPARK_HOME")) { + printErrorAndExit("SPARK_HOME does not exist for python application in yarn mode.") + } + val pythonPath = new ArrayBuffer[String] + for (sparkHome <- sys.env.get("SPARK_HOME")) { + val pyLibPath = Seq(sparkHome, "python", "lib").mkString(File.separator) + val pyArchivesFile = new File(pyLibPath, "pyspark.zip") + if (!pyArchivesFile.exists()) { + printErrorAndExit("pyspark.zip does not exist for python application in yarn mode.") + } + val py4jFile = new File(pyLibPath, "py4j-0.8.2.1-src.zip") + if (!py4jFile.exists()) { + printErrorAndExit("py4j-0.8.2.1-src.zip does not exist for python application " + + "in yarn mode.") + } + pythonPath += pyArchivesFile.getAbsolutePath() + pythonPath += py4jFile.getAbsolutePath() + } + pyArchives = pythonPath.mkString(",") + } + + pyArchives = pyArchives.split(",").map { localPath => + val localURI = Utils.resolveURI(localPath) + if (localURI.getScheme != "local") { + args.files = mergeFileLists(args.files, localURI.toString) + new Path(localPath).getName + } else { + localURI.getPath + } + }.mkString(File.pathSeparator) + sysProps("spark.submit.pyArchives") = pyArchives + } + // If we're running a R app, set the main class to our specific R runner if (args.isR && deployMode == CLIENT) { if (args.primaryResource == SPARKR_SHELL) { @@ -712,7 +753,9 @@ private[spark] object SparkSubmitUtils { * @param artifactId the artifactId of the coordinate * @param version the version of the coordinate */ - private[deploy] case class MavenCoordinate(groupId: String, 
artifactId: String, version: String) + private[deploy] case class MavenCoordinate(groupId: String, artifactId: String, version: String) { + override def toString: String = s"$groupId:$artifactId:$version" + } /** * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided @@ -735,6 +778,10 @@ private[spark] object SparkSubmitUtils { } } + /** Path of the local Maven cache. */ + private[spark] def m2Path: File = new File(System.getProperty("user.home"), + ".m2" + File.separator + "repository" + File.separator) + /** * Extracts maven coordinates from a comma-delimited string * @param remoteRepos Comma-delimited string of remote repositories @@ -748,8 +795,7 @@ private[spark] object SparkSubmitUtils { val localM2 = new IBiblioResolver localM2.setM2compatible(true) - val m2Path = ".m2" + File.separator + "repository" + File.separator - localM2.setRoot(new File(System.getProperty("user.home"), m2Path).toURI.toString) + localM2.setRoot(m2Path.toURI.toString) localM2.setUsepoms(true) localM2.setName("local-m2-cache") cr.add(localM2) @@ -823,7 +869,7 @@ private[spark] object SparkSubmitUtils { md.addDependency(dd) } } - + /** Add exclusion rules for dependencies already included in the spark-assembly */ def addExclusionRules( ivySettings: IvySettings, @@ -874,69 +920,72 @@ private[spark] object SparkSubmitUtils { "" } else { val sysOut = System.out - // To prevent ivy from logging to system out - System.setOut(printStream) - val artifacts = extractMavenCoordinates(coordinates) - // Default configuration name for ivy - val ivyConfName = "default" - // set ivy settings for location of cache - val ivySettings: IvySettings = new IvySettings - // Directories for caching downloads through ivy and storing the jars when maven coordinates - // are supplied to spark-submit - val alternateIvyCache = ivyPath.getOrElse("") - val packagesDirectory: File = - if (alternateIvyCache.trim.isEmpty) { - new File(ivySettings.getDefaultIvyUserDir, "jars") + try { + // To prevent ivy from logging to system out + System.setOut(printStream) + val artifacts = extractMavenCoordinates(coordinates) + // Default configuration name for ivy + val ivyConfName = "default" + // set ivy settings for location of cache + val ivySettings: IvySettings = new IvySettings + // Directories for caching downloads through ivy and storing the jars when maven coordinates + // are supplied to spark-submit + val alternateIvyCache = ivyPath.getOrElse("") + val packagesDirectory: File = + if (alternateIvyCache.trim.isEmpty) { + new File(ivySettings.getDefaultIvyUserDir, "jars") + } else { + ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) + ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) + new File(alternateIvyCache, "jars") + } + printStream.println( + s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") + printStream.println(s"The jars for the packages stored in: $packagesDirectory") + // create a pattern matcher + ivySettings.addMatcher(new GlobPatternMatcher) + // create the dependency resolvers + val repoResolver = createRepoResolvers(remoteRepos, ivySettings) + ivySettings.addResolver(repoResolver) + ivySettings.setDefaultResolver(repoResolver.getName) + + val ivy = Ivy.newInstance(ivySettings) + // Set resolve options to download transitive dependencies as well + val resolveOptions = new ResolveOptions + resolveOptions.setTransitive(true) + val retrieveOptions = new RetrieveOptions + // Turn downloading and logging off for testing + if (isTest) { + 
resolveOptions.setDownload(false) + resolveOptions.setLog(LogOptions.LOG_QUIET) + retrieveOptions.setLog(LogOptions.LOG_QUIET) } else { - ivySettings.setDefaultIvyUserDir(new File(alternateIvyCache)) - ivySettings.setDefaultCache(new File(alternateIvyCache, "cache")) - new File(alternateIvyCache, "jars") + resolveOptions.setDownload(true) } - printStream.println( - s"Ivy Default Cache set to: ${ivySettings.getDefaultCache.getAbsolutePath}") - printStream.println(s"The jars for the packages stored in: $packagesDirectory") - // create a pattern matcher - ivySettings.addMatcher(new GlobPatternMatcher) - // create the dependency resolvers - val repoResolver = createRepoResolvers(remoteRepos, ivySettings) - ivySettings.addResolver(repoResolver) - ivySettings.setDefaultResolver(repoResolver.getName) - - val ivy = Ivy.newInstance(ivySettings) - // Set resolve options to download transitive dependencies as well - val resolveOptions = new ResolveOptions - resolveOptions.setTransitive(true) - val retrieveOptions = new RetrieveOptions - // Turn downloading and logging off for testing - if (isTest) { - resolveOptions.setDownload(false) - resolveOptions.setLog(LogOptions.LOG_QUIET) - retrieveOptions.setLog(LogOptions.LOG_QUIET) - } else { - resolveOptions.setDownload(true) - } - // A Module descriptor must be specified. Entries are dummy strings - val md = getModuleDescriptor - md.setDefaultConf(ivyConfName) + // A Module descriptor must be specified. Entries are dummy strings + val md = getModuleDescriptor + md.setDefaultConf(ivyConfName) - // Add exclusion rules for Spark and Scala Library - addExclusionRules(ivySettings, ivyConfName, md) - // add all supplied maven artifacts as dependencies - addDependenciesToIvy(md, artifacts, ivyConfName) + // Add exclusion rules for Spark and Scala Library + addExclusionRules(ivySettings, ivyConfName, md) + // add all supplied maven artifacts as dependencies + addDependenciesToIvy(md, artifacts, ivyConfName) - // resolve dependencies - val rr: ResolveReport = ivy.resolve(md, resolveOptions) - if (rr.hasError) { - throw new RuntimeException(rr.getAllProblemMessages.toString) + // resolve dependencies + val rr: ResolveReport = ivy.resolve(md, resolveOptions) + if (rr.hasError) { + throw new RuntimeException(rr.getAllProblemMessages.toString) + } + // retrieve all resolved dependencies + ivy.retrieve(rr.getModuleDescriptor.getModuleRevisionId, + packagesDirectory.getAbsolutePath + File.separator + + "[organization]_[artifact]-[revision].[ext]", + retrieveOptions.setConfs(Array(ivyConfName))) + resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) + } finally { + System.setOut(sysOut) } - // retrieve all resolved dependencies - ivy.retrieve(rr.getModuleDescriptor.getModuleRevisionId, - packagesDirectory.getAbsolutePath + File.separator + - "[organization]_[artifact]-[revision].[ext]", - retrieveOptions.setConfs(Array(ivyConfName))) - System.setOut(sysOut) - resolveDependencyPaths(rr.getArtifacts.toArray, packagesDirectory) } } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 6a5011af17458..298a8201960d1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.history import org.apache.spark.ui.SparkUI -private[history] case class 
ApplicationAttemptInfo( +private[spark] case class ApplicationAttemptInfo( attemptId: Option[String], startTime: Long, endTime: Long, @@ -27,7 +27,7 @@ private[history] case class ApplicationAttemptInfo( sparkUser: String, completed: Boolean = false) -private[history] case class ApplicationHistoryInfo( +private[spark] case class ApplicationHistoryInfo( id: String, name: String, attempts: List[ApplicationAttemptInfo]) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 993763f3aa092..45c2be34c8680 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -17,23 +17,21 @@ package org.apache.spark.deploy.history -import java.io.{IOException, BufferedInputStream, FileNotFoundException, InputStream} +import java.io.{BufferedInputStream, FileNotFoundException, IOException, InputStream} import java.util.concurrent.{ExecutorService, Executors, TimeUnit} import scala.collection.mutable -import scala.concurrent.duration.Duration -import com.google.common.util.concurrent.ThreadFactoryBuilder - -import com.google.common.util.concurrent.MoreExecutors -import org.apache.hadoop.fs.permission.AccessControlException +import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.fs.permission.AccessControlException + +import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.ui.SparkUI import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} -import org.apache.spark.{Logging, SecurityManager, SparkConf} /** * A class that provides application history from event logs stored in the file system. 
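The try/finally block added around the Ivy resolution in SparkSubmitUtils above is there to restore System.out even when resolution throws. A minimal, self-contained sketch of that restore-on-exit pattern; the object and method names below are illustrative stand-ins, not Spark's API:

    import java.io.{ByteArrayOutputStream, PrintStream}

    object StdOutRedirectSketch {
      // Run `body` with System.out pointed at `sink`; the finally clause guarantees the
      // original stream comes back even if `body` throws, mirroring the Ivy change above.
      def withStdOut[T](sink: PrintStream)(body: => T): T = {
        val saved = System.out
        System.setOut(sink)
        try body finally System.setOut(saved)
      }

      def main(args: Array[String]): Unit = {
        val buffer = new ByteArrayOutputStream()
        withStdOut(new PrintStream(buffer)) {
          System.out.println("this goes to the buffer, not the console")
        }
        println(s"captured ${buffer.size()} bytes")
      }
    }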
@@ -151,7 +149,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val conf = this.conf.clone() val appSecManager = new SecurityManager(conf) SparkUI.createHistoryUI(conf, replayBus, appSecManager, appId, - HistoryServer.getAttemptURI(appId, attempt.attemptId)) + HistoryServer.getAttemptURI(appId, attempt.attemptId), attempt.startTime) // Do not call ui.bind() to avoid creating a new server for each application } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 754c8e9b6668b..5a0eb585a9049 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -25,6 +25,8 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{Logging, SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, ApplicationsListResource, + UIRoot} import org.apache.spark.ui.{SparkUI, UIUtils, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.{SignalLogger, Utils} @@ -45,7 +47,7 @@ class HistoryServer( provider: ApplicationHistoryProvider, securityManager: SecurityManager, port: Int) - extends WebUI(securityManager, port, conf) with Logging { + extends WebUI(securityManager, port, conf) with Logging with UIRoot { // How many applications to retain private val retainedApplications = conf.getInt("spark.history.retainedApplications", 50) @@ -56,7 +58,7 @@ class HistoryServer( require(parts.length == 1 || parts.length == 2, s"Invalid app key $key") val ui = provider .getAppUI(parts(0), if (parts.length > 1) Some(parts(1)) else None) - .getOrElse(throw new NoSuchElementException()) + .getOrElse(throw new NoSuchElementException(s"no app with key $key")) attachSparkUI(ui) ui } @@ -82,37 +84,37 @@ class HistoryServer( return } - val appKey = - if (parts.length == 3) { - s"${parts(1)}/${parts(2)}" - } else { - parts(1) + val appId = parts(1) + val attemptId = if (parts.length >= 3) Some(parts(2)) else None + + // Since we may have applications with multiple attempts mixed with applications with a + // single attempt, we need to try both. Try the single-attempt route first, and if an + // error is raised, then try the multiple attempt route. + if (!loadAppUi(appId, None) && (!attemptId.isDefined || !loadAppUi(appId, attemptId))) { + val msg =
<div class="row-fluid">Application {appId} not found.</div>
+ res.setStatus(HttpServletResponse.SC_NOT_FOUND) + UIUtils.basicSparkPage(msg, "Not Found").foreach { n => + res.getWriter().write(n.toString) } + return + } // Note we don't use the UI retrieved from the cache; the cache loader above will register // the app's UI, and all we need to do is redirect the user to the same URI that was // requested, and the proper data should be served at that point. - try { - appCache.get(appKey) - res.sendRedirect(res.encodeRedirectURL(req.getRequestURI())) - } catch { - case e: Exception => e.getCause() match { - case nsee: NoSuchElementException => - val msg =
<div class="row-fluid">Application {appKey} not found.</div>
- res.setStatus(HttpServletResponse.SC_NOT_FOUND) - UIUtils.basicSparkPage(msg, "Not Found").foreach( - n => res.getWriter().write(n.toString)) - - case cause: Exception => throw cause - } - } + res.sendRedirect(res.encodeRedirectURL(req.getRequestURI())) } + // SPARK-5983 ensure TRACE is not supported protected override def doTrace(req: HttpServletRequest, res: HttpServletResponse): Unit = { res.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED) } } + def getSparkUI(appKey: String): Option[SparkUI] = { + Option(appCache.get(appKey)) + } + initialize() /** @@ -123,6 +125,9 @@ class HistoryServer( */ def initialize() { attachPage(new HistoryPage(this)) + + attachHandler(ApiRootResource.getServletHandler(this)) + attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) val contextHandler = new ServletContextHandler @@ -160,7 +165,13 @@ class HistoryServer( * * @return List of all known applications. */ - def getApplicationList(): Iterable[ApplicationHistoryInfo] = provider.getListing() + def getApplicationList(): Iterable[ApplicationHistoryInfo] = { + provider.getListing() + } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + getApplicationList().iterator.map(ApplicationsListResource.appHistoryInfoToPublicAppInfo) + } /** * Returns the provider configuration to show in the listing page. @@ -169,6 +180,20 @@ class HistoryServer( */ def getProviderConfig(): Map[String, String] = provider.getConfig() + private def loadAppUi(appId: String, attemptId: Option[String]): Boolean = { + try { + appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse("")) + true + } catch { + case e: Exception => e.getCause() match { + case nsee: NoSuchElementException => + false + + case cause: Exception => throw cause + } + } + } + } /** @@ -189,8 +214,8 @@ object HistoryServer extends Logging { def main(argStrings: Array[String]) { SignalLogger.register(log) - initSecurity() new HistoryServerArguments(conf, argStrings) + initSecurity() val securityManager = new SecurityManager(conf) val providerName = conf.getOption("spark.history.provider") diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index a2a97a7877ce7..4692d22651c93 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -23,7 +23,7 @@ import org.apache.spark.util.Utils /** * Command-line parser for the master. 
*/ -private[history] class HistoryServerArguments(conf: SparkConf, args: Array[String]) +private[history] class HistoryServerArguments(conf: SparkConf, args: Array[String]) extends Logging { private var propertiesFile: String = null diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index f59d550d4f3b3..1620e95bea218 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -28,7 +28,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.ApplicationDescription import org.apache.spark.util.Utils -private[deploy] class ApplicationInfo( +private[spark] class ApplicationInfo( val startTime: Long, val id: String, val desc: ApplicationDescription, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 0fac3cdcf55e7..fccceb3ea528b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -165,7 +165,7 @@ private[master] class Master( (fsFactory.createPersistenceEngine(), fsFactory.createLeaderElectionAgent(this)) case "CUSTOM" => val clazz = Class.forName(conf.get("spark.deploy.recoveryMode.factory")) - val factory = clazz.getConstructor(conf.getClass, Serialization.getClass) + val factory = clazz.getConstructor(classOf[SparkConf], classOf[Serialization]) .newInstance(conf, SerializationExtension(context.system)) .asInstanceOf[StandaloneRecoveryModeFactory] (factory.createPersistenceEngine(), factory.createLeaderElectionAgent(this)) @@ -754,9 +754,9 @@ private[master] class Master( /** * Rebuild a new SparkUI from the given application's event logs. - * Return whether this is successful. + * Return the UI if successful, else None */ - private def rebuildSparkUI(app: ApplicationInfo): Boolean = { + private[master] def rebuildSparkUI(app: ApplicationInfo): Option[SparkUI] = { val appName = app.desc.name val notFoundBasePath = HistoryServer.UI_PATH_PREFIX + "/not-found" try { @@ -764,7 +764,7 @@ private[master] class Master( .getOrElse { // Event logging is not enabled for this application app.desc.appUiUrl = notFoundBasePath - return false + return None } val eventLogFilePrefix = EventLoggingListener.getLogPath( @@ -787,7 +787,7 @@ private[master] class Master( val logInput = EventLoggingListener.openEventLog(new Path(eventLogFile), fs) val replayBus = new ReplayListenerBus() val ui = SparkUI.createHistoryUI(new SparkConf, replayBus, new SecurityManager(conf), - appName + status, HistoryServer.UI_PATH_PREFIX + s"/${app.id}") + appName + status, HistoryServer.UI_PATH_PREFIX + s"/${app.id}", app.startTime) val maybeTruncated = eventLogFile.endsWith(EventLoggingListener.IN_PROGRESS) try { replayBus.replay(logInput, eventLogFile, maybeTruncated) @@ -798,7 +798,7 @@ private[master] class Master( webUi.attachSparkUI(ui) // Application UI is successfully rebuilt, so link the Master UI to it app.desc.appUiUrl = ui.basePath - true + Some(ui) } catch { case fnf: FileNotFoundException => // Event logging is enabled for this application, but no event logs are found @@ -808,7 +808,7 @@ private[master] class Master( msg += " Did you specify the correct logging directory?" 
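rebuildSparkUI now reports its outcome as Option[SparkUI] rather than a Boolean, so callers can consume the rebuilt UI directly instead of checking a flag and re-deriving it. A hedged sketch of that calling pattern; `rebuildUi` and its String result are illustrative stand-ins, not Spark's types:

    object OptionResultSketch {
      // Hypothetical stand-in for Master.rebuildSparkUI: Some(uiBasePath) on success, None on failure.
      def rebuildUi(appId: String): Option[String] =
        if (appId.startsWith("app-")) Some(s"/history/$appId") else None

      def main(args: Array[String]): Unit = {
        // With an Option, the caller tests for success and uses the result in one step.
        val message = rebuildUi("app-20150519-0001")
          .map(path => s"Application UI rebuilt at $path")
          .getOrElse("Application history not found")
        println(message)
      }
    }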
msg = URLEncoder.encode(msg, "UTF-8") app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&title=$title" - false + None case e: Exception => // Relay exception message to application UI page val title = s"Application history load error (${app.id})" @@ -817,7 +817,7 @@ private[master] class Master( logError(msg, e) msg = URLEncoder.encode(msg, "UTF-8") app.desc.appUiUrl = notFoundBasePath + s"?msg=$msg&exception=$exception&title=$title" - false + None } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala index 9c3f79f1244b7..66a9ff38678c6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala @@ -30,6 +30,11 @@ private[spark] class MasterSource(val master: Master) extends Source { override def getValue: Int = master.workers.size }) + // Gauge for alive worker numbers in cluster + metricRegistry.register(MetricRegistry.name("aliveWorkers"), new Gauge[Int]{ + override def getValue: Int = master.workers.filter(_.state == WorkerState.ALIVE).size + }) + // Gauge for application numbers in cluster metricRegistry.register(MetricRegistry.name("apps"), new Gauge[Int] { override def getValue: Int = master.apps.size diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala index 80db6d474b5c1..328d95a7a0c68 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ZooKeeperPersistenceEngine.scala @@ -32,7 +32,7 @@ import org.apache.spark.deploy.SparkCuratorUtil private[master] class ZooKeeperPersistenceEngine(conf: SparkConf, val serialization: Serialization) extends PersistenceEngine with Logging { - + private val WORKING_DIR = conf.get("spark.deploy.zookeeper.dir", "/spark") + "/master_status" private val zk: CuratorFramework = SparkCuratorUtil.newClient(conf) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 273f077bd8f57..06e265f99e231 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -23,10 +23,8 @@ import scala.concurrent.Await import scala.xml.Node import akka.pattern.ask -import org.json4s.JValue -import org.json4s.JsonAST.JNothing -import org.apache.spark.deploy.{ExecutorState, JsonProtocol} +import org.apache.spark.deploy.ExecutorState import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.ExecutorDesc import org.apache.spark.ui.{UIUtils, WebUIPage} @@ -37,21 +35,6 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") private val master = parent.masterActorRef private val timeout = parent.timeout - /** Executor details for a particular application */ - override def renderJson(request: HttpServletRequest): JValue = { - val appId = request.getParameter("appId") - val stateFuture = (master ? 
RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) - val app = state.activeApps.find(_.id == appId).getOrElse({ - state.completedApps.find(_.id == appId).getOrElse(null) - }) - if (app == null) { - JNothing - } else { - JsonProtocol.writeApplicationInfo(app) - } - } - /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { val appId = request.getParameter("appId") diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index 1f2c3fdbfb2bc..756927682cd24 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -35,10 +35,13 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private val master = parent.masterActorRef private val timeout = parent.timeout - override def renderJson(request: HttpServletRequest): JValue = { + def getMasterState: MasterStateResponse = { val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) - JsonProtocol.writeMasterState(state) + Await.result(stateFuture, timeout) + } + + override def renderJson(request: HttpServletRequest): JValue = { + JsonProtocol.writeMasterState(getMasterState) } def handleAppKillRequest(request: HttpServletRequest): Unit = { @@ -68,8 +71,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { /** Index view listing applications and executors */ def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] - val state = Await.result(stateFuture, timeout) + val state = getMasterState val workerHeaders = Seq("Worker Id", "Address", "State", "Cores", "Memory") val workers = state.workers.sortBy(_.id) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index dea0a65eeeaa6..2111a8581f2e4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -19,6 +19,8 @@ package org.apache.spark.deploy.master.ui import org.apache.spark.Logging import org.apache.spark.deploy.master.Master +import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationsListResource, ApplicationInfo, + UIRoot} import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.RpcUtils @@ -28,12 +30,15 @@ import org.apache.spark.util.RpcUtils */ private[master] class MasterWebUI(val master: Master, requestedPort: Int) - extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging { + extends WebUI(master.securityMgr, requestedPort, master.conf, name = "MasterUI") with Logging + with UIRoot { val masterActorRef = master.self val timeout = RpcUtils.askTimeout(master.conf) val killEnabled = master.conf.getBoolean("spark.ui.killEnabled", true) + val masterPage = new MasterPage(this) + initialize() /** Initialize all components of the server. 
*/ @@ -43,6 +48,7 @@ class MasterWebUI(val master: Master, requestedPort: Int) attachPage(new HistoryNotFoundPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) + attachHandler(ApiRootResource.getServletHandler(this)) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( @@ -60,6 +66,23 @@ class MasterWebUI(val master: Master, requestedPort: Int) assert(serverInfo.isDefined, "Master UI must be bound to a server before detaching SparkUIs") ui.getHandlers.foreach(detachHandler) } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + val state = masterPage.getMasterState + val activeApps = state.activeApps.sortBy(_.startTime).reverse + val completedApps = state.completedApps.sortBy(_.endTime).reverse + activeApps.iterator.map { ApplicationsListResource.convertApplicationInfo(_, false) } ++ + completedApps.iterator.map { ApplicationsListResource.convertApplicationInfo(_, true) } + } + + def getSparkUI(appId: String): Option[SparkUI] = { + val state = masterPage.getMasterState + val activeApps = state.activeApps.sortBy(_.startTime).reverse + val completedApps = state.completedApps.sortBy(_.endTime).reverse + (activeApps ++ completedApps).find { _.id == appId }.flatMap { + master.rebuildSparkUI + } + } } private[master] object MasterWebUI { diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala index 6078f50518ba4..1fe956320a1b8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala @@ -57,7 +57,11 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { private val supportedMasterPrefixes = Seq("spark://", "mesos://") - private val masters: Array[String] = Utils.parseStandaloneMasterUrls(master) + private val masters: Array[String] = if (master.startsWith("spark://")) { + Utils.parseStandaloneMasterUrls(master) + } else { + Array(master) + } // Set of masters that lost contact with us, used to keep track of // whether there are masters still alive for us to communicate with diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 8f3cc54051048..ebc6cd76c6afd 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -324,9 +324,6 @@ private[worker] class Worker( map(e => new ExecutorDescription(e.appId, e.execId, e.cores, e.state)) sender ! 
WorkerSchedulerStateResponse(workerId, execs.toList, drivers.keys.toSeq) - case Heartbeat => - logInfo(s"Received heartbeat from driver ${sender.path}") - case RegisterWorkerFailed(message) => if (!registered) { logError("Worker registration failed: " + message) @@ -557,7 +554,7 @@ private[deploy] object Worker extends Logging { conf = conf, securityManager = securityMgr) val masterAkkaUrls = masterUrls.map(Master.toAkkaUrl(_, AkkaUtils.protocol(actorSystem))) actorSystem.actorOf(Props(classOf[Worker], host, boundPort, webUiPort, cores, memory, - masterAkkaUrls, systemName, actorName, workDir, conf, securityMgr), name = actorName) + masterAkkaUrls, systemName, actorName, workDir, conf, securityMgr), name = actorName) (actorSystem, boundPort) } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala index 88170d4df3053..dc2bee6f2bdca 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala @@ -29,6 +29,7 @@ import org.apache.spark.util.logging.RollingFileAppender private[ui] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with Logging { private val worker = parent.worker private val workDir = parent.workDir + private val supportedLogTypes = Set("stderr", "stdout") def renderLog(request: HttpServletRequest): String = { val defaultBytes = 100 * 1024 @@ -129,6 +130,11 @@ private[ui] class LogPage(parent: WorkerWebUI) extends WebUIPage("logPage") with offsetOption: Option[Long], byteLength: Int ): (String, Long, Long, Long) = { + + if (!supportedLogTypes.contains(logType)) { + return ("Error: Log type must be one of " + supportedLogTypes.mkString(", "), 0, 0, 0) + } + try { val files = RollingFileAppender.getSortedRolledOverFiles(logDirectory, logType) logDebug(s"Sorted log files of type $logType in $logDirectory:\n${files.mkString("\n")}") diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index ed159dec4f998..f3a26f54a81fb 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -33,7 +33,7 @@ import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.scheduler.TaskDescription import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance -import org.apache.spark.util.{SignalLogger, Utils} +import org.apache.spark.util.{ThreadUtils, SignalLogger, Utils} private[spark] class CoarseGrainedExecutorBackend( override val rpcEnv: RpcEnv, @@ -55,18 +55,19 @@ private[spark] class CoarseGrainedExecutorBackend( private[this] val ser: SerializerInstance = env.closureSerializer.newInstance() override def onStart() { - import scala.concurrent.ExecutionContext.Implicits.global logInfo("Connecting to driver: " + driverUrl) rpcEnv.asyncSetupEndpointRefByURI(driverUrl).flatMap { ref => + // This is a very fast action so we can use "ThreadUtils.sameThread" driver = Some(ref) ref.ask[RegisteredExecutor.type]( RegisterExecutor(executorId, self, hostPort, cores, extractLogUrls)) - } onComplete { + }(ThreadUtils.sameThread).onComplete { + // This is a very fast action so we can use "ThreadUtils.sameThread" case Success(msg) => Utils.tryLogNonFatalError { 
Option(self).foreach(_.send(msg)) // msg must be RegisteredExecutor } case Failure(e) => logError(s"Cannot register with driver: $driverUrl", e) - } + }(ThreadUtils.sameThread) } def extractLogUrls: Map[String, String] = { diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index 06152f16ae618..38b61d7242fce 100644 --- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -43,22 +43,22 @@ class TaskMetrics extends Serializable { private var _hostname: String = _ def hostname: String = _hostname private[spark] def setHostname(value: String) = _hostname = value - + /** * Time taken on the executor to deserialize this task */ private var _executorDeserializeTime: Long = _ def executorDeserializeTime: Long = _executorDeserializeTime private[spark] def setExecutorDeserializeTime(value: Long) = _executorDeserializeTime = value - - + + /** * Time the executor spends actually running the task (including fetching shuffle data) */ private var _executorRunTime: Long = _ def executorRunTime: Long = _executorRunTime private[spark] def setExecutorRunTime(value: Long) = _executorRunTime = value - + /** * The number of bytes this task transmitted back to the driver as the TaskResult */ @@ -261,7 +261,7 @@ case class InputMetrics(readMethod: DataReadMethod.Value) { */ private var _recordsRead: Long = _ def recordsRead: Long = _recordsRead - def incRecordsRead(records: Long): Unit = _recordsRead += records + def incRecordsRead(records: Long): Unit = _recordsRead += records /** * Invoke the bytesReadCallback and mutate bytesRead. @@ -315,7 +315,7 @@ class ShuffleReadMetrics extends Serializable { def remoteBlocksFetched: Int = _remoteBlocksFetched private[spark] def incRemoteBlocksFetched(value: Int) = _remoteBlocksFetched += value private[spark] def decRemoteBlocksFetched(value: Int) = _remoteBlocksFetched -= value - + /** * Number of local blocks fetched in this shuffle by this task */ @@ -333,7 +333,7 @@ class ShuffleReadMetrics extends Serializable { def fetchWaitTime: Long = _fetchWaitTime private[spark] def incFetchWaitTime(value: Long) = _fetchWaitTime += value private[spark] def decFetchWaitTime(value: Long) = _fetchWaitTime -= value - + /** * Total number of remote bytes read from the shuffle by this task */ @@ -381,7 +381,7 @@ class ShuffleWriteMetrics extends Serializable { def shuffleBytesWritten: Long = _shuffleBytesWritten private[spark] def incShuffleBytesWritten(value: Long) = _shuffleBytesWritten += value private[spark] def decShuffleBytesWritten(value: Long) = _shuffleBytesWritten -= value - + /** * Time the task spent blocking on writes to disk or buffer cache, in nanoseconds */ @@ -389,7 +389,7 @@ class ShuffleWriteMetrics extends Serializable { def shuffleWriteTime: Long = _shuffleWriteTime private[spark] def incShuffleWriteTime(value: Long) = _shuffleWriteTime += value private[spark] def decShuffleWriteTime(value: Long) = _shuffleWriteTime -= value - + /** * Total number of records written to the shuffle by this task */ diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 0756cdb2ed8e6..0d8ac1f80a9f4 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -17,7 +17,7 @@ package org.apache.spark.io -import java.io.{InputStream, 
OutputStream} +import java.io.{IOException, InputStream, OutputStream} import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream} @@ -154,8 +154,53 @@ class SnappyCompressionCodec(conf: SparkConf) extends CompressionCodec { override def compressedOutputStream(s: OutputStream): OutputStream = { val blockSize = conf.getSizeAsBytes("spark.io.compression.snappy.blockSize", "32k").toInt - new SnappyOutputStream(s, blockSize) + new SnappyOutputStreamWrapper(new SnappyOutputStream(s, blockSize)) } override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s) } + +/** + * Wrapper over [[SnappyOutputStream]] which guards against write-after-close and double-close + * issues. See SPARK-7660 for more details. This wrapping can be removed if we upgrade to a version + * of snappy-java that contains the fix for https://github.com/xerial/snappy-java/issues/107. + */ +private final class SnappyOutputStreamWrapper(os: SnappyOutputStream) extends OutputStream { + + private[this] var closed: Boolean = false + + override def write(b: Int): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.write(b) + } + + override def write(b: Array[Byte]): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.write(b) + } + + override def write(b: Array[Byte], off: Int, len: Int): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.write(b, off, len) + } + + override def flush(): Unit = { + if (closed) { + throw new IOException("Stream is closed") + } + os.flush() + } + + override def close(): Unit = { + if (!closed) { + closed = true + os.close() + } + } +} diff --git a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala index cfd20392d12f1..390d148bc97f9 100644 --- a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala +++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala @@ -60,7 +60,7 @@ trait SparkHadoopMapReduceUtil { val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType") .asInstanceOf[Class[Enum[_]]] val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke( - taskTypeClass, if(isMap) "MAP" else "REDUCE") + taskTypeClass, if (isMap) "MAP" else "REDUCE") val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala index e8b3074e8f1a6..11dfcfe2f04e1 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/Slf4jSink.scala @@ -26,9 +26,9 @@ import org.apache.spark.SecurityManager import org.apache.spark.metrics.MetricsSystem private[spark] class Slf4jSink( - val property: Properties, + val property: Properties, val registry: MetricRegistry, - securityMgr: SecurityManager) + securityMgr: SecurityManager) extends Sink { val SLF4J_DEFAULT_PERIOD = 10 val SLF4J_DEFAULT_UNIT = "SECONDS" diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/package.scala b/core/src/main/scala/org/apache/spark/metrics/sink/package.scala index 90e3aa70b99ef..670e683663324 100644 --- 
a/core/src/main/scala/org/apache/spark/metrics/sink/package.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/package.scala @@ -20,4 +20,4 @@ package org.apache.spark.metrics /** * Sinks used in Spark's metrics system. */ -package object sink +package object sink diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 6181c0ee9fa2b..d650d5fe73087 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -59,12 +59,22 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage } transportContext = new TransportContext(transportConf, rpcHandler) clientFactory = transportContext.createClientFactory(clientBootstrap.toList) - server = transportContext.createServer(conf.getInt("spark.blockManager.port", 0), - serverBootstrap.toList) + server = createServer(serverBootstrap.toList) appId = conf.getAppId logInfo("Server created on " + server.getPort) } + /** Creates and binds the TransportServer, possibly trying multiple ports. */ + private def createServer(bootstraps: List[TransportServerBootstrap]): TransportServer = { + def startService(port: Int): (TransportServer, Int) = { + val server = transportContext.createServer(port, bootstraps) + (server, server.getPort) + } + + val portToTry = conf.getInt("spark.blockManager.port", 0) + Utils.startServiceOnPort(portToTry, startService, conf, getClass.getName)._1 + } + override def fetchBlocks( host: String, port: Int, diff --git a/core/src/main/scala/org/apache/spark/network/nio/BlockMessage.scala b/core/src/main/scala/org/apache/spark/network/nio/BlockMessage.scala index b573f1a8a5fcb..67a376102994c 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/BlockMessage.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/BlockMessage.scala @@ -110,7 +110,7 @@ private[nio] class BlockMessage() { def getType: Int = typ def getId: BlockId = id def getData: ByteBuffer = data - def getLevel: StorageLevel = level + def getLevel: StorageLevel = level def toBufferMessage: BufferMessage = { val buffers = new ArrayBuffer[ByteBuffer]() @@ -155,7 +155,7 @@ private[nio] class BlockMessage() { override def toString: String = { "BlockMessage [type = " + typ + ", id = " + id + ", level = " + level + - ", data = " + (if (data != null) data.remaining.toString else "null") + "]" + ", data = " + (if (data != null) data.remaining.toString else "null") + "]" } } diff --git a/core/src/main/scala/org/apache/spark/network/nio/BlockMessageArray.scala b/core/src/main/scala/org/apache/spark/network/nio/BlockMessageArray.scala index 1ba25aa74aa02..7d0806f0c2580 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/BlockMessageArray.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/BlockMessageArray.scala @@ -114,8 +114,8 @@ private[nio] object BlockMessageArray { val blockMessages = (0 until 10).map { i => if (i % 2 == 0) { - val buffer = ByteBuffer.allocate(100) - buffer.clear + val buffer = ByteBuffer.allocate(100) + buffer.clear() BlockMessage.fromPutBlock(PutBlock(TestBlockId(i.toString), buffer, StorageLevel.MEMORY_ONLY_SER)) } else { diff --git a/core/src/main/scala/org/apache/spark/network/nio/Connection.scala b/core/src/main/scala/org/apache/spark/network/nio/Connection.scala index 6b898bd4bfc1b..1499da07bb83b 100644 --- 
a/core/src/main/scala/org/apache/spark/network/nio/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/Connection.scala @@ -326,15 +326,14 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, // MUST be called within the selector loop def connect() { - try{ + try { channel.register(selector, SelectionKey.OP_CONNECT) channel.connect(address) logInfo("Initiating connection to [" + address + "]") } catch { - case e: Exception => { + case e: Exception => logError("Error connecting to " + address, e) callOnExceptionCallbacks(e) - } } } diff --git a/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala index 497871ed6d5e5..c0bca2c4bc994 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala @@ -635,12 +635,11 @@ private[nio] class ConnectionManager( val message = securityMsgResp.toBufferMessage if (message == null) throw new IOException("Error creating security message") sendSecurityMessage(waitingConn.getRemoteConnectionManagerId(), message) - } catch { - case e: Exception => { + } catch { + case e: Exception => logError("Error handling sasl client authentication", e) waitingConn.close() throw new IOException("Error evaluating sasl response: ", e) - } } } } diff --git a/core/src/main/scala/org/apache/spark/network/nio/SecurityMessage.scala b/core/src/main/scala/org/apache/spark/network/nio/SecurityMessage.scala index 747a2088a7258..232c552f9865d 100644 --- a/core/src/main/scala/org/apache/spark/network/nio/SecurityMessage.scala +++ b/core/src/main/scala/org/apache/spark/network/nio/SecurityMessage.scala @@ -75,7 +75,7 @@ private[nio] class SecurityMessage extends Logging { for (i <- 1 to idLength) { idBuilder += buffer.getChar() } - connectionId = idBuilder.toString() + connectionId = idBuilder.toString() val tokenLength = buffer.getInt() token = new Array[Byte](tokenLength) diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 2ab41ba488ff6..07acd82ebcb24 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -43,5 +43,5 @@ package org.apache package object spark { // For package docs only - val SPARK_VERSION = "1.4.0-SNAPSHOT" + val SPARK_VERSION = "1.4.0" } diff --git a/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala index 3ef3cc219dec6..91b07ce3af1b6 100644 --- a/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala @@ -32,12 +32,12 @@ import org.apache.spark.util.collection.OpenHashMap * An ApproximateEvaluator for counts by key. Returns a map of key to confidence interval. 
*/ private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) - extends ApproximateEvaluator[OpenHashMap[T,Long], Map[T, BoundedDouble]] { + extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { var outputsMerged = 0 - var sums = new OpenHashMap[T,Long]() // Sum of counts for each key + var sums = new OpenHashMap[T, Long]() // Sum of counts for each key - override def merge(outputId: Int, taskResult: OpenHashMap[T,Long]) { + override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala index 3406a7e97e368..ca1eb1f4e4a9a 100644 --- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala @@ -19,8 +19,10 @@ package org.apache.spark.rdd import java.util.concurrent.atomic.AtomicLong +import org.apache.spark.util.ThreadUtils + import scala.collection.mutable.ArrayBuffer -import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.ExecutionContext import scala.reflect.ClassTag import org.apache.spark.{ComplexFutureAction, FutureAction, Logging} @@ -33,7 +35,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi /** * Returns a future for counting the number of elements in the RDD. */ - def countAsync(): FutureAction[Long] = { + def countAsync(): FutureAction[Long] = self.withScope { val totalCount = new AtomicLong self.context.submitJob( self, @@ -53,7 +55,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi /** * Returns a future for retrieving all elements of this RDD. */ - def collectAsync(): FutureAction[Seq[T]] = { + def collectAsync(): FutureAction[Seq[T]] = self.withScope { val results = new Array[Array[T]](self.partitions.length) self.context.submitJob[T, Array[T], Seq[T]](self, _.toArray, Range(0, self.partitions.length), (index, data) => results(index) = data, results.flatten.toSeq) @@ -62,10 +64,12 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi /** * Returns a future for retrieving the first num elements of the RDD. */ - def takeAsync(num: Int): FutureAction[Seq[T]] = { + def takeAsync(num: Int): FutureAction[Seq[T]] = self.withScope { val f = new ComplexFutureAction[Seq[T]] f.run { + // This is a blocking action so we should use "AsyncRDDActions.futureExecutionContext" which + // is a cached thread pool. 
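The AsyncRDDActions hunks above drop the implicit global ExecutionContext in favor of a dedicated cached pool for blocking actions. A minimal sketch of that idea using plain java.util.concurrent; it does not use Spark's ThreadUtils helper, and the names are illustrative only:

    import java.util.concurrent.Executors
    import scala.concurrent.{Await, ExecutionContext, Future}
    import scala.concurrent.duration._

    object DedicatedPoolSketch {
      // A separate cached pool for potentially blocking work, so the default/global
      // ExecutionContext (sized to the CPU count) is not tied up by blocked futures.
      private val blockingEc = ExecutionContext.fromExecutorService(
        Executors.newCachedThreadPool())

      def main(args: Array[String]): Unit = {
        val result = Future {
          Thread.sleep(100) // stands in for a blocking take/collect
          42
        }(blockingEc)
        println(Await.result(result, 1.second))
        blockingEc.shutdown()
      }
    }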
val results = new ArrayBuffer[T](num) val totalParts = self.partitions.length var partsScanned = 0 @@ -81,9 +85,9 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi numPartsToTry = partsScanned * 4 } else { // the left side of max is >=1 whenever partsScanned >= 2 - numPartsToTry = Math.max(1, + numPartsToTry = Math.max(1, (1.5 * num * partsScanned / results.size).toInt - partsScanned) - numPartsToTry = Math.min(numPartsToTry, partsScanned * 4) + numPartsToTry = Math.min(numPartsToTry, partsScanned * 4) } } @@ -101,7 +105,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi partsScanned += numPartsToTry } results.toSeq - } + }(AsyncRDDActions.futureExecutionContext) f } @@ -109,7 +113,7 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi /** * Applies a function f to all elements of this RDD. */ - def foreachAsync(f: T => Unit): FutureAction[Unit] = { + def foreachAsync(f: T => Unit): FutureAction[Unit] = self.withScope { val cleanF = self.context.clean(f) self.context.submitJob[T, Unit, Unit](self, _.foreach(cleanF), Range(0, self.partitions.length), (index, data) => Unit, Unit) @@ -118,8 +122,13 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi /** * Applies a function f to each partition of this RDD. */ - def foreachPartitionAsync(f: Iterator[T] => Unit): FutureAction[Unit] = { + def foreachPartitionAsync(f: Iterator[T] => Unit): FutureAction[Unit] = self.withScope { self.context.submitJob[T, Unit, Unit](self, f, Range(0, self.partitions.length), (index, data) => Unit, Unit) } } + +private object AsyncRDDActions { + val futureExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("AsyncRDDActions-future", 128)) +} diff --git a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala index 71578d1210fde..922030263756b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala @@ -31,7 +31,7 @@ private[spark] class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds: Array[BlockId]) extends RDD[T](sc, Nil) { - @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get) + @transient lazy val _locations = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get) @volatile private var _isValid = true override def getPartitions: Array[Partition] = { @@ -54,7 +54,7 @@ class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds override def getPreferredLocations(split: Partition): Seq[String] = { assertValid() - locations_(split.asInstanceOf[BlockRDDPartition].blockId) + _locations(split.asInstanceOf[BlockRDDPartition].blockId) } /** @@ -79,14 +79,14 @@ class BlockRDD[T: ClassTag](@transient sc: SparkContext, @transient val blockIds /** Check if this BlockRDD is valid. If not valid, exception is thrown. 
*/ private[spark] def assertValid() { - if (!_isValid) { + if (!isValid) { throw new SparkException( "Attempted to use %s after its blocks have been removed!".format(toString)) } } protected def getBlockIdLocations(): Map[BlockId, Seq[String]] = { - locations_ + _locations } } diff --git a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala index 0d130dd4c7a60..a4715e3437d94 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala @@ -49,7 +49,7 @@ class CheckpointRDD[T: ClassTag](sc: SparkContext, val checkpointPath: String) if (fs.exists(cpath)) { val dirContents = fs.listStatus(cpath).map(_.getPath) val partitionFiles = dirContents.filter(_.getName.startsWith("part-")).map(_.toString).sorted - val numPart = partitionFiles.length + val numPart = partitionFiles.length if (numPart > 0 && (! partitionFiles(0).endsWith(CheckpointRDD.splitIdToFile(0)) || ! partitionFiles(numPart-1).endsWith(CheckpointRDD.splitIdToFile(numPart-1)))) { throw new SparkException("Invalid checkpoint directory: " + checkpointPath) diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala index 0c1b02c07d09f..663eebb8e4191 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -310,11 +310,11 @@ private class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: def throwBalls() { if (noLocality) { // no preferredLocations in parent RDD, no randomization needed if (maxPartitions > groupArr.size) { // just return prev.partitions - for ((p,i) <- prev.partitions.zipWithIndex) { + for ((p, i) <- prev.partitions.zipWithIndex) { groupArr(i).arr += p } } else { // no locality available, then simply split partitions based on positions in array - for(i <- 0 until maxPartitions) { + for (i <- 0 until maxPartitions) { val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt (rangeStart until rangeEnd).foreach{ j => groupArr(i).arr += prev.partitions(j) } diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index 843a893235e56..926bce6f15a2a 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -30,7 +30,7 @@ import org.apache.spark.util.StatCounter */ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { /** Add up the elements in this RDD. */ - def sum(): Double = { + def sum(): Double = self.withScope { self.fold(0.0)(_ + _) } @@ -38,37 +38,49 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and * count of the RDD's elements in one operation. */ - def stats(): StatCounter = { + def stats(): StatCounter = self.withScope { self.mapPartitions(nums => Iterator(StatCounter(nums))).reduce((a, b) => a.merge(b)) } /** Compute the mean of this RDD's elements. */ - def mean(): Double = stats().mean + def mean(): Double = self.withScope { + stats().mean + } /** Compute the variance of this RDD's elements. 
*/ - def variance(): Double = stats().variance + def variance(): Double = self.withScope { + stats().variance + } /** Compute the standard deviation of this RDD's elements. */ - def stdev(): Double = stats().stdev + def stdev(): Double = self.withScope { + stats().stdev + } /** * Compute the sample standard deviation of this RDD's elements (which corrects for bias in * estimating the standard deviation by dividing by N-1 instead of N). */ - def sampleStdev(): Double = stats().sampleStdev + def sampleStdev(): Double = self.withScope { + stats().sampleStdev + } /** * Compute the sample variance of this RDD's elements (which corrects for bias in * estimating the variance by dividing by N-1 instead of N). */ - def sampleVariance(): Double = stats().sampleVariance + def sampleVariance(): Double = self.withScope { + stats().sampleVariance + } /** * :: Experimental :: * Approximate operation to return the mean within a timeout. */ @Experimental - def meanApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = { + def meanApprox( + timeout: Long, + confidence: Double = 0.95): PartialResult[BoundedDouble] = self.withScope { val processPartition = (ctx: TaskContext, ns: Iterator[Double]) => StatCounter(ns) val evaluator = new MeanEvaluator(self.partitions.length, confidence) self.context.runApproximateJob(self, processPartition, evaluator, timeout) @@ -79,7 +91,9 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * Approximate operation to return the sum within a timeout. */ @Experimental - def sumApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = { + def sumApprox( + timeout: Long, + confidence: Double = 0.95): PartialResult[BoundedDouble] = self.withScope { val processPartition = (ctx: TaskContext, ns: Iterator[Double]) => StatCounter(ns) val evaluator = new SumEvaluator(self.partitions.length, confidence) self.context.runApproximateJob(self, processPartition, evaluator, timeout) @@ -93,7 +107,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * If the RDD contains infinity, NaN throws an exception * If the elements in RDD do not vary (max == min) always returns a single bucket. */ - def histogram(bucketCount: Int): Pair[Array[Double], Array[Long]] = { + def histogram(bucketCount: Int): Pair[Array[Double], Array[Long]] = self.withScope { // Scala's built-in range has issues. See #SI-8782 def customRange(min: Double, max: Double, steps: Int): IndexedSeq[Double] = { val span = max - min @@ -140,7 +154,9 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * the maximum value of the last position and all NaN entries will be counted * in that bucket. 
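With `sum`, `stats`, `mean`, the deviation/variance helpers, and `histogram` now wrapped in `self.withScope`, each of these double-RDD operations is attributed to its own named scope in the UI. A small usage sketch (assumes `sc` from spark-shell; values are arbitrary):

```scala
val xs = sc.parallelize(Seq(1.0, 2.5, 3.0, 4.5, 10.0))

val st = xs.stats()                  // one pass: count, mean, variance, stdev, ...
println((st.count, st.mean, st.stdev))

// Evenly spaced buckets between min and max:
val (bucketEdges, counts) = xs.histogram(3)

// Caller-supplied bucket edges (must be sorted); evenBuckets allows constant-time lookup
// only when the edges really are evenly spaced:
val counts2 = xs.histogram(Array(0.0, 2.0, 5.0, 20.0), evenBuckets = false)
```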
*/ - def histogram(buckets: Array[Double], evenBuckets: Boolean = false): Array[Long] = { + def histogram( + buckets: Array[Double], + evenBuckets: Boolean = false): Array[Long] = self.withScope { if (buckets.length < 2) { throw new IllegalArgumentException("buckets array must have at least two elements") } diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index f77abac42b623..2cefe63d44b20 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -99,7 +99,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp */ @DeveloperApi class HadoopRDD[K, V]( - sc: SparkContext, + @transient sc: SparkContext, broadcastedConf: Broadcast[SerializableWritable[Configuration]], initLocalJobConfFuncOpt: Option[JobConf => Unit], inputFormatClass: Class[_ <: InputFormat[K, V]], @@ -108,6 +108,10 @@ class HadoopRDD[K, V]( minPartitions: Int) extends RDD[(K, V)](sc, Nil) with Logging { + if (initLocalJobConfFuncOpt.isDefined) { + sc.clean(initLocalJobConfFuncOpt.get) + } + def this( sc: SparkContext, conf: JobConf, diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 2ab967f4bb313..84456d6d868dc 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -196,7 +196,7 @@ class NewHadoopRDD[K, V]( override def getPreferredLocations(hsplit: Partition): Seq[String] = { val split = hsplit.asInstanceOf[NewHadoopPartition].serializableHadoopSplit.value val locs = HadoopRDD.SPLIT_INFO_REFLECTIONS match { - case Some(c) => + case Some(c) => try { val infos = c.newGetLocationInfo.invoke(split).asInstanceOf[Array[AnyRef]] Some(HadoopRDD.convertSplitLocationInfo(infos)) diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index 6afe50161dacd..d71bb63000904 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -57,7 +57,7 @@ class OrderedRDDFunctions[K : Ordering : ClassTag, */ // TODO: this currently doesn't work on P other than Tuple2! def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.length) - : RDD[(K, V)] = + : RDD[(K, V)] = self.withScope { val part = new RangePartitioner(numPartitions, self, ascending) new ShuffledRDD[K, V, V](self, part) @@ -71,7 +71,7 @@ class OrderedRDDFunctions[K : Ordering : ClassTag, * This is more efficient than calling `repartition` and then sorting within each partition * because it can push the sorting down into the shuffle machinery. */ - def repartitionAndSortWithinPartitions(partitioner: Partitioner): RDD[(K, V)] = { + def repartitionAndSortWithinPartitions(partitioner: Partitioner): RDD[(K, V)] = self.withScope { new ShuffledRDD[K, V, V](self, partitioner).setKeyOrdering(ordering) } @@ -81,7 +81,7 @@ class OrderedRDDFunctions[K : Ordering : ClassTag, * performed efficiently by only scanning the partitions that might contain matching elements. * Otherwise, a standard `filter` is applied to all partitions. 
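`sortByKey` and `repartitionAndSortWithinPartitions` above now also run inside `self.withScope`; their semantics are unchanged. As a reminder of what the latter buys (a sketch, `sc` from spark-shell): the sort is pushed into the shuffle machinery rather than done as a separate step after `repartition`.

```scala
import org.apache.spark.HashPartitioner

val pairs = sc.parallelize(Seq(("b", 2), ("a", 1), ("c", 3), ("a", 4)))

// Shuffle into 4 partitions and sort by key within each partition in one pass:
val partitionedSorted = pairs.repartitionAndSortWithinPartitions(new HashPartitioner(4))

// Equivalent but slower: repartition first, then sort each partition afterwards.
```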
*/ - def filterByRange(lower: K, upper: K): RDD[P] = { + def filterByRange(lower: K, upper: K): RDD[P] = self.withScope { def inRange(k: K): Boolean = ordering.gteq(k, lower) && ordering.lteq(k, upper) diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 05351ba4ff76b..cfd3e26faf2b9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -29,7 +29,7 @@ import scala.util.DynamicVariable import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus import org.apache.hadoop.conf.{Configurable, Configuration} -import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf, OutputFormat} @@ -75,7 +75,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) mergeCombiners: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true, - serializer: Serializer = null): RDD[(K, C)] = { + serializer: Serializer = null): RDD[(K, C)] = self.withScope { require(mergeCombiners != null, "mergeCombiners must be defined") // required as of Spark 0.9.0 if (keyClass.isArray) { if (mapSideCombine) { @@ -108,7 +108,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) def combineByKey[C](createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiners: (C, C) => C, - numPartitions: Int): RDD[(K, C)] = { + numPartitions: Int): RDD[(K, C)] = self.withScope { combineByKey(createCombiner, mergeValue, mergeCombiners, new HashPartitioner(numPartitions)) } @@ -122,7 +122,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * instead of creating a new U. */ def aggregateByKey[U: ClassTag](zeroValue: U, partitioner: Partitioner)(seqOp: (U, V) => U, - combOp: (U, U) => U): RDD[(K, U)] = { + combOp: (U, U) => U): RDD[(K, U)] = self.withScope { // Serialize the zero value to a byte array so that we can get a new clone of it on each key val zeroBuffer = SparkEnv.get.serializer.newInstance().serialize(zeroValue) val zeroArray = new Array[Byte](zeroBuffer.limit) @@ -131,7 +131,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) lazy val cachedSerializer = SparkEnv.get.serializer.newInstance() val createZero = () => cachedSerializer.deserialize[U](ByteBuffer.wrap(zeroArray)) - combineByKey[U]((v: V) => seqOp(createZero(), v), seqOp, combOp, partitioner) + // We will clean the combiner closure later in `combineByKey` + val cleanedSeqOp = self.context.clean(seqOp) + combineByKey[U]((v: V) => cleanedSeqOp(createZero(), v), cleanedSeqOp, combOp, partitioner) } /** @@ -144,7 +146,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * instead of creating a new U. */ def aggregateByKey[U: ClassTag](zeroValue: U, numPartitions: Int)(seqOp: (U, V) => U, - combOp: (U, U) => U): RDD[(K, U)] = { + combOp: (U, U) => U): RDD[(K, U)] = self.withScope { aggregateByKey(zeroValue, new HashPartitioner(numPartitions))(seqOp, combOp) } @@ -158,7 +160,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * instead of creating a new U. 
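Note the added `cleanedSeqOp = self.context.clean(seqOp)` above: the zero-value wrapper `(v: V) => cleanedSeqOp(createZero(), v)` is itself a new closure, so the user-supplied `seqOp` has to be cleaned here rather than relying solely on the cleaning inside `combineByKey`. Typical use of `aggregateByKey` (sketch, `sc` from spark-shell):

```scala
val events = sc.parallelize(Seq(("user1", 3), ("user2", 5), ("user1", 7)))

// Per-key (count, sum) without building intermediate collections:
val countAndSum = events.aggregateByKey((0, 0))(
  (acc, v) => (acc._1 + 1, acc._2 + v),        // seqOp: fold one value into the accumulator
  (a, b)   => (a._1 + b._1, a._2 + b._2))      // combOp: merge accumulators across partitions

countAndSum.collect().foreach { case (user, (n, total)) =>
  println(s"$user: $n events, sum $total")
}
```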
*/ def aggregateByKey[U: ClassTag](zeroValue: U)(seqOp: (U, V) => U, - combOp: (U, U) => U): RDD[(K, U)] = { + combOp: (U, U) => U): RDD[(K, U)] = self.withScope { aggregateByKey(zeroValue, defaultPartitioner(self))(seqOp, combOp) } @@ -167,7 +169,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * may be added to the result an arbitrary number of times, and must not change the result * (e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.). */ - def foldByKey(zeroValue: V, partitioner: Partitioner)(func: (V, V) => V): RDD[(K, V)] = { + def foldByKey( + zeroValue: V, + partitioner: Partitioner)(func: (V, V) => V): RDD[(K, V)] = self.withScope { // Serialize the zero value to a byte array so that we can get a new clone of it on each key val zeroBuffer = SparkEnv.get.serializer.newInstance().serialize(zeroValue) val zeroArray = new Array[Byte](zeroBuffer.limit) @@ -177,7 +181,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) lazy val cachedSerializer = SparkEnv.get.serializer.newInstance() val createZero = () => cachedSerializer.deserialize[V](ByteBuffer.wrap(zeroArray)) - combineByKey[V]((v: V) => func(createZero(), v), func, func, partitioner) + val cleanedFunc = self.context.clean(func) + combineByKey[V]((v: V) => cleanedFunc(createZero(), v), cleanedFunc, cleanedFunc, partitioner) } /** @@ -185,7 +190,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * may be added to the result an arbitrary number of times, and must not change the result * (e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.). */ - def foldByKey(zeroValue: V, numPartitions: Int)(func: (V, V) => V): RDD[(K, V)] = { + def foldByKey(zeroValue: V, numPartitions: Int)(func: (V, V) => V): RDD[(K, V)] = self.withScope { foldByKey(zeroValue, new HashPartitioner(numPartitions))(func) } @@ -194,7 +199,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * may be added to the result an arbitrary number of times, and must not change the result * (e.g., Nil for list concatenation, 0 for addition, or 1 for multiplication.). */ - def foldByKey(zeroValue: V)(func: (V, V) => V): RDD[(K, V)] = { + def foldByKey(zeroValue: V)(func: (V, V) => V): RDD[(K, V)] = self.withScope { foldByKey(zeroValue, defaultPartitioner(self))(func) } @@ -213,7 +218,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ def sampleByKey(withReplacement: Boolean, fractions: Map[K, Double], - seed: Long = Utils.random.nextLong): RDD[(K, V)] = { + seed: Long = Utils.random.nextLong): RDD[(K, V)] = self.withScope { require(fractions.values.forall(v => v >= 0.0), "Negative sampling rates.") @@ -242,9 +247,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * @return RDD containing the sampled subset */ @Experimental - def sampleByKeyExact(withReplacement: Boolean, + def sampleByKeyExact( + withReplacement: Boolean, fractions: Map[K, Double], - seed: Long = Utils.random.nextLong): RDD[(K, V)] = { + seed: Long = Utils.random.nextLong): RDD[(K, V)] = self.withScope { require(fractions.values.forall(v => v >= 0.0), "Negative sampling rates.") @@ -261,7 +267,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * the merging locally on each mapper before sending results to a reducer, similarly to a * "combiner" in MapReduce. 
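`foldByKey` gets the same treatment above: `func` is cleaned before being captured by the zero-value wrapper. A quick sketch of `foldByKey` together with stratified sampling via `sampleByKey` (spark-shell `sc`; the fractions are illustrative):

```scala
val scores = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3), ("b", 4)))

// Per-key sum with a neutral zero value:
val sums = scores.foldByKey(0)(_ + _)

// Approximate stratified sample: keep ~50% of "a" records and ~10% of "b" records.
val fractions = Map("a" -> 0.5, "b" -> 0.1)
val sampled = scores.sampleByKey(withReplacement = false, fractions = fractions, seed = 42L)
```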
*/ - def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDD[(K, V)] = { + def reduceByKey(partitioner: Partitioner, func: (V, V) => V): RDD[(K, V)] = self.withScope { combineByKey[V]((v: V) => v, func, func, partitioner) } @@ -270,7 +276,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * the merging locally on each mapper before sending results to a reducer, similarly to a * "combiner" in MapReduce. Output will be hash-partitioned with numPartitions partitions. */ - def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = { + def reduceByKey(func: (V, V) => V, numPartitions: Int): RDD[(K, V)] = self.withScope { reduceByKey(new HashPartitioner(numPartitions), func) } @@ -280,7 +286,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * "combiner" in MapReduce. Output will be hash-partitioned with the existing partitioner/ * parallelism level. */ - def reduceByKey(func: (V, V) => V): RDD[(K, V)] = { + def reduceByKey(func: (V, V) => V): RDD[(K, V)] = self.withScope { reduceByKey(defaultPartitioner(self), func) } @@ -289,7 +295,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * immediately to the master as a Map. This will also perform the merging locally on each mapper * before sending results to a reducer, similarly to a "combiner" in MapReduce. */ - def reduceByKeyLocally(func: (V, V) => V): Map[K, V] = { + def reduceByKeyLocally(func: (V, V) => V): Map[K, V] = self.withScope { + val cleanedF = self.sparkContext.clean(func) if (keyClass.isArray) { throw new SparkException("reduceByKeyLocally() does not support array keys") @@ -299,7 +306,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val map = new JHashMap[K, V] iter.foreach { pair => val old = map.get(pair._1) - map.put(pair._1, if (old == null) pair._2 else func(old, pair._2)) + map.put(pair._1, if (old == null) pair._2 else cleanedF(old, pair._2)) } Iterator(map) } : Iterator[JHashMap[K, V]] @@ -307,7 +314,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val mergeMaps = (m1: JHashMap[K, V], m2: JHashMap[K, V]) => { m2.foreach { pair => val old = m1.get(pair._1) - m1.put(pair._1, if (old == null) pair._2 else func(old, pair._2)) + m1.put(pair._1, if (old == null) pair._2 else cleanedF(old, pair._2)) } m1 } : JHashMap[K, V] @@ -317,9 +324,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) /** Alias for reduceByKeyLocally */ @deprecated("Use reduceByKeyLocally", "1.0.0") - def reduceByKeyToDriver(func: (V, V) => V): Map[K, V] = reduceByKeyLocally(func) + def reduceByKeyToDriver(func: (V, V) => V): Map[K, V] = self.withScope { + reduceByKeyLocally(func) + } - /** + /** * Count the number of elements for each key, collecting the results to a local Map. * * Note that this method should only be used if the resulting map is expected to be small, as @@ -327,7 +336,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * To handle very large results, consider using rdd.mapValues(_ => 1L).reduceByKey(_ + _), which * returns an RDD[T, Long] instead of a map. 
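`reduceByKeyLocally` now cleans `func` once (`cleanedF`) and reuses it in both the per-partition merge and the driver-side map merge. Unlike `reduceByKey`, it returns a plain Scala `Map` on the driver, so it is only appropriate for small key sets. Sketch:

```scala
val words = sc.parallelize(Seq("spark", "rdd", "spark", "scala", "rdd", "spark"))

// Returns scala.collection.Map on the driver -- only safe when the set of keys is small.
val counts: scala.collection.Map[String, Int] =
  words.map(w => (w, 1)).reduceByKeyLocally(_ + _)

println(counts("spark"))   // 3
```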
*/ - def countByKey(): Map[K, Long] = self.mapValues(_ => 1L).reduceByKey(_ + _).collect().toMap + def countByKey(): Map[K, Long] = self.withScope { + self.mapValues(_ => 1L).reduceByKey(_ + _).collect().toMap + } /** * :: Experimental :: @@ -336,7 +347,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) */ @Experimental def countByKeyApprox(timeout: Long, confidence: Double = 0.95) - : PartialResult[Map[K, BoundedDouble]] = { + : PartialResult[Map[K, BoundedDouble]] = self.withScope { self.map(_._1).countByValueApprox(timeout, confidence) } @@ -360,7 +371,10 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * @param partitioner Partitioner to use for the resulting RDD. */ @Experimental - def countApproxDistinctByKey(p: Int, sp: Int, partitioner: Partitioner): RDD[(K, Long)] = { + def countApproxDistinctByKey( + p: Int, + sp: Int, + partitioner: Partitioner): RDD[(K, Long)] = self.withScope { require(p >= 4, s"p ($p) must be >= 4") require(sp <= 32, s"sp ($sp) must be <= 32") require(sp == 0 || p <= sp, s"p ($p) cannot be greater than sp ($sp)") @@ -392,7 +406,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * It must be greater than 0.000017. * @param partitioner partitioner of the resulting RDD */ - def countApproxDistinctByKey(relativeSD: Double, partitioner: Partitioner): RDD[(K, Long)] = { + def countApproxDistinctByKey( + relativeSD: Double, + partitioner: Partitioner): RDD[(K, Long)] = self.withScope { require(relativeSD > 0.000017, s"accuracy ($relativeSD) must be greater than 0.000017") val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt assert(p <= 32) @@ -410,7 +426,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * It must be greater than 0.000017. * @param numPartitions number of partitions of the resulting RDD */ - def countApproxDistinctByKey(relativeSD: Double, numPartitions: Int): RDD[(K, Long)] = { + def countApproxDistinctByKey( + relativeSD: Double, + numPartitions: Int): RDD[(K, Long)] = self.withScope { countApproxDistinctByKey(relativeSD, new HashPartitioner(numPartitions)) } @@ -424,7 +442,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * @param relativeSD Relative accuracy. Smaller values create counters that require more space. * It must be greater than 0.000017. */ - def countApproxDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = { + def countApproxDistinctByKey(relativeSD: Double = 0.05): RDD[(K, Long)] = self.withScope { countApproxDistinctByKey(relativeSD, defaultPartitioner(self)) } @@ -441,7 +459,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. */ - def groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = { + def groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = self.withScope { // groupByKey shouldn't use map side combine because map side combine does not // reduce the amount of data shuffled and requires all map side data be inserted // into a hash table, leading to more objects in the old gen. 
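The approximate-distinct-count family above is backed by HyperLogLog++; `relativeSD` trades memory for accuracy and must exceed 0.000017. Sketch (spark-shell `sc`, illustrative data):

```scala
val visits = sc.parallelize(Seq(
  ("page1", "u1"), ("page1", "u2"), ("page2", "u1"), ("page1", "u1")))

// Exact number of records per key, collected to the driver:
val perKey: scala.collection.Map[String, Long] = visits.countByKey()

// Approximate number of distinct values per key, ~5% relative error by default:
val distinctUsers = visits.countApproxDistinctByKey(relativeSD = 0.05)
distinctUsers.collect().foreach { case (page, approx) =>
  println(s"$page ~ $approx distinct users")
}
```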
@@ -449,7 +467,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val mergeValue = (buf: CompactBuffer[V], v: V) => buf += v val mergeCombiners = (c1: CompactBuffer[V], c2: CompactBuffer[V]) => c1 ++= c2 val bufs = combineByKey[CompactBuffer[V]]( - createCombiner, mergeValue, mergeCombiners, partitioner, mapSideCombine=false) + createCombiner, mergeValue, mergeCombiners, partitioner, mapSideCombine = false) bufs.asInstanceOf[RDD[(K, Iterable[V])]] } @@ -465,14 +483,14 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Note: As currently implemented, groupByKey must be able to hold all the key-value pairs for any * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. */ - def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])] = { + def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])] = self.withScope { groupByKey(new HashPartitioner(numPartitions)) } /** * Return a copy of the RDD partitioned using the specified partitioner. */ - def partitionBy(partitioner: Partitioner): RDD[(K, V)] = { + def partitionBy(partitioner: Partitioner): RDD[(K, V)] = self.withScope { if (keyClass.isArray && partitioner.isInstanceOf[HashPartitioner]) { throw new SparkException("Default partitioner cannot partition array keys.") } @@ -488,7 +506,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in `this` and * (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD. */ - def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = { + def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = self.withScope { this.cogroup(other, partitioner).flatMapValues( pair => for (v <- pair._1.iterator; w <- pair._2.iterator) yield (v, w) ) @@ -500,7 +518,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair (k, (v, None)) if no elements in `other` have key k. Uses the given Partitioner to * partition the output RDD. */ - def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = { + def leftOuterJoin[W]( + other: RDD[(K, W)], + partitioner: Partitioner): RDD[(K, (V, Option[W]))] = self.withScope { this.cogroup(other, partitioner).flatMapValues { pair => if (pair._2.isEmpty) { pair._1.iterator.map(v => (v, None)) @@ -517,7 +537,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * partition the output RDD. */ def rightOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner) - : RDD[(K, (Option[V], W))] = { + : RDD[(K, (Option[V], W))] = self.withScope { this.cogroup(other, partitioner).flatMapValues { pair => if (pair._1.isEmpty) { pair._2.iterator.map(w => (None, w)) @@ -536,7 +556,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * in `this` have key k. Uses the given Partitioner to partition the output RDD. */ def fullOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner) - : RDD[(K, (Option[V], Option[W]))] = { + : RDD[(K, (Option[V], Option[W]))] = self.withScope { this.cogroup(other, partitioner).flatMapValues { case (vs, Seq()) => vs.iterator.map(v => (Some(v), None)) case (Seq(), ws) => ws.iterator.map(w => (None, Some(w))) @@ -549,7 +569,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * existing partitioner/parallelism level. 
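The `mapSideCombine = false` above (and the comment preceding it) is worth underlining: `groupByKey` deliberately skips map-side combining because grouping does not shrink the data before the shuffle. For simple per-key aggregations, `reduceByKey` or `aggregateByKey` remain the better choice, as in this comparison sketch:

```scala
val pairs = sc.parallelize(Seq(("k", 1), ("k", 2), ("j", 3)))

// Shuffles every value, then sums on the reduce side:
val viaGroup = pairs.groupByKey().mapValues(_.sum)

// Combines partial sums before the shuffle -- usually much cheaper:
val viaReduce = pairs.reduceByKey(_ + _)
```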
*/ def combineByKey[C](createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiners: (C, C) => C) - : RDD[(K, C)] = { + : RDD[(K, C)] = self.withScope { combineByKey(createCombiner, mergeValue, mergeCombiners, defaultPartitioner(self)) } @@ -563,7 +583,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ - def groupByKey(): RDD[(K, Iterable[V])] = { + def groupByKey(): RDD[(K, Iterable[V])] = self.withScope { groupByKey(defaultPartitioner(self)) } @@ -572,7 +592,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in `this` and * (k, v2) is in `other`. Performs a hash join across the cluster. */ - def join[W](other: RDD[(K, W)]): RDD[(K, (V, W))] = { + def join[W](other: RDD[(K, W)]): RDD[(K, (V, W))] = self.withScope { join(other, defaultPartitioner(self, other)) } @@ -581,7 +601,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in `this` and * (k, v2) is in `other`. Performs a hash join across the cluster. */ - def join[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, W))] = { + def join[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, W))] = self.withScope { join(other, new HashPartitioner(numPartitions)) } @@ -591,7 +611,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output * using the existing partitioner/parallelism level. */ - def leftOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (V, Option[W]))] = { + def leftOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (V, Option[W]))] = self.withScope { leftOuterJoin(other, defaultPartitioner(self, other)) } @@ -601,7 +621,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output * into `numPartitions` partitions. */ - def leftOuterJoin[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (V, Option[W]))] = { + def leftOuterJoin[W]( + other: RDD[(K, W)], + numPartitions: Int): RDD[(K, (V, Option[W]))] = self.withScope { leftOuterJoin(other, new HashPartitioner(numPartitions)) } @@ -611,7 +633,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting * RDD using the existing partitioner/parallelism level. */ - def rightOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], W))] = { + def rightOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], W))] = self.withScope { rightOuterJoin(other, defaultPartitioner(self, other)) } @@ -621,7 +643,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting * RDD into the given number of partitions. */ - def rightOuterJoin[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Option[V], W))] = { + def rightOuterJoin[W]( + other: RDD[(K, W)], + numPartitions: Int): RDD[(K, (Option[V], W))] = self.withScope { rightOuterJoin(other, new HashPartitioner(numPartitions)) } @@ -634,7 +658,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * in `this` have key k. Hash-partitions the resulting RDD using the existing partitioner/ * parallelism level. 
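All of the join flavours above expand into `cogroup` plus `flatMapValues` internally, but with `withScope` each now shows up as a single named operation. Usage sketch (result ordering is not guaranteed):

```scala
val left  = sc.parallelize(Seq(("a", 1), ("b", 2)))
val right = sc.parallelize(Seq(("a", "x"), ("c", "y")))

left.join(right).collect()           // (a,(1,x))
left.leftOuterJoin(right).collect()  // (a,(1,Some(x))), (b,(2,None))
left.fullOuterJoin(right).collect()  // (a,(Some(1),Some(x))), (b,(Some(2),None)), (c,(None,Some(y)))
```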
*/ - def fullOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], Option[W]))] = { + def fullOuterJoin[W](other: RDD[(K, W)]): RDD[(K, (Option[V], Option[W]))] = self.withScope { fullOuterJoin(other, defaultPartitioner(self, other)) } @@ -646,7 +670,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * (k, (Some(v), Some(w))) for v in `this`, or the pair (k, (None, Some(w))) if no elements * in `this` have key k. Hash-partitions the resulting RDD into the given number of partitions. */ - def fullOuterJoin[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Option[V], Option[W]))] = { + def fullOuterJoin[W]( + other: RDD[(K, W)], + numPartitions: Int): RDD[(K, (Option[V], Option[W]))] = self.withScope { fullOuterJoin(other, new HashPartitioner(numPartitions)) } @@ -656,7 +682,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Warning: this doesn't return a multimap (so if you have multiple values to the same key, only * one value per key is preserved in the map returned) */ - def collectAsMap(): Map[K, V] = { + def collectAsMap(): Map[K, V] = self.withScope { val data = self.collect() val map = new mutable.HashMap[K, V] map.sizeHint(data.length) @@ -668,7 +694,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Pass each value in the key-value pair RDD through a map function without changing the keys; * this also retains the original RDD's partitioning. */ - def mapValues[U](f: V => U): RDD[(K, U)] = { + def mapValues[U](f: V => U): RDD[(K, U)] = self.withScope { val cleanF = self.context.clean(f) new MapPartitionsRDD[(K, U), (K, V)](self, (context, pid, iter) => iter.map { case (k, v) => (k, cleanF(v)) }, @@ -679,7 +705,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Pass each value in the key-value pair RDD through a flatMap function without changing the * keys; this also retains the original RDD's partitioning. */ - def flatMapValues[U](f: V => TraversableOnce[U]): RDD[(K, U)] = { + def flatMapValues[U](f: V => TraversableOnce[U]): RDD[(K, U)] = self.withScope { val cleanF = self.context.clean(f) new MapPartitionsRDD[(K, U), (K, V)](self, (context, pid, iter) => iter.flatMap { case (k, v) => @@ -697,7 +723,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) other2: RDD[(K, W2)], other3: RDD[(K, W3)], partitioner: Partitioner) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = self.withScope { if (partitioner.isInstanceOf[HashPartitioner] && keyClass.isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } @@ -715,7 +741,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * list of values for that key in `this` as well as `other`. */ def cogroup[W](other: RDD[(K, W)], partitioner: Partitioner) - : RDD[(K, (Iterable[V], Iterable[W]))] = { + : RDD[(K, (Iterable[V], Iterable[W]))] = self.withScope { if (partitioner.isInstanceOf[HashPartitioner] && keyClass.isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } @@ -730,7 +756,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * tuple with the list of values for that key in `this`, `other1` and `other2`. 
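As their scaladoc above says, `mapValues` and `flatMapValues` retain the parent RDD's partitioning, which matters if a join or `lookup` follows. `collectAsMap` pulls everything to the driver and keeps only one value per key. Sketch:

```scala
import org.apache.spark.HashPartitioner

val kv = sc.parallelize(Seq(("a", "1,2"), ("b", "3"))).partitionBy(new HashPartitioner(4))

val lengths  = kv.mapValues(_.length)          // keys and partitioning preserved
val exploded = kv.flatMapValues(_.split(","))  // one output record per split token, same keys

// Small results only: everything is pulled to the driver into a local Map.
val asMap = lengths.collectAsMap()
```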
*/ def cogroup[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)], partitioner: Partitioner) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = self.withScope { if (partitioner.isInstanceOf[HashPartitioner] && keyClass.isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } @@ -748,7 +774,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * for that key in `this`, `other1`, `other2` and `other3`. */ def cogroup[W1, W2, W3](other1: RDD[(K, W1)], other2: RDD[(K, W2)], other3: RDD[(K, W3)]) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = self.withScope { cogroup(other1, other2, other3, defaultPartitioner(self, other1, other2, other3)) } @@ -756,7 +782,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * For each key k in `this` or `other`, return a resulting RDD that contains a tuple with the * list of values for that key in `this` as well as `other`. */ - def cogroup[W](other: RDD[(K, W)]): RDD[(K, (Iterable[V], Iterable[W]))] = { + def cogroup[W](other: RDD[(K, W)]): RDD[(K, (Iterable[V], Iterable[W]))] = self.withScope { cogroup(other, defaultPartitioner(self, other)) } @@ -765,7 +791,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * tuple with the list of values for that key in `this`, `other1` and `other2`. */ def cogroup[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)]) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = self.withScope { cogroup(other1, other2, defaultPartitioner(self, other1, other2)) } @@ -773,7 +799,9 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * For each key k in `this` or `other`, return a resulting RDD that contains a tuple with the * list of values for that key in `this` as well as `other`. */ - def cogroup[W](other: RDD[(K, W)], numPartitions: Int): RDD[(K, (Iterable[V], Iterable[W]))] = { + def cogroup[W]( + other: RDD[(K, W)], + numPartitions: Int): RDD[(K, (Iterable[V], Iterable[W]))] = self.withScope { cogroup(other, new HashPartitioner(numPartitions)) } @@ -782,7 +810,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * tuple with the list of values for that key in `this`, `other1` and `other2`. */ def cogroup[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)], numPartitions: Int) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = self.withScope { cogroup(other1, other2, new HashPartitioner(numPartitions)) } @@ -795,24 +823,24 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) other2: RDD[(K, W2)], other3: RDD[(K, W3)], numPartitions: Int) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = self.withScope { cogroup(other1, other2, other3, new HashPartitioner(numPartitions)) } /** Alias for cogroup. */ - def groupWith[W](other: RDD[(K, W)]): RDD[(K, (Iterable[V], Iterable[W]))] = { + def groupWith[W](other: RDD[(K, W)]): RDD[(K, (Iterable[V], Iterable[W]))] = self.withScope { cogroup(other, defaultPartitioner(self, other)) } /** Alias for cogroup. 
*/ def groupWith[W1, W2](other1: RDD[(K, W1)], other2: RDD[(K, W2)]) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2]))] = self.withScope { cogroup(other1, other2, defaultPartitioner(self, other1, other2)) } /** Alias for cogroup. */ def groupWith[W1, W2, W3](other1: RDD[(K, W1)], other2: RDD[(K, W2)], other3: RDD[(K, W3)]) - : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = { + : RDD[(K, (Iterable[V], Iterable[W1], Iterable[W2], Iterable[W3]))] = self.withScope { cogroup(other1, other2, other3, defaultPartitioner(self, other1, other2, other3)) } @@ -822,22 +850,27 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting * RDD will be <= us. */ - def subtractByKey[W: ClassTag](other: RDD[(K, W)]): RDD[(K, V)] = + def subtractByKey[W: ClassTag](other: RDD[(K, W)]): RDD[(K, V)] = self.withScope { subtractByKey(other, self.partitioner.getOrElse(new HashPartitioner(self.partitions.length))) + } /** Return an RDD with the pairs from `this` whose keys are not in `other`. */ - def subtractByKey[W: ClassTag](other: RDD[(K, W)], numPartitions: Int): RDD[(K, V)] = + def subtractByKey[W: ClassTag]( + other: RDD[(K, W)], + numPartitions: Int): RDD[(K, V)] = self.withScope { subtractByKey(other, new HashPartitioner(numPartitions)) + } /** Return an RDD with the pairs from `this` whose keys are not in `other`. */ - def subtractByKey[W: ClassTag](other: RDD[(K, W)], p: Partitioner): RDD[(K, V)] = + def subtractByKey[W: ClassTag](other: RDD[(K, W)], p: Partitioner): RDD[(K, V)] = self.withScope { new SubtractedRDD[K, V, W](self, other, p) + } /** * Return the list of values in the RDD for key `key`. This operation is done efficiently if the * RDD has a known partitioner by only searching the partition that the key maps to. */ - def lookup(key: K): Seq[V] = { + def lookup(key: K): Seq[V] = self.withScope { self.partitioner match { case Some(p) => val index = p.getPartition(key) @@ -859,7 +892,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Output the RDD to any Hadoop-supported file system, using a Hadoop `OutputFormat` class * supporting the key and value types K and V in this RDD. */ - def saveAsHadoopFile[F <: OutputFormat[K, V]](path: String)(implicit fm: ClassTag[F]) { + def saveAsHadoopFile[F <: OutputFormat[K, V]]( + path: String)(implicit fm: ClassTag[F]): Unit = self.withScope { saveAsHadoopFile(path, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } @@ -869,7 +903,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * supplied codec. */ def saveAsHadoopFile[F <: OutputFormat[K, V]]( - path: String, codec: Class[_ <: CompressionCodec]) (implicit fm: ClassTag[F]) { + path: String, + codec: Class[_ <: CompressionCodec])(implicit fm: ClassTag[F]): Unit = self.withScope { val runtimeClass = fm.runtimeClass saveAsHadoopFile(path, keyClass, valueClass, runtimeClass.asInstanceOf[Class[F]], codec) } @@ -878,7 +913,8 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * Output the RDD to any Hadoop-supported file system, using a new Hadoop API `OutputFormat` * (mapreduce.OutputFormat) object supporting the key and value types K and V in this RDD. 
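`subtractByKey` and `lookup` are now scoped as well; as the scaladoc above notes, `lookup` stays efficient when the RDD has a known partitioner because only the partition owning the key is scanned. Sketch:

```scala
import org.apache.spark.HashPartitioner

val base  = sc.parallelize(Seq(("a", 1), ("b", 2), ("c", 3))).partitionBy(new HashPartitioner(4))
val drops = sc.parallelize(Seq(("b", ())))

base.subtractByKey(drops).collect()   // keeps only keys "a" and "c"
base.lookup("a")                      // Seq(1) -- scans a single partition thanks to the partitioner
```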
*/ - def saveAsNewAPIHadoopFile[F <: NewOutputFormat[K, V]](path: String)(implicit fm: ClassTag[F]) { + def saveAsNewAPIHadoopFile[F <: NewOutputFormat[K, V]]( + path: String)(implicit fm: ClassTag[F]): Unit = self.withScope { saveAsNewAPIHadoopFile(path, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } @@ -891,8 +927,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[_ <: NewOutputFormat[_, _]], - conf: Configuration = self.context.hadoopConfiguration) - { + conf: Configuration = self.context.hadoopConfiguration): Unit = self.withScope { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). val hadoopConf = conf val job = new NewAPIHadoopJob(hadoopConf) @@ -912,7 +947,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) keyClass: Class[_], valueClass: Class[_], outputFormatClass: Class[_ <: OutputFormat[_, _]], - codec: Class[_ <: CompressionCodec]) { + codec: Class[_ <: CompressionCodec]): Unit = self.withScope { saveAsHadoopFile(path, keyClass, valueClass, outputFormatClass, new JobConf(self.context.hadoopConfiguration), Some(codec)) } @@ -927,7 +962,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) valueClass: Class[_], outputFormatClass: Class[_ <: OutputFormat[_, _]], conf: JobConf = new JobConf(self.context.hadoopConfiguration), - codec: Option[Class[_ <: CompressionCodec]] = None) { + codec: Option[Class[_ <: CompressionCodec]] = None): Unit = self.withScope { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). val hadoopConf = conf hadoopConf.setOutputKeyClass(keyClass) @@ -960,7 +995,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * output paths required (e.g. a table name to write to) in the same way as it would be * configured for a Hadoop MapReduce job. */ - def saveAsNewAPIHadoopDataset(conf: Configuration) { + def saveAsNewAPIHadoopDataset(conf: Configuration): Unit = self.withScope { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). val hadoopConf = conf val job = new NewAPIHadoopJob(hadoopConf) @@ -976,7 +1011,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) jobFormat.checkOutputSpecs(job) } - val writeShard = (context: TaskContext, iter: Iterator[(K,V)]) => { + val writeShard = (context: TaskContext, iter: Iterator[(K, V)]) => { val config = wrappedConf.value /* "reduce task" */ val attemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = false, context.partitionId, @@ -992,7 +1027,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val (outputMetrics, bytesWrittenCallback) = initHadoopOutputMetrics(context) - val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K,V]] + val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K, V]] require(writer != null, "Unable to obtain RecordWriter") var recordsWritten = 0L Utils.tryWithSafeFinally { @@ -1027,7 +1062,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * (e.g. a table name to write to) in the same way as it would be configured for a Hadoop * MapReduce job. */ - def saveAsHadoopDataset(conf: JobConf) { + def saveAsHadoopDataset(conf: JobConf): Unit = self.withScope { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). 
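The save methods above now declare an explicit `: Unit` return type and run under `withScope`; their behaviour is otherwise unchanged. A hedged sketch of the old-API variant shown in this hunk, writing key/value pairs as text (the output path is hypothetical; the new-API counterpart is `saveAsNewAPIHadoopFile`):

```scala
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.hadoop.mapred.TextOutputFormat

val writables = sc.parallelize(Seq(("a", 1), ("b", 2)))
  .map { case (k, v) => (new Text(k), new IntWritable(v)) }

writables.saveAsHadoopFile(
  "/tmp/pairs-out",                    // hypothetical output path
  classOf[Text], classOf[IntWritable],
  classOf[TextOutputFormat[Text, IntWritable]])
```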
val hadoopConf = conf val wrappedConf = new SerializableWritable(hadoopConf) diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala index 7598ff617b399..9e3880714a79f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala @@ -86,7 +86,7 @@ class PartitionerAwareUnionRDD[T: ClassTag]( } val location = if (locations.isEmpty) { None - } else { + } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 31c07c73fe07b..10610f4b6f1ff 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -25,7 +25,7 @@ import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus -import org.apache.hadoop.io.{Writable, BytesWritable, NullWritable, Text} +import org.apache.hadoop.io.{BytesWritable, NullWritable, Text} import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.TextOutputFormat @@ -277,12 +277,20 @@ abstract class RDD[T: ClassTag]( if (isCheckpointed) firstParent[T].iterator(split, context) else compute(split, context) } + /** + * Execute a block of code in a scope such that all new RDDs created in this body will + * be part of the same scope. For more detail, see {{org.apache.spark.rdd.RDDOperationScope}}. + * + * Note: Return statements are NOT allowed in the given body. + */ + private[spark] def withScope[U](body: => U): U = RDDOperationScope.withScope[U](sc)(body) + // Transformations (return a new RDD) /** * Return a new RDD by applying a function to all elements of this RDD. */ - def map[U: ClassTag](f: T => U): RDD[U] = { + def map[U: ClassTag](f: T => U): RDD[U] = withScope { val cleanF = sc.clean(f) new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.map(cleanF)) } @@ -291,7 +299,7 @@ abstract class RDD[T: ClassTag]( * Return a new RDD by first applying a function to all elements of this * RDD, and then flattening the results. */ - def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = { + def flatMap[U: ClassTag](f: T => TraversableOnce[U]): RDD[U] = withScope { val cleanF = sc.clean(f) new MapPartitionsRDD[U, T](this, (context, pid, iter) => iter.flatMap(cleanF)) } @@ -299,7 +307,7 @@ abstract class RDD[T: ClassTag]( /** * Return a new RDD containing only the elements that satisfy a predicate. */ - def filter(f: T => Boolean): RDD[T] = { + def filter(f: T => Boolean): RDD[T] = withScope { val cleanF = sc.clean(f) new MapPartitionsRDD[T, T]( this, @@ -310,13 +318,16 @@ abstract class RDD[T: ClassTag]( /** * Return a new RDD containing the distinct elements in this RDD. */ - def distinct(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = + def distinct(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = withScope { map(x => (x, null)).reduceByKey((x, y) => x, numPartitions).map(_._1) + } /** * Return a new RDD containing the distinct elements in this RDD. */ - def distinct(): RDD[T] = distinct(partitions.length) + def distinct(): RDD[T] = withScope { + distinct(partitions.length) + } /** * Return a new RDD that has exactly numPartitions partitions. 
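The new `private[spark] def withScope[U](body: => U): U` above is the heart of this change: it delegates to `RDDOperationScope.withScope(sc)(body)`, so every RDD created inside the body is attributed to the enclosing public RDD method as one named scope in the web UI's DAG visualization. Since `withScope` is `private[spark]`, the following is only a standalone sketch of the idea (a dynamically scoped "current operation" that nested calls inherit rather than override); the names here are hypothetical and this is not the Spark implementation:

```scala
import scala.util.DynamicVariable

object ScopeSketch {
  // Hypothetical stand-in for RDDOperationScope: tracks the innermost named scope.
  private val currentScope = new DynamicVariable[Option[String]](None)

  def withNamedScope[U](name: String)(body: => U): U =
    currentScope.value match {
      // An inner RDD method reuses the scope opened by the public method the user called.
      case Some(_) => body
      case None    => currentScope.withValue(Some(name))(body)
    }

  def scopeOfNewNode(): String = currentScope.value.getOrElse("(no scope)")

  def main(args: Array[String]): Unit = {
    withNamedScope("reduceByKey") {
      withNamedScope("combineByKey") {     // called internally, does not open a new scope
        println(scopeOfNewNode())          // prints "reduceByKey"
      }
    }
  }
}
```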
@@ -327,7 +338,7 @@ abstract class RDD[T: ClassTag]( * If you are decreasing the number of partitions in this RDD, consider using `coalesce`, * which can avoid performing a shuffle. */ - def repartition(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = { + def repartition(numPartitions: Int)(implicit ord: Ordering[T] = null): RDD[T] = withScope { coalesce(numPartitions, shuffle = true) } @@ -352,7 +363,7 @@ abstract class RDD[T: ClassTag]( * data distributed using a hash partitioner. */ def coalesce(numPartitions: Int, shuffle: Boolean = false)(implicit ord: Ordering[T] = null) - : RDD[T] = { + : RDD[T] = withScope { if (shuffle) { /** Distributes elements evenly across output partitions, starting from a random partition. */ val distributePartition = (index: Int, items: Iterator[T]) => { @@ -377,16 +388,17 @@ abstract class RDD[T: ClassTag]( /** * Return a sampled subset of this RDD. - * + * * @param withReplacement can elements be sampled multiple times (replaced when sampled out) * @param fraction expected size of the sample as a fraction of this RDD's size * without replacement: probability that each element is chosen; fraction must be [0, 1] * with replacement: expected number of times each element is chosen; fraction must be >= 0 * @param seed seed for the random number generator */ - def sample(withReplacement: Boolean, + def sample( + withReplacement: Boolean, fraction: Double, - seed: Long = Utils.random.nextLong): RDD[T] = { + seed: Long = Utils.random.nextLong): RDD[T] = withScope { require(fraction >= 0.0, "Negative fraction value: " + fraction) if (withReplacement) { new PartitionwiseSampledRDD[T, T](this, new PoissonSampler[T](fraction), true, seed) @@ -403,7 +415,9 @@ abstract class RDD[T: ClassTag]( * * @return split RDDs in an array */ - def randomSplit(weights: Array[Double], seed: Long = Utils.random.nextLong): Array[RDD[T]] = { + def randomSplit( + weights: Array[Double], + seed: Long = Utils.random.nextLong): Array[RDD[T]] = withScope { val sum = weights.sum val normalizedCumWeights = weights.map(_ / sum).scanLeft(0.0d)(_ + _) normalizedCumWeights.sliding(2).map { x => @@ -420,11 +434,11 @@ abstract class RDD[T: ClassTag]( * @return A random sub-sample of the RDD without replacement. */ private[spark] def randomSampleWithRange(lb: Double, ub: Double, seed: Long): RDD[T] = { - this.mapPartitionsWithIndex { case (index, partition) => + this.mapPartitionsWithIndex( { (index, partition) => val sampler = new BernoulliCellSampler[T](lb, ub) sampler.setSeed(seed + index) sampler.sample(partition) - } + }, preservesPartitioning = true) } /** @@ -435,10 +449,12 @@ abstract class RDD[T: ClassTag]( * @param seed seed for the random number generator * @return sample of specified size in an array */ - def takeSample(withReplacement: Boolean, + // TODO: rewrite this without return statements so we can wrap it in a scope + def takeSample( + withReplacement: Boolean, num: Int, seed: Long = Utils.random.nextLong): Array[T] = { - val numStDev = 10.0 + val numStDev = 10.0 if (num < 0) { throw new IllegalArgumentException("Negative number of elements requested") @@ -483,7 +499,7 @@ abstract class RDD[T: ClassTag]( * Return the union of this RDD and another one. Any identical elements will appear multiple * times (use `.distinct()` to eliminate them). 
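`sample` and `randomSplit` above only gained multi-line parameter lists and `withScope`; semantics are unchanged. Typical use (spark-shell `sc`, arbitrary seed):

```scala
val data = sc.parallelize(1 to 10000)

// ~10% Bernoulli sample, without replacement:
val tenPercent = data.sample(withReplacement = false, fraction = 0.1, seed = 17L)

// Weighted 70/30 split, e.g. train/test; the weights are normalized internally:
val Array(train, test) = data.randomSplit(Array(0.7, 0.3), seed = 17L)
```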
*/ - def union(other: RDD[T]): RDD[T] = { + def union(other: RDD[T]): RDD[T] = withScope { if (partitioner.isDefined && other.partitioner == partitioner) { new PartitionerAwareUnionRDD(sc, Array(this, other)) } else { @@ -495,7 +511,9 @@ abstract class RDD[T: ClassTag]( * Return the union of this RDD and another one. Any identical elements will appear multiple * times (use `.distinct()` to eliminate them). */ - def ++(other: RDD[T]): RDD[T] = this.union(other) + def ++(other: RDD[T]): RDD[T] = withScope { + this.union(other) + } /** * Return this RDD sorted by the given key function. @@ -504,10 +522,11 @@ abstract class RDD[T: ClassTag]( f: (T) => K, ascending: Boolean = true, numPartitions: Int = this.partitions.length) - (implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[T] = + (implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[T] = withScope { this.keyBy[K](f) .sortByKey(ascending, numPartitions) .values + } /** * Return the intersection of this RDD and another one. The output will not contain any duplicate @@ -515,7 +534,7 @@ abstract class RDD[T: ClassTag]( * * Note that this method performs a shuffle internally. */ - def intersection(other: RDD[T]): RDD[T] = { + def intersection(other: RDD[T]): RDD[T] = withScope { this.map(v => (v, null)).cogroup(other.map(v => (v, null))) .filter { case (_, (leftGroup, rightGroup)) => leftGroup.nonEmpty && rightGroup.nonEmpty } .keys @@ -529,8 +548,9 @@ abstract class RDD[T: ClassTag]( * * @param partitioner Partitioner to use for the resulting RDD */ - def intersection(other: RDD[T], partitioner: Partitioner)(implicit ord: Ordering[T] = null) - : RDD[T] = { + def intersection( + other: RDD[T], + partitioner: Partitioner)(implicit ord: Ordering[T] = null): RDD[T] = withScope { this.map(v => (v, null)).cogroup(other.map(v => (v, null)), partitioner) .filter { case (_, (leftGroup, rightGroup)) => leftGroup.nonEmpty && rightGroup.nonEmpty } .keys @@ -544,16 +564,14 @@ abstract class RDD[T: ClassTag]( * * @param numPartitions How many partitions to use in the resulting RDD */ - def intersection(other: RDD[T], numPartitions: Int): RDD[T] = { - this.map(v => (v, null)).cogroup(other.map(v => (v, null)), new HashPartitioner(numPartitions)) - .filter { case (_, (leftGroup, rightGroup)) => leftGroup.nonEmpty && rightGroup.nonEmpty } - .keys + def intersection(other: RDD[T], numPartitions: Int): RDD[T] = withScope { + intersection(other, new HashPartitioner(numPartitions)) } /** * Return an RDD created by coalescing all elements within each partition into an array. */ - def glom(): RDD[Array[T]] = { + def glom(): RDD[Array[T]] = withScope { new MapPartitionsRDD[Array[T], T](this, (context, pid, iter) => Iterator(iter.toArray)) } @@ -561,7 +579,9 @@ abstract class RDD[T: ClassTag]( * Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of * elements (a, b) where a is in `this` and b is in `other`. */ - def cartesian[U: ClassTag](other: RDD[U]): RDD[(T, U)] = new CartesianRDD(sc, this, other) + def cartesian[U: ClassTag](other: RDD[U]): RDD[(T, U)] = withScope { + new CartesianRDD(sc, this, other) + } /** * Return an RDD of grouped items. Each group consists of a key and a sequence of elements @@ -572,8 +592,9 @@ abstract class RDD[T: ClassTag]( * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. 
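Besides the scoping, `intersection(other, numPartitions)` above is simplified to delegate to the partitioner overload instead of duplicating the cogroup logic. Quick sketch of `intersection` and `sortBy`:

```scala
val a = sc.parallelize(Seq(3, 1, 2, 2, 5))
val b = sc.parallelize(Seq(2, 5, 7))

a.intersection(b).collect()                    // 2 and 5, duplicates removed, order not guaranteed
a.sortBy(x => x, ascending = false).collect()  // Array(5, 3, 2, 2, 1)
```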
*/ - def groupBy[K](f: T => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = + def groupBy[K](f: T => K)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = withScope { groupBy[K](f, defaultPartitioner(this)) + } /** * Return an RDD of grouped elements. Each group consists of a key and a sequence of elements @@ -584,8 +605,11 @@ abstract class RDD[T: ClassTag]( * aggregation (such as a sum or average) over each key, using [[PairRDDFunctions.aggregateByKey]] * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ - def groupBy[K](f: T => K, numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = + def groupBy[K]( + f: T => K, + numPartitions: Int)(implicit kt: ClassTag[K]): RDD[(K, Iterable[T])] = withScope { groupBy(f, new HashPartitioner(numPartitions)) + } /** * Return an RDD of grouped items. Each group consists of a key and a sequence of elements @@ -597,7 +621,7 @@ abstract class RDD[T: ClassTag]( * or [[PairRDDFunctions.reduceByKey]] will provide much better performance. */ def groupBy[K](f: T => K, p: Partitioner)(implicit kt: ClassTag[K], ord: Ordering[K] = null) - : RDD[(K, Iterable[T])] = { + : RDD[(K, Iterable[T])] = withScope { val cleanF = sc.clean(f) this.map(t => (cleanF(t), t)).groupByKey(p) } @@ -605,13 +629,16 @@ abstract class RDD[T: ClassTag]( /** * Return an RDD created by piping elements to a forked external process. */ - def pipe(command: String): RDD[String] = new PipedRDD(this, command) + def pipe(command: String): RDD[String] = withScope { + new PipedRDD(this, command) + } /** * Return an RDD created by piping elements to a forked external process. */ - def pipe(command: String, env: Map[String, String]): RDD[String] = + def pipe(command: String, env: Map[String, String]): RDD[String] = withScope { new PipedRDD(this, command, env) + } /** * Return an RDD created by piping elements to a forked external process. @@ -619,7 +646,7 @@ abstract class RDD[T: ClassTag]( * * @param command command to run in forked process. * @param env environment variables to set. - * @param printPipeContext Before piping elements, this function is called as an oppotunity + * @param printPipeContext Before piping elements, this function is called as an opportunity * to pipe context data. Print line function (like out.println) will be * passed as printPipeContext's parameter. * @param printRDDElement Use this function to customize how to pipe elements. This function @@ -637,7 +664,7 @@ abstract class RDD[T: ClassTag]( env: Map[String, String] = Map(), printPipeContext: (String => Unit) => Unit = null, printRDDElement: (T, String => Unit) => Unit = null, - separateWorkingDir: Boolean = false): RDD[String] = { + separateWorkingDir: Boolean = false): RDD[String] = withScope { new PipedRDD(this, command, env, if (printPipeContext ne null) sc.clean(printPipeContext) else null, if (printRDDElement ne null) sc.clean(printRDDElement) else null, @@ -651,9 +678,13 @@ abstract class RDD[T: ClassTag]( * should be `false` unless this is a pair RDD and the input function doesn't modify the keys. 
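This hunk also fixes the "oppotunity" typo in the `pipe` scaladoc. `pipe` streams each partition's elements through an external command's stdin and turns the command's stdout lines into the result RDD. Sketch (assumes a POSIX environment where `grep` is on the PATH of every executor):

```scala
val lines = sc.parallelize(Seq("spark", "hadoop", "sparkling"))

// Each element is written to the command's stdin, one per line; stdout lines form the result.
val matched = lines.pipe("grep spark")
matched.collect()   // Array(spark, sparkling)
```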
*/ def mapPartitions[U: ClassTag]( - f: Iterator[T] => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { - val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(iter) - new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) + f: Iterator[T] => Iterator[U], + preservesPartitioning: Boolean = false): RDD[U] = withScope { + val cleanedF = sc.clean(f) + new MapPartitionsRDD( + this, + (context: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(iter), + preservesPartitioning) } /** @@ -664,9 +695,13 @@ abstract class RDD[T: ClassTag]( * should be `false` unless this is a pair RDD and the input function doesn't modify the keys. */ def mapPartitionsWithIndex[U: ClassTag]( - f: (Int, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { - val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(index, iter) - new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) + f: (Int, Iterator[T]) => Iterator[U], + preservesPartitioning: Boolean = false): RDD[U] = withScope { + val cleanedF = sc.clean(f) + new MapPartitionsRDD( + this, + (context: TaskContext, index: Int, iter: Iterator[T]) => cleanedF(index, iter), + preservesPartitioning) } /** @@ -681,8 +716,9 @@ abstract class RDD[T: ClassTag]( @deprecated("use TaskContext.get", "1.2.0") def mapPartitionsWithContext[U: ClassTag]( f: (TaskContext, Iterator[T]) => Iterator[U], - preservesPartitioning: Boolean = false): RDD[U] = { - val func = (context: TaskContext, index: Int, iter: Iterator[T]) => f(context, iter) + preservesPartitioning: Boolean = false): RDD[U] = withScope { + val cleanF = sc.clean(f) + val func = (context: TaskContext, index: Int, iter: Iterator[T]) => cleanF(context, iter) new MapPartitionsRDD(this, sc.clean(func), preservesPartitioning) } @@ -692,7 +728,8 @@ abstract class RDD[T: ClassTag]( */ @deprecated("use mapPartitionsWithIndex", "0.7.0") def mapPartitionsWithSplit[U: ClassTag]( - f: (Int, Iterator[T]) => Iterator[U], preservesPartitioning: Boolean = false): RDD[U] = { + f: (Int, Iterator[T]) => Iterator[U], + preservesPartitioning: Boolean = false): RDD[U] = withScope { mapPartitionsWithIndex(f, preservesPartitioning) } @@ -704,10 +741,12 @@ abstract class RDD[T: ClassTag]( @deprecated("use mapPartitionsWithIndex", "1.0.0") def mapWith[A, U: ClassTag] (constructA: Int => A, preservesPartitioning: Boolean = false) - (f: (T, A) => U): RDD[U] = { + (f: (T, A) => U): RDD[U] = withScope { + val cleanF = sc.clean(f) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex((index, iter) => { - val a = constructA(index) - iter.map(t => f(t, a)) + val a = cleanA(index) + iter.map(t => cleanF(t, a)) }, preservesPartitioning) } @@ -719,10 +758,12 @@ abstract class RDD[T: ClassTag]( @deprecated("use mapPartitionsWithIndex and flatMap", "1.0.0") def flatMapWith[A, U: ClassTag] (constructA: Int => A, preservesPartitioning: Boolean = false) - (f: (T, A) => Seq[U]): RDD[U] = { + (f: (T, A) => Seq[U]): RDD[U] = withScope { + val cleanF = sc.clean(f) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex((index, iter) => { - val a = constructA(index) - iter.flatMap(t => f(t, a)) + val a = cleanA(index) + iter.flatMap(t => cleanF(t, a)) }, preservesPartitioning) } @@ -732,11 +773,13 @@ abstract class RDD[T: ClassTag]( * partition with the index of that partition. 
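The substantive change in `mapPartitions`/`mapPartitionsWithIndex` above is that the user function `f` is now passed through `sc.clean` directly (previously only the internal wrapper was cleaned, so references captured by `f` itself were not scrubbed before serialization). Usage reminder:

```scala
val nums = sc.parallelize(1 to 8, numSlices = 4)

// Tag every element with the index of the partition it lives in:
val tagged = nums.mapPartitionsWithIndex(
  (partitionIndex, iter) => iter.map(n => s"p$partitionIndex:$n"),
  preservesPartitioning = false)

tagged.collect()
```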
*/ @deprecated("use mapPartitionsWithIndex and foreach", "1.0.0") - def foreachWith[A](constructA: Int => A)(f: (T, A) => Unit) { + def foreachWith[A](constructA: Int => A)(f: (T, A) => Unit): Unit = withScope { + val cleanF = sc.clean(f) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex { (index, iter) => - val a = constructA(index) - iter.map(t => {f(t, a); t}) - }.foreach(_ => {}) + val a = cleanA(index) + iter.map(t => {cleanF(t, a); t}) + } } /** @@ -745,10 +788,12 @@ abstract class RDD[T: ClassTag]( * partition with the index of that partition. */ @deprecated("use mapPartitionsWithIndex and filter", "1.0.0") - def filterWith[A](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = { + def filterWith[A](constructA: Int => A)(p: (T, A) => Boolean): RDD[T] = withScope { + val cleanP = sc.clean(p) + val cleanA = sc.clean(constructA) mapPartitionsWithIndex((index, iter) => { - val a = constructA(index) - iter.filter(t => p(t, a)) + val a = cleanA(index) + iter.filter(t => cleanP(t, a)) }, preservesPartitioning = true) } @@ -758,7 +803,7 @@ abstract class RDD[T: ClassTag]( * partitions* and the *same number of elements in each partition* (e.g. one was made through * a map on the other). */ - def zip[U: ClassTag](other: RDD[U]): RDD[(T, U)] = { + def zip[U: ClassTag](other: RDD[U]): RDD[(T, U)] = withScope { zipPartitions(other, preservesPartitioning = false) { (thisIter, otherIter) => new Iterator[(T, U)] { def hasNext: Boolean = (thisIter.hasNext, otherIter.hasNext) match { @@ -780,33 +825,39 @@ abstract class RDD[T: ClassTag]( */ def zipPartitions[B: ClassTag, V: ClassTag] (rdd2: RDD[B], preservesPartitioning: Boolean) - (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = + (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = withScope { new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2, preservesPartitioning) + } def zipPartitions[B: ClassTag, V: ClassTag] (rdd2: RDD[B]) - (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = - new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2, false) + (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = withScope { + zipPartitions(rdd2, preservesPartitioning = false)(f) + } def zipPartitions[B: ClassTag, C: ClassTag, V: ClassTag] (rdd2: RDD[B], rdd3: RDD[C], preservesPartitioning: Boolean) - (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = + (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = withScope { new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3, preservesPartitioning) + } def zipPartitions[B: ClassTag, C: ClassTag, V: ClassTag] (rdd2: RDD[B], rdd3: RDD[C]) - (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = - new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3, false) + (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = withScope { + zipPartitions(rdd2, rdd3, preservesPartitioning = false)(f) + } def zipPartitions[B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag] (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D], preservesPartitioning: Boolean) - (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = + (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = withScope { new ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4, preservesPartitioning) + } def zipPartitions[B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag] (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D]) - (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = - new 
ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4, false) + (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = withScope { + zipPartitions(rdd2, rdd3, rdd4, preservesPartitioning = false)(f) + } // Actions (launch a job to return a value to the user program) @@ -814,7 +865,7 @@ abstract class RDD[T: ClassTag]( /** * Applies a function f to all elements of this RDD. */ - def foreach(f: T => Unit) { + def foreach(f: T => Unit): Unit = withScope { val cleanF = sc.clean(f) sc.runJob(this, (iter: Iterator[T]) => iter.foreach(cleanF)) } @@ -822,7 +873,7 @@ abstract class RDD[T: ClassTag]( /** * Applies a function f to each partition of this RDD. */ - def foreachPartition(f: Iterator[T] => Unit) { + def foreachPartition(f: Iterator[T] => Unit): Unit = withScope { val cleanF = sc.clean(f) sc.runJob(this, (iter: Iterator[T]) => cleanF(iter)) } @@ -830,7 +881,7 @@ abstract class RDD[T: ClassTag]( /** * Return an array that contains all of the elements in this RDD. */ - def collect(): Array[T] = { + def collect(): Array[T] = withScope { val results = sc.runJob(this, (iter: Iterator[T]) => iter.toArray) Array.concat(results: _*) } @@ -840,7 +891,7 @@ abstract class RDD[T: ClassTag]( * * The iterator will consume as much memory as the largest partition in this RDD. */ - def toLocalIterator: Iterator[T] = { + def toLocalIterator: Iterator[T] = withScope { def collectPartition(p: Int): Array[T] = { sc.runJob(this, (iter: Iterator[T]) => iter.toArray, Seq(p), allowLocal = false).head } @@ -851,13 +902,16 @@ abstract class RDD[T: ClassTag]( * Return an array that contains all of the elements in this RDD. */ @deprecated("use collect", "1.0.0") - def toArray(): Array[T] = collect() + def toArray(): Array[T] = withScope { + collect() + } /** * Return an RDD that contains all matching values by applying `f`. */ - def collect[U: ClassTag](f: PartialFunction[T, U]): RDD[U] = { - filter(f.isDefinedAt).map(f) + def collect[U: ClassTag](f: PartialFunction[T, U]): RDD[U] = withScope { + val cleanF = sc.clean(f) + filter(cleanF.isDefinedAt).map(cleanF) } /** @@ -866,19 +920,23 @@ abstract class RDD[T: ClassTag]( * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting * RDD will be <= us. */ - def subtract(other: RDD[T]): RDD[T] = + def subtract(other: RDD[T]): RDD[T] = withScope { subtract(other, partitioner.getOrElse(new HashPartitioner(partitions.length))) + } /** * Return an RDD with the elements from `this` that are not in `other`. */ - def subtract(other: RDD[T], numPartitions: Int): RDD[T] = + def subtract(other: RDD[T], numPartitions: Int): RDD[T] = withScope { subtract(other, new HashPartitioner(numPartitions)) + } /** * Return an RDD with the elements from `this` that are not in `other`. */ - def subtract(other: RDD[T], p: Partitioner)(implicit ord: Ordering[T] = null): RDD[T] = { + def subtract( + other: RDD[T], + p: Partitioner)(implicit ord: Ordering[T] = null): RDD[T] = withScope { if (partitioner == Some(p)) { // Our partitioner knows how to handle T (which, since we have a partitioner, is // really (K, V)) so make a new Partitioner that will de-tuple our fake tuples @@ -900,7 +958,7 @@ abstract class RDD[T: ClassTag]( * Reduces the elements of this RDD using the specified commutative and * associative binary operator. 
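// Illustrative sketch (editorial, not part of the patch): zipPartitions combines RDDs with identical
// partitioning one partition at a time; zip() above is defined in terms of it. Assumes a live
// SparkContext `sc`; both inputs have the same number of partitions and elements per partition.
val xs = sc.parallelize(1 to 8, 4)
val ys = sc.parallelize(Seq("a", "b", "c", "d", "e", "f", "g", "h"), 4)
val labelled = xs.zipPartitions(ys) { (xIter, yIter) =>
  xIter.zip(yIter).map { case (x, y) => s"$y-$x" }   // pairwise, without materialising tuple RDDs
}
labelled.collect()   // Array("a-1", "b-2", ..., "h-8") for these inputs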
*/ - def reduce(f: (T, T) => T): T = { + def reduce(f: (T, T) => T): T = withScope { val cleanF = sc.clean(f) val reducePartition: Iterator[T] => Option[T] = iter => { if (iter.hasNext) { @@ -929,7 +987,7 @@ abstract class RDD[T: ClassTag]( * @param depth suggested depth of the tree (default: 2) * @see [[org.apache.spark.rdd.RDD#reduce]] */ - def treeReduce(f: (T, T) => T, depth: Int = 2): T = { + def treeReduce(f: (T, T) => T, depth: Int = 2): T = withScope { require(depth >= 1, s"Depth must be greater than or equal to 1 but got $depth.") val cleanF = context.clean(f) val reducePartition: Iterator[T] => Option[T] = iter => { @@ -957,11 +1015,18 @@ abstract class RDD[T: ClassTag]( /** * Aggregate the elements of each partition, and then the results for all the partitions, using a - * given associative function and a neutral "zero value". The function op(t1, t2) is allowed to - * modify t1 and return it as its result value to avoid object allocation; however, it should not - * modify t2. - */ - def fold(zeroValue: T)(op: (T, T) => T): T = { + * given associative and commutative function and a neutral "zero value". The function + * op(t1, t2) is allowed to modify t1 and return it as its result value to avoid object + * allocation; however, it should not modify t2. + * + * This behaves somewhat differently from fold operations implemented for non-distributed + * collections in functional languages like Scala. This fold operation may be applied to + * partitions individually, and then fold those results into the final result, rather than + * apply the fold to each element sequentially in some defined ordering. For functions + * that are not commutative, the result may differ from that of a fold applied to a + * non-distributed collection. + */ + def fold(zeroValue: T)(op: (T, T) => T): T = withScope { // Clone the zero value since we will also be serializing it as part of tasks var jobResult = Utils.clone(zeroValue, sc.env.closureSerializer.newInstance()) val cleanOp = sc.clean(op) @@ -979,7 +1044,7 @@ abstract class RDD[T: ClassTag]( * allowed to modify and return their first argument instead of creating a new U to avoid memory * allocation. */ - def aggregate[U: ClassTag](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U = { + def aggregate[U: ClassTag](zeroValue: U)(seqOp: (U, T) => U, combOp: (U, U) => U): U = withScope { // Clone the zero value since we will also be serializing it as part of tasks var jobResult = Utils.clone(zeroValue, sc.env.serializer.newInstance()) val cleanSeqOp = sc.clean(seqOp) @@ -999,26 +1064,29 @@ abstract class RDD[T: ClassTag]( def treeAggregate[U: ClassTag](zeroValue: U)( seqOp: (U, T) => U, combOp: (U, U) => U, - depth: Int = 2): U = { + depth: Int = 2): U = withScope { require(depth >= 1, s"Depth must be greater than or equal to 1 but got $depth.") if (partitions.length == 0) { - return Utils.clone(zeroValue, context.env.closureSerializer.newInstance()) - } - val cleanSeqOp = context.clean(seqOp) - val cleanCombOp = context.clean(combOp) - val aggregatePartition = (it: Iterator[T]) => it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp) - var partiallyAggregated = mapPartitions(it => Iterator(aggregatePartition(it))) - var numPartitions = partiallyAggregated.partitions.length - val scale = math.max(math.ceil(math.pow(numPartitions, 1.0 / depth)).toInt, 2) - // If creating an extra level doesn't help reduce the wall-clock time, we stop tree aggregation. 
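// Illustrative sketch (editorial, not part of the patch): treeAggregate behaves like aggregate but
// merges partial results in `depth` rounds instead of sending every partition result to the driver.
// Assumes a live SparkContext `sc`; computes a (sum, count) pair to derive a mean.
val data = sc.parallelize(1 to 1000, 100)
val (sum, count) = data.treeAggregate((0L, 0L))(
  seqOp = { case ((s, c), x) => (s + x, c + 1L) },              // fold elements per partition
  combOp = { case ((s1, c1), (s2, c2)) => (s1 + s2, c1 + c2) }, // merge partials tree-wise
  depth = 2)
val mean = sum.toDouble / count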
- while (numPartitions > scale + numPartitions / scale) { - numPartitions /= scale - val curNumPartitions = numPartitions - partiallyAggregated = partiallyAggregated.mapPartitionsWithIndex { (i, iter) => - iter.map((i % curNumPartitions, _)) - }.reduceByKey(new HashPartitioner(curNumPartitions), cleanCombOp).values + Utils.clone(zeroValue, context.env.closureSerializer.newInstance()) + } else { + val cleanSeqOp = context.clean(seqOp) + val cleanCombOp = context.clean(combOp) + val aggregatePartition = + (it: Iterator[T]) => it.aggregate(zeroValue)(cleanSeqOp, cleanCombOp) + var partiallyAggregated = mapPartitions(it => Iterator(aggregatePartition(it))) + var numPartitions = partiallyAggregated.partitions.length + val scale = math.max(math.ceil(math.pow(numPartitions, 1.0 / depth)).toInt, 2) + // If creating an extra level doesn't help reduce + // the wall-clock time, we stop tree aggregation. + while (numPartitions > scale + numPartitions / scale) { + numPartitions /= scale + val curNumPartitions = numPartitions + partiallyAggregated = partiallyAggregated.mapPartitionsWithIndex { + (i, iter) => iter.map((i % curNumPartitions, _)) + }.reduceByKey(new HashPartitioner(curNumPartitions), cleanCombOp).values + } + partiallyAggregated.reduce(cleanCombOp) } - partiallyAggregated.reduce(cleanCombOp) } /** @@ -1032,7 +1100,9 @@ abstract class RDD[T: ClassTag]( * within a timeout, even if not all tasks have finished. */ @Experimental - def countApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = { + def countApprox( + timeout: Long, + confidence: Double = 0.95): PartialResult[BoundedDouble] = withScope { val countElements: (TaskContext, Iterator[T]) => Long = { (ctx, iter) => var result = 0L while (iter.hasNext) { @@ -1053,7 +1123,7 @@ abstract class RDD[T: ClassTag]( * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which * returns an RDD[T, Long] instead of a map. */ - def countByValue()(implicit ord: Ordering[T] = null): Map[T, Long] = { + def countByValue()(implicit ord: Ordering[T] = null): Map[T, Long] = withScope { map(value => (value, null)).countByKey() } @@ -1064,13 +1134,12 @@ abstract class RDD[T: ClassTag]( @Experimental def countByValueApprox(timeout: Long, confidence: Double = 0.95) (implicit ord: Ordering[T] = null) - : PartialResult[Map[T, BoundedDouble]] = - { + : PartialResult[Map[T, BoundedDouble]] = withScope { if (elementClassTag.runtimeClass.isArray) { throw new SparkException("countByValueApprox() does not support arrays") } - val countPartition: (TaskContext, Iterator[T]) => OpenHashMap[T,Long] = { (ctx, iter) => - val map = new OpenHashMap[T,Long] + val countPartition: (TaskContext, Iterator[T]) => OpenHashMap[T, Long] = { (ctx, iter) => + val map = new OpenHashMap[T, Long] iter.foreach { t => map.changeValue(t, 1L, _ + 1L) } @@ -1098,9 +1167,9 @@ abstract class RDD[T: ClassTag]( * If `sp` equals 0, the sparse representation is skipped. */ @Experimental - def countApproxDistinct(p: Int, sp: Int): Long = { - require(p >= 4, s"p ($p) must be at least 4") - require(sp <= 32, s"sp ($sp) cannot be greater than 32") + def countApproxDistinct(p: Int, sp: Int): Long = withScope { + require(p >= 4, s"p ($p) must be >= 4") + require(sp <= 32, s"sp ($sp) must be <= 32") require(sp == 0 || p <= sp, s"p ($p) cannot be greater than sp ($sp)") val zeroCounter = new HyperLogLogPlus(p, sp) aggregate(zeroCounter)( @@ -1124,9 +1193,10 @@ abstract class RDD[T: ClassTag]( * @param relativeSD Relative accuracy. 
Smaller values create counters that require more space. * It must be greater than 0.000017. */ - def countApproxDistinct(relativeSD: Double = 0.05): Long = { + def countApproxDistinct(relativeSD: Double = 0.05): Long = withScope { + require(relativeSD > 0.000017, s"accuracy ($relativeSD) must be greater than 0.000017") val p = math.ceil(2.0 * math.log(1.054 / relativeSD) / math.log(2)).toInt - countApproxDistinct(p, 0) + countApproxDistinct(if (p < 4) 4 else p, 0) } /** @@ -1142,7 +1212,9 @@ abstract class RDD[T: ClassTag]( * and may even change if the RDD is reevaluated. If a fixed ordering is required to guarantee * the same index assignments, you should sort the RDD with sortByKey() or save it to a file. */ - def zipWithIndex(): RDD[(T, Long)] = new ZippedWithIndexRDD(this) + def zipWithIndex(): RDD[(T, Long)] = withScope { + new ZippedWithIndexRDD(this) + } /** * Zips this RDD with generated unique Long ids. Items in the kth partition will get ids k, n+k, @@ -1154,7 +1226,7 @@ abstract class RDD[T: ClassTag]( * and may even change if the RDD is reevaluated. If a fixed ordering is required to guarantee * the same index assignments, you should sort the RDD with sortByKey() or save it to a file. */ - def zipWithUniqueId(): RDD[(T, Long)] = { + def zipWithUniqueId(): RDD[(T, Long)] = withScope { val n = this.partitions.length.toLong this.mapPartitionsWithIndex { case (k, iter) => iter.zipWithIndex.map { case (item, i) => @@ -1171,48 +1243,50 @@ abstract class RDD[T: ClassTag]( * @note due to complications in the internal implementation, this method will raise * an exception if called on an RDD of `Nothing` or `Null`. */ - def take(num: Int): Array[T] = { + def take(num: Int): Array[T] = withScope { if (num == 0) { - return new Array[T](0) - } - - val buf = new ArrayBuffer[T] - val totalParts = this.partitions.length - var partsScanned = 0 - while (buf.size < num && partsScanned < totalParts) { - // The number of partitions to try in this iteration. It is ok for this number to be - // greater than totalParts because we actually cap it at totalParts in runJob. - var numPartsToTry = 1 - if (partsScanned > 0) { - // If we didn't find any rows after the previous iteration, quadruple and retry. Otherwise, - // interpolate the number of partitions we need to try, but overestimate it by 50%. - // We also cap the estimation in the end. - if (buf.size == 0) { - numPartsToTry = partsScanned * 4 - } else { - // the left side of max is >=1 whenever partsScanned >= 2 - numPartsToTry = Math.max((1.5 * num * partsScanned / buf.size).toInt - partsScanned, 1) - numPartsToTry = Math.min(numPartsToTry, partsScanned * 4) + new Array[T](0) + } else { + val buf = new ArrayBuffer[T] + val totalParts = this.partitions.length + var partsScanned = 0 + while (buf.size < num && partsScanned < totalParts) { + // The number of partitions to try in this iteration. It is ok for this number to be + // greater than totalParts because we actually cap it at totalParts in runJob. + var numPartsToTry = 1 + if (partsScanned > 0) { + // If we didn't find any rows after the previous iteration, quadruple and retry. + // Otherwise, interpolate the number of partitions we need to try, but overestimate + // it by 50%. We also cap the estimation in the end. 
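// Editorial sketch of the scan-growth heuristic described in the comment above (not part of the
// patch): with no rows found yet the scan quadruples; otherwise it extrapolates from the rows seen
// so far, pads the estimate by 50%, and caps it at 4x the partitions already scanned.
def nextPartsToTry(num: Int, bufSize: Int, partsScanned: Int): Int = {
  if (partsScanned == 0) 1
  else if (bufSize == 0) partsScanned * 4
  else {
    val estimate = math.max((1.5 * num * partsScanned / bufSize).toInt - partsScanned, 1)
    math.min(estimate, partsScanned * 4)
  }
}
// e.g. asking for 1000 rows after one scanned partition produced 10 of them:
nextPartsToTry(num = 1000, bufSize = 10, partsScanned = 1)   // == 4, capped by the 4x rule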
+ if (buf.size == 0) { + numPartsToTry = partsScanned * 4 + } else { + // the left side of max is >=1 whenever partsScanned >= 2 + numPartsToTry = Math.max((1.5 * num * partsScanned / buf.size).toInt - partsScanned, 1) + numPartsToTry = Math.min(numPartsToTry, partsScanned * 4) + } } - } - val left = num - buf.size - val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts) - val res = sc.runJob(this, (it: Iterator[T]) => it.take(left).toArray, p, allowLocal = true) + val left = num - buf.size + val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts) + val res = sc.runJob(this, (it: Iterator[T]) => it.take(left).toArray, p, allowLocal = true) - res.foreach(buf ++= _.take(num - buf.size)) - partsScanned += numPartsToTry - } + res.foreach(buf ++= _.take(num - buf.size)) + partsScanned += numPartsToTry + } - buf.toArray + buf.toArray + } } /** * Return the first element in this RDD. */ - def first(): T = take(1) match { - case Array(t) => t - case _ => throw new UnsupportedOperationException("empty collection") + def first(): T = withScope { + take(1) match { + case Array(t) => t + case _ => throw new UnsupportedOperationException("empty collection") + } } /** @@ -1230,7 +1304,9 @@ abstract class RDD[T: ClassTag]( * @param ord the implicit ordering for T * @return an array of top elements */ - def top(num: Int)(implicit ord: Ordering[T]): Array[T] = takeOrdered(num)(ord.reverse) + def top(num: Int)(implicit ord: Ordering[T]): Array[T] = withScope { + takeOrdered(num)(ord.reverse) + } /** * Returns the first k (smallest) elements from this RDD as defined by the specified @@ -1248,7 +1324,7 @@ abstract class RDD[T: ClassTag]( * @param ord the implicit ordering for T * @return an array of top elements */ - def takeOrdered(num: Int)(implicit ord: Ordering[T]): Array[T] = { + def takeOrdered(num: Int)(implicit ord: Ordering[T]): Array[T] = withScope { if (num == 0) { Array.empty } else { @@ -1273,13 +1349,17 @@ abstract class RDD[T: ClassTag]( * Returns the max of this RDD as defined by the implicit Ordering[T]. * @return the maximum element of the RDD * */ - def max()(implicit ord: Ordering[T]): T = this.reduce(ord.max) + def max()(implicit ord: Ordering[T]): T = withScope { + this.reduce(ord.max) + } /** * Returns the min of this RDD as defined by the implicit Ordering[T]. * @return the minimum element of the RDD * */ - def min()(implicit ord: Ordering[T]): T = this.reduce(ord.min) + def min()(implicit ord: Ordering[T]): T = withScope { + this.reduce(ord.min) + } /** * @note due to complications in the internal implementation, this method will raise an @@ -1289,12 +1369,14 @@ abstract class RDD[T: ClassTag]( * @return true if and only if the RDD contains no elements at all. Note that an RDD * may be empty even when it has at least 1 partition. */ - def isEmpty(): Boolean = partitions.length == 0 || take(1).length == 0 + def isEmpty(): Boolean = withScope { + partitions.length == 0 || take(1).length == 0 + } /** * Save this RDD as a text file, using string representations of elements. */ - def saveAsTextFile(path: String) { + def saveAsTextFile(path: String): Unit = withScope { // https://issues.apache.org/jira/browse/SPARK-2075 // // NullWritable is a `Comparable` in Hadoop 1.+, so the compiler cannot find an implicit @@ -1321,7 +1403,7 @@ abstract class RDD[T: ClassTag]( /** * Save this RDD as a compressed text file, using string representations of elements. 
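// Illustrative sketch (editorial, not part of the patch): top(num) is takeOrdered(num) under the
// reversed ordering, exactly as the definition above shows. Assumes a live SparkContext `sc`.
val scores = sc.parallelize(Seq(7, 42, 3, 19, 42, 1), 3)
scores.top(2)                                  // Array(42, 42)  -- largest values first
scores.takeOrdered(2)                          // Array(1, 3)    -- smallest values first
scores.takeOrdered(2)(Ordering[Int].reverse)   // same result as top(2)
scores.max()                                   // 42
scores.isEmpty()                               // false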
*/ - def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]) { + def saveAsTextFile(path: String, codec: Class[_ <: CompressionCodec]): Unit = withScope { // https://issues.apache.org/jira/browse/SPARK-2075 val nullWritableClassTag = implicitly[ClassTag[NullWritable]] val textClassTag = implicitly[ClassTag[Text]] @@ -1339,7 +1421,7 @@ abstract class RDD[T: ClassTag]( /** * Save this RDD as a SequenceFile of serialized objects. */ - def saveAsObjectFile(path: String) { + def saveAsObjectFile(path: String): Unit = withScope { this.mapPartitions(iter => iter.grouped(10).map(_.toArray)) .map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x)))) .saveAsSequenceFile(path) @@ -1348,12 +1430,13 @@ abstract class RDD[T: ClassTag]( /** * Creates tuples of the elements in this RDD by applying `f`. */ - def keyBy[K](f: T => K): RDD[(K, T)] = { - map(x => (f(x), x)) + def keyBy[K](f: T => K): RDD[(K, T)] = withScope { + val cleanedF = sc.clean(f) + map(x => (cleanedF(x), x)) } /** A private method for tests, to look at the contents of each partition */ - private[spark] def collectPartitions(): Array[Array[T]] = { + private[spark] def collectPartitions(): Array[Array[T]] = withScope { sc.runJob(this, (iter: Iterator[T]) => iter.toArray) } @@ -1392,6 +1475,17 @@ abstract class RDD[T: ClassTag]( /** User code that created this RDD (e.g. `textFile`, `parallelize`). */ @transient private[spark] val creationSite = sc.getCallSite() + /** + * The scope associated with the operation that created this RDD. + * + * This is more flexible than the call site and can be defined hierarchically. For more + * detail, see the documentation of {{RDDOperationScope}}. This scope is not defined if the + * user instantiates this RDD himself without using any Spark operations. + */ + @transient private[spark] val scope: Option[RDDOperationScope] = { + Option(sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY)).map(RDDOperationScope.fromJson) + } + private[spark] def getCreationSite: String = Option(creationSite).map(_.shortForm).getOrElse("") private[spark] def elementClassTag: ClassTag[T] = classTag[T] @@ -1436,13 +1530,15 @@ abstract class RDD[T: ClassTag]( * has completed (therefore the RDD has been materialized and potentially stored in memory). * doCheckpoint() is called recursively on the parent RDDs. */ - private[spark] def doCheckpoint() { - if (!doCheckpointCalled) { - doCheckpointCalled = true - if (checkpointData.isDefined) { - checkpointData.get.doCheckpoint() - } else { - dependencies.foreach(_.rdd.doCheckpoint()) + private[spark] def doCheckpoint(): Unit = { + RDDOperationScope.withScope(sc, "checkpoint", allowNesting = false, ignoreParent = true) { + if (!doCheckpointCalled) { + doCheckpointCalled = true + if (checkpointData.isDefined) { + checkpointData.get.doCheckpoint() + } else { + dependencies.foreach(_.rdd.doCheckpoint()) + } } } } @@ -1470,7 +1566,7 @@ abstract class RDD[T: ClassTag]( /** A description of this RDD and its recursive dependencies for debugging. 
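// Illustrative sketch (editorial, not part of the patch): the compressed saveAsTextFile variant
// accepts any Hadoop CompressionCodec class, and keyBy builds a pair RDD from a key function.
// Assumes a live SparkContext `sc`; the output path is hypothetical.
import org.apache.hadoop.io.compress.GzipCodec
val words = sc.parallelize(Seq("spark", "rdd", "scope"), 2)
val byLength = words.keyBy(_.length)                        // RDD[(Int, String)], e.g. (5, "spark")
words.saveAsTextFile("/tmp/words-gz", classOf[GzipCodec])   // gzip-compressed part file per partition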
*/ def toDebugString: String = { // Get a debug description of an rdd without its children - def debugSelf (rdd: RDD[_]): Seq[String] = { + def debugSelf(rdd: RDD[_]): Seq[String] = { import Utils.bytesToString val persistence = if (storageLevel != StorageLevel.NONE) storageLevel.description else "" @@ -1489,15 +1585,15 @@ abstract class RDD[T: ClassTag]( case 0 => Seq.empty case 1 => val d = rdd.dependencies.head - debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]], true) + debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_, _, _]], true) case _ => val frontDeps = rdd.dependencies.take(len - 1) val frontDepStrings = frontDeps.flatMap( - d => debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_,_,_]])) + d => debugString(d.rdd, prefix, d.isInstanceOf[ShuffleDependency[_, _, _]])) val lastDep = rdd.dependencies.last val lastDepStrings = - debugString(lastDep.rdd, prefix, lastDep.isInstanceOf[ShuffleDependency[_,_,_]], true) + debugString(lastDep.rdd, prefix, lastDep.isInstanceOf[ShuffleDependency[_, _, _]], true) (frontDepStrings ++ lastDepStrings) } @@ -1527,10 +1623,11 @@ abstract class RDD[T: ClassTag]( case (desc: String, _) => s"$nextPrefix$desc" } ++ debugChildren(rdd, nextPrefix) } - def debugString(rdd: RDD[_], - prefix: String = "", - isShuffle: Boolean = true, - isLastChild: Boolean = false): Seq[String] = { + def debugString( + rdd: RDD[_], + prefix: String = "", + isShuffle: Boolean = true, + isLastChild: Boolean = false): Seq[String] = { if (isShuffle) { shuffleDebugString(rdd, prefix, isLastChild) } else { diff --git a/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala new file mode 100644 index 0000000000000..6b09dfafc889c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/RDDOperationScope.scala @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import java.util.concurrent.atomic.AtomicInteger + +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonInclude, JsonPropertyOrder} +import com.fasterxml.jackson.annotation.JsonInclude.Include +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule + +import org.apache.spark.{Logging, SparkContext} + +/** + * A general, named code block representing an operation that instantiates RDDs. + * + * All RDDs instantiated in the corresponding code block will store a pointer to this object. + * Examples include, but will not be limited to, existing RDD operations, such as textFile, + * reduceByKey, and treeAggregate. + * + * An operation scope may be nested in other scopes. 
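// Editorial sketch (not part of the patch) of how the scope hierarchy defined below behaves.
// RDDOperationScope is private[spark], so this only makes sense from Spark-internal code, and the
// generated ids depend on a global counter.
val outer = new RDDOperationScope("treeAggregate")
val inner = new RDDOperationScope("mapPartitions", parent = Some(outer))
inner.getAllScopes.map(_.name)             // Seq("treeAggregate", "mapPartitions"), eldest first
inner.toJson                               // e.g. {"id":"1","name":"mapPartitions","parent":{...}}
RDDOperationScope.fromJson(inner.toJson)   // equal to `inner`: equality uses id, name and parent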
For instance, a SQL query may enclose + * scopes associated with the public RDD APIs it uses under the hood. + * + * There is no particular relationship between an operation scope and a stage or a job. + * A scope may live inside one stage (e.g. map) or span across multiple jobs (e.g. take). + */ +@JsonInclude(Include.NON_NULL) +@JsonPropertyOrder(Array("id", "name", "parent")) +private[spark] class RDDOperationScope( + val name: String, + val parent: Option[RDDOperationScope] = None, + val id: String = RDDOperationScope.nextScopeId().toString) { + + def toJson: String = { + RDDOperationScope.jsonMapper.writeValueAsString(this) + } + + /** + * Return a list of scopes that this scope is a part of, including this scope itself. + * The result is ordered from the outermost scope (eldest ancestor) to this scope. + */ + @JsonIgnore + def getAllScopes: Seq[RDDOperationScope] = { + parent.map(_.getAllScopes).getOrElse(Seq.empty) ++ Seq(this) + } + + override def equals(other: Any): Boolean = { + other match { + case s: RDDOperationScope => + id == s.id && name == s.name && parent == s.parent + case _ => false + } + } + + override def toString: String = toJson +} + +/** + * A collection of utility methods to construct a hierarchical representation of RDD scopes. + * An RDD scope tracks the series of operations that created a given RDD. + */ +private[spark] object RDDOperationScope extends Logging { + private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) + private val scopeCounter = new AtomicInteger(0) + + def fromJson(s: String): RDDOperationScope = { + jsonMapper.readValue(s, classOf[RDDOperationScope]) + } + + /** Return a globally unique operation scope ID. */ + def nextScopeId(): Int = scopeCounter.getAndIncrement + + /** + * Execute the given body such that all RDDs created in this body will have the same scope. + * The name of the scope will be the first method name in the stack trace that is not the + * same as this method's. + * + * Note: Return statements are NOT allowed in body. + */ + private[spark] def withScope[T]( + sc: SparkContext, + allowNesting: Boolean = false)(body: => T): T = { + val stackTrace = Thread.currentThread.getStackTrace().tail // ignore "Thread#getStackTrace" + val ourMethodName = stackTrace(1).getMethodName // i.e. withScope + // Climb upwards to find the first method that's called something different + val callerMethodName = stackTrace + .find(_.getMethodName != ourMethodName) + .map(_.getMethodName) + .getOrElse { + // Log a warning just in case, but this should almost certainly never happen + logWarning("No valid method name for this RDD operation scope!") + "N/A" + } + withScope[T](sc, callerMethodName, allowNesting, ignoreParent = false)(body) + } + + /** + * Execute the given body such that all RDDs created in this body will have the same scope. + * + * If nesting is allowed, any subsequent calls to this method in the given body will instantiate + * child scopes that are nested within our scope. Otherwise, these calls will take no effect. + * + * Additionally, the caller of this method may optionally ignore the configurations and scopes + * set by the higher level caller. In this case, this method will ignore the parent caller's + * intention to disallow nesting, and the new scope instantiated will not have a parent. This + * is useful for scoping physical operations in Spark SQL, for instance. + * + * Note: Return statements are NOT allowed in body. 
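// Editorial sketch (not part of the patch): how the withScope helpers in this file are meant to be
// used by Spark-internal code (they are private[spark]). RDDs instantiated inside the block record
// the scope through SparkContext.RDD_SCOPE_KEY, and the DAG visualization later reads it back.
// Assumes a live SparkContext `sc`; "demoScope" is an arbitrary name.
val scoped = RDDOperationScope.withScope(sc, "demoScope", allowNesting = false, ignoreParent = false) {
  sc.parallelize(1 to 10, 2).map(_ * 2)   // nested operations keep the enclosing "demoScope"
}
scoped.scope.map(_.name)                  // Some("demoScope")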
+ */ + private[spark] def withScope[T]( + sc: SparkContext, + name: String, + allowNesting: Boolean, + ignoreParent: Boolean)(body: => T): T = { + // Save the old scope to restore it later + val scopeKey = SparkContext.RDD_SCOPE_KEY + val noOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY + val oldScopeJson = sc.getLocalProperty(scopeKey) + val oldScope = Option(oldScopeJson).map(RDDOperationScope.fromJson) + val oldNoOverride = sc.getLocalProperty(noOverrideKey) + try { + if (ignoreParent) { + // Ignore all parent settings and scopes and start afresh with our own root scope + sc.setLocalProperty(scopeKey, new RDDOperationScope(name).toJson) + } else if (sc.getLocalProperty(noOverrideKey) == null) { + // Otherwise, set the scope only if the higher level caller allows us to do so + sc.setLocalProperty(scopeKey, new RDDOperationScope(name, oldScope).toJson) + } + // Optionally disallow the child body to override our scope + if (!allowNesting) { + sc.setLocalProperty(noOverrideKey, "true") + } + body + } finally { + // Remember to restore any state that was modified before exiting + sc.setLocalProperty(scopeKey, oldScopeJson) + sc.setLocalProperty(noOverrideKey, oldNoOverride) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 059f8963691f0..4b5f15dd06b85 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -85,7 +85,9 @@ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag * byte arrays to BytesWritable, and Strings to Text. The `path` can be on any Hadoop-supported * file system. */ - def saveAsSequenceFile(path: String, codec: Option[Class[_ <: CompressionCodec]] = None) { + def saveAsSequenceFile( + path: String, + codec: Option[Class[_ <: CompressionCodec]] = None): Unit = self.withScope { def anyToWritable[U <% Writable](u: U): Writable = u // TODO We cannot force the return type of `anyToWritable` be same as keyWritableClass and @@ -102,13 +104,13 @@ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag if (!convertKey && !convertValue) { self.saveAsHadoopFile(path, keyWritableClass, valueWritableClass, format, jobConf, codec) } else if (!convertKey && convertValue) { - self.map(x => (x._1,anyToWritable(x._2))).saveAsHadoopFile( + self.map(x => (x._1, anyToWritable(x._2))).saveAsHadoopFile( path, keyWritableClass, valueWritableClass, format, jobConf, codec) } else if (convertKey && !convertValue) { - self.map(x => (anyToWritable(x._1),x._2)).saveAsHadoopFile( + self.map(x => (anyToWritable(x._1), x._2)).saveAsHadoopFile( path, keyWritableClass, valueWritableClass, format, jobConf, codec) } else if (convertKey && convertValue) { - self.map(x => (anyToWritable(x._1),anyToWritable(x._2))).saveAsHadoopFile( + self.map(x => (anyToWritable(x._1), anyToWritable(x._2))).saveAsHadoopFile( path, keyWritableClass, valueWritableClass, format, jobConf, codec) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala index 633aeba3bbae6..f7cb1791d4ac6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala @@ -125,7 +125,7 @@ private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( integrate(0, t => getSeq(t._1) += t._2) // 
the second dep is rdd2; remove all of its keys integrate(1, t => map.remove(t._1)) - map.iterator.map { t => t._2.iterator.map { (t._1, _) } }.flatten + map.iterator.map { t => t._2.iterator.map { (t._1, _) } }.flatten } override def clearDependencies() { diff --git a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index a96b6c3d23454..81f40ad33aa5d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -123,7 +123,7 @@ private[spark] class ZippedPartitionsRDD3 } private[spark] class ZippedPartitionsRDD4 - [A: ClassTag, B: ClassTag, C: ClassTag, D:ClassTag, V: ClassTag]( + [A: ClassTag, B: ClassTag, C: ClassTag, D: ClassTag, V: ClassTag]( sc: SparkContext, var f: (Iterator[A], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], var rdd1: RDD[A], diff --git a/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala index ba0d468f111ef..0161962cde073 100644 --- a/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala @@ -29,9 +29,11 @@ import akka.actor.{ActorSystem, ExtendedActorSystem, Actor, ActorRef, Props, Add import akka.event.Logging.Error import akka.pattern.{ask => akkaAsk} import akka.remote.{AssociationEvent, AssociatedEvent, DisassociatedEvent, AssociationErrorEvent} +import com.google.common.util.concurrent.MoreExecutors + import org.apache.spark.{SparkException, Logging, SparkConf} import org.apache.spark.rpc._ -import org.apache.spark.util.{ActorLogReceive, AkkaUtils} +import org.apache.spark.util.{ActorLogReceive, AkkaUtils, ThreadUtils} /** * A RpcEnv implementation based on Akka. @@ -294,8 +296,8 @@ private[akka] class AkkaRpcEndpointRef( } override def ask[T: ClassTag](message: Any, timeout: FiniteDuration): Future[T] = { - import scala.concurrent.ExecutionContext.Implicits.global actorRef.ask(AkkaMessage(message, true))(timeout).flatMap { + // The function will run in the calling thread, so it should be short and never block. case msg @ AkkaMessage(message, reply) => if (reply) { logError(s"Receive $msg but the sender cannot reply") @@ -305,7 +307,7 @@ private[akka] class AkkaRpcEndpointRef( } case AkkaFailure(e) => Future.failed(e) - }.mapTo[T] + }(ThreadUtils.sameThread).mapTo[T] } override def toString: String = s"${getClass.getSimpleName}($actorRef)" diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 5d812918a13d1..7fc1f0a752f85 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1364,10 +1364,10 @@ class DAGScheduler( private def getPreferredLocsInternal( rdd: RDD[_], partition: Int, - visited: HashSet[(RDD[_],Int)]): Seq[TaskLocation] = { + visited: HashSet[(RDD[_], Int)]): Seq[TaskLocation] = { // If the partition has already been visited, no need to re-visit. // This avoids exponential path exploration. SPARK-695 - if (!visited.add((rdd,partition))) { + if (!visited.add((rdd, partition))) { // Nil has already been returned for previously visited partitions. 
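// Editorial sketch (not part of the patch): the AkkaRpcEnv change above runs the ask() continuation
// on a same-thread executor rather than the global pool, since the callback is trivial and should
// not pay for a thread hand-off. ThreadUtils.sameThread is Spark-internal; the underlying idea is:
import java.util.concurrent.Executor
import scala.concurrent.ExecutionContext
val sameThread: ExecutionContext = ExecutionContext.fromExecutor(new Executor {
  override def execute(command: Runnable): Unit = command.run()   // run the callback on the caller
})
// Only appropriate for short, non-blocking continuations:
// someFuture.map(reply => reply.toString)(sameThread)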
return Nil } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala index 12668b6c0988e..02c67073af6a0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala @@ -17,9 +17,8 @@ package org.apache.spark.scheduler -import com.codahale.metrics.{Gauge,MetricRegistry} +import com.codahale.metrics.{Gauge, MetricRegistry} -import org.apache.spark.SparkContext import org.apache.spark.metrics.source.Source private[spark] class DAGSchedulerSource(val dagScheduler: DAGScheduler) diff --git a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala index 0b1d47cff3746..8321037cdc026 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala @@ -38,7 +38,7 @@ private case class AskPermissionToCommitOutput(stage: Int, task: Long, taskAttem * This class was introduced in SPARK-4879; see that JIRA issue (and the associated pull requests) * for an extensive design discussion. */ -private[spark] class OutputCommitCoordinator(conf: SparkConf) extends Logging { +private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean) extends Logging { // Initialized by SparkEnv var coordinatorRef: Option[RpcEndpointRef] = None @@ -129,9 +129,11 @@ private[spark] class OutputCommitCoordinator(conf: SparkConf) extends Logging { } def stop(): Unit = synchronized { - coordinatorRef.foreach(_ send StopCoordinator) - coordinatorRef = None - authorizedCommittersByStage.clear() + if (isDriver) { + coordinatorRef.foreach(_ send StopCoordinator) + coordinatorRef = None + authorizedCommittersByStage.clear() + } } // Marked private[scheduler] instead of private so this can be mocked in tests diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala index 86f357abb8723..c6d957b65f3fb 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala @@ -41,7 +41,7 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { * * @param logData Stream containing event log data. 
* @param sourceName Filename (or other source identifier) from whence @logData is being read - * @param maybeTruncated Indicate whether log file might be truncated (some abnormal situations + * @param maybeTruncated Indicate whether log file might be truncated (some abnormal situations * encountered, log file might not finished writing) or not */ def replay( @@ -62,7 +62,7 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { if (!maybeTruncated || lines.hasNext) { throw jpe } else { - logWarning(s"Got JsonParseException from log file $sourceName" + + logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala index 5e62c8468f007..864941d468af9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulingAlgorithm.scala @@ -56,7 +56,7 @@ private[spark] class FairSchedulingAlgorithm extends SchedulingAlgorithm { val minShareRatio2 = runningTasks2.toDouble / math.max(minShare2, 1.0).toDouble val taskToWeightRatio1 = runningTasks1.toDouble / s1.weight.toDouble val taskToWeightRatio2 = runningTasks2.toDouble / s2.weight.toDouble - var compare:Int = 0 + var compare: Int = 0 if (s1Needy && !s2Needy) { return true diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 169d4fd3a94f0..e93924615c440 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -265,7 +265,7 @@ class StatsReportListener extends SparkListener with Logging { private[spark] object StatsReportListener extends Logging { // For profiling, the extremes are more interesting - val percentiles = Array[Int](0,5,10,25,50,75,90,95,100) + val percentiles = Array[Int](0, 5, 10, 25, 50, 75, 90, 95, 100) val probabilities = percentiles.map(_ / 100.0) val percentilesHeader = "\t" + percentiles.mkString("%\t") + "%" @@ -299,7 +299,7 @@ private[spark] object StatsReportListener extends Logging { dOpt.foreach { d => showDistribution(heading, d, formatNumber)} } - def showDistribution(heading: String, dOpt: Option[Distribution], format:String) { + def showDistribution(heading: String, dOpt: Option[Distribution], format: String) { def f(d: Double): String = format.format(d) showDistribution(heading, dOpt, f _) } @@ -313,7 +313,7 @@ private[spark] object StatsReportListener extends Logging { } def showBytesDistribution( - heading:String, + heading: String, getMetric: (TaskInfo, TaskMetrics) => Option[Long], taskInfoMetrics: Seq[(TaskInfo, TaskMetrics)]) { showBytesDistribution(heading, extractLongDistribution(taskInfoMetrics, getMetric)) diff --git a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala index cf3db0b94a0b3..e439d2a7e1229 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala @@ -33,6 +33,7 @@ class StageInfo( val name: String, val numTasks: Int, val rddInfos: Seq[RDDInfo], + val parentIds: Seq[Int], val details: String) { /** When this stage was submitted from the DAGScheduler to a TaskScheduler. 
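// Editorial sketch (not part of the patch): the new StageInfo.parentIds field lets listeners
// reconstruct the stage DAG. A minimal listener, assuming it is registered with sc.addSparkListener:
import org.apache.spark.scheduler.{SparkListener, SparkListenerStageSubmitted}
class StageGraphListener extends SparkListener {
  override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = {
    val info = event.stageInfo
    println(s"stage ${info.stageId} depends on stages ${info.parentIds.mkString(", ")}")
  }
}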
*/ var submissionTime: Option[Long] = None @@ -78,6 +79,7 @@ private[spark] object StageInfo { stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, + stage.parents.map(_.id), stage.details) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index 586d1e06204c1..15101c64f0503 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -125,7 +125,7 @@ private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) ex if (interruptThread && taskThread != null) { taskThread.interrupt() } - } + } } /** diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index 1f114a0207f7b..8b2a742b96988 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -40,6 +40,9 @@ class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long var metrics: TaskMetrics) extends TaskResult[T] with Externalizable { + private var valueObjectDeserialized = false + private var valueObject: T = _ + def this() = this(null.asInstanceOf[ByteBuffer], null, null) override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException { @@ -72,10 +75,26 @@ class DirectTaskResult[T](var valueBytes: ByteBuffer, var accumUpdates: Map[Long } } metrics = in.readObject().asInstanceOf[TaskMetrics] + valueObjectDeserialized = false } + /** + * When `value()` is called at the first time, it needs to deserialize `valueObject` from + * `valueBytes`. It may cost dozens of seconds for a large instance. So when calling `value` at + * the first time, the caller should avoid to block other threads. + * + * After the first time, `value()` is trivial and just returns the deserialized `valueObject`. + */ def value(): T = { - val resultSer = SparkEnv.get.serializer.newInstance() - resultSer.deserialize(valueBytes) + if (valueObjectDeserialized) { + valueObject + } else { + // This should not run when holding a lock because it may cost dozens of seconds for a large + // value. + val resultSer = SparkEnv.get.serializer.newInstance() + valueObject = resultSer.deserialize(valueBytes) + valueObjectDeserialized = true + valueObject + } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index 391827c1d2156..46a6f6537e2ee 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -54,6 +54,10 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul if (!taskSetManager.canFetchMoreResults(serializedData.limit())) { return } + // deserialize "value" without holding any lock so that it won't block other threads. + // We should call it here, so that when it's called again in + // "TaskSetManager.handleSuccessfulTask", it does not need to deserialize the value. 
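// Editorial sketch (not part of the patch): the DirectTaskResult change above is the usual
// "deserialize once, and do it before taking any shared lock" pattern. A generic, hypothetical
// version of the same shape (like the original, it relies on the first call happening early,
// outside the lock, rather than on its own synchronization):
class LazyValue[T](bytes: Array[Byte], deserialize: Array[Byte] => T) {
  @volatile private var cached: Option[T] = None
  def value: T = cached.getOrElse {
    val v = deserialize(bytes)   // potentially expensive for large task results
    cached = Some(v)
    v                            // later calls return the cached object immediately
  }
}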
+ directResult.value() (directResult, serializedData.limit()) case IndirectTaskResult(blockId, size) => if (!taskSetManager.canFetchMoreResults(size)) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 7dc325283d961..673cd0e19eba2 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -620,6 +620,12 @@ private[spark] class TaskSetManager( val index = info.index info.markSuccessful() removeRunningTask(tid) + // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the + // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not + // "deserialize" the value when holding a lock to avoid blocking other threads. So we call + // "result.value()" in "TaskResultGetter.enqueueSuccessfulTask" before reaching here. + // Note: "result.value()" only deserializes the value when it's called at the first time, so + // here "result.value()" just returns the value and won't block other threads. sched.dagScheduler.taskEnded( tasks(index), Success, result.value(), result.accumUpdates, info, result.metrics) if (!successful(index)) { @@ -775,10 +781,10 @@ private[spark] class TaskSetManager( // that it's okay if we add a task to the same queue twice (if it had multiple preferred // locations), because dequeueTaskFromList will skip already-running tasks. for (index <- getPendingTasksForExecutor(execId)) { - addPendingTask(index, readding=true) + addPendingTask(index, readding = true) } for (index <- getPendingTasksForHost(host)) { - addPendingTask(index, readding=true) + addPendingTask(index, readding = true) } // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage, @@ -855,9 +861,9 @@ private[spark] class TaskSetManager( case TaskLocality.RACK_LOCAL => "spark.locality.wait.rack" case _ => null } - + if (localityWaitKey != null) { - conf.getTimeAsMs(localityWaitKey, defaultWait) + conf.getTimeAsMs(localityWaitKey, defaultWait) } else { 0L } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 70364cea62a80..4be1eda2e9291 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -75,7 +75,8 @@ private[spark] object CoarseGrainedClusterMessages { case class SetupDriver(driver: RpcEndpointRef) extends CoarseGrainedClusterMessage // Exchanged between the driver and the AM in Yarn client mode - case class AddWebUIFilter(filterName:String, filterParams: Map[String, String], proxyBase: String) + case class AddWebUIFilter( + filterName: String, filterParams: Map[String, String], proxyBase: String) extends CoarseGrainedClusterMessage // Messages exchanged between the driver and the cluster manager for executor allocation diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index f107148f3b8c6..b2898b1ae0763 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ 
-79,7 +79,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp override def onStart() { // Periodically revive offers to allow delay scheduling to work val reviveIntervalMs = conf.getTimeAsMs("spark.scheduler.revive.interval", "1s") - + reviveThread.scheduleAtFixedRate(new Runnable { override def run(): Unit = Utils.tryLogNonFatalError { Option(self).foreach(_.send(ReviveOffers)) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 2a3a5d925d06f..190ff61d689d1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -149,7 +149,7 @@ private[spark] abstract class YarnSchedulerBackend( } } - override def onStop(): Unit ={ + override def onStop(): Unit = { askAmThreadPool.shutdownNow() } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index dc59545b43314..26b1c6201e94a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -51,7 +51,7 @@ private[spark] class CoarseMesosSchedulerBackend( val MAX_SLAVE_FAILURES = 2 // Blacklist a slave after this many failures // Maximum number of cores to acquire (TODO: we'll need more flexible controls here) - val maxCores = conf.get("spark.cores.max", Int.MaxValue.toString).toInt + val maxCores = conf.get("spark.cores.max", Int.MaxValue.toString).toInt // Cores we have acquired with each Mesos task ID val coresByTaskId = new HashMap[Int, Int] diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 06f0e2881c344..1067a7f1caf4c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -370,16 +370,21 @@ private[spark] class MesosClusterScheduler( val executorOpts = desc.schedulerProperties.map { case (k, v) => s"-D$k=$v" }.mkString(" ") envBuilder.addVariables( Variable.newBuilder().setName("SPARK_EXECUTOR_OPTS").setValue(executorOpts)) - val cmdOptions = generateCmdOption(desc) + val cmdOptions = generateCmdOption(desc).mkString(" ") + val dockerDefined = desc.schedulerProperties.contains("spark.mesos.executor.docker.image") val executorUri = desc.schedulerProperties.get("spark.executor.uri") .orElse(desc.command.environment.get("SPARK_EXECUTOR_URI")) val appArguments = desc.command.arguments.mkString(" ") - val cmd = if (executorUri.isDefined) { + val (executable, jar) = if (dockerDefined) { + // Application jar is automatically downloaded in the mounted sandbox by Mesos, + // and the path to the mounted volume is stored in $MESOS_SANDBOX env variable. 
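// Editorial sketch (not part of the patch): the cluster-mode scheduler above now honours the same
// docker properties as executors. Property names come from the patch; the values are hypothetical.
import org.apache.spark.SparkConf
val submitConf = new SparkConf()
  .set("spark.mesos.executor.docker.image", "my-registry/spark:1.4.0")
  .set("spark.mesos.executor.docker.volumes", "/host/logs:/container/logs:rw")   // host:container:mode
  .set("spark.mesos.executor.docker.portmaps", "8080:80,9000:9000:tcp")          // host:container[:proto]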
+ ("./bin/spark-submit", s"$$MESOS_SANDBOX/${desc.jarUrl.split("/").last}") + } else if (executorUri.isDefined) { builder.addUris(CommandInfo.URI.newBuilder().setValue(executorUri.get).build()) val folderBasename = executorUri.get.split('/').last.split('.').head val cmdExecutable = s"cd $folderBasename*; $prefixEnv bin/spark-submit" val cmdJar = s"../${desc.jarUrl.split("/").last}" - s"$cmdExecutable ${cmdOptions.mkString(" ")} $cmdJar $appArguments" + (cmdExecutable, cmdJar) } else { val executorSparkHome = desc.schedulerProperties.get("spark.mesos.executor.home") .orElse(conf.getOption("spark.home")) @@ -389,9 +394,9 @@ private[spark] class MesosClusterScheduler( } val cmdExecutable = new File(executorSparkHome, "./bin/spark-submit").getCanonicalPath val cmdJar = desc.jarUrl.split("/").last - s"$cmdExecutable ${cmdOptions.mkString(" ")} $cmdJar $appArguments" + (cmdExecutable, cmdJar) } - builder.setValue(cmd) + builder.setValue(s"$executable $cmdOptions $jar $appArguments") builder.setEnvironment(envBuilder.build()) builder.build() } @@ -458,9 +463,20 @@ private[spark] class MesosClusterScheduler( .setCommand(commandInfo) .addResources(cpuResource) .addResources(memResource) - .build() + submission.schedulerProperties.get("spark.mesos.executor.docker.image").foreach { image => + val container = taskInfo.getContainerBuilder() + val volumes = submission.schedulerProperties + .get("spark.mesos.executor.docker.volumes") + .map(MesosSchedulerBackendUtil.parseVolumesSpec) + val portmaps = submission.schedulerProperties + .get("spark.mesos.executor.docker.portmaps") + .map(MesosSchedulerBackendUtil.parsePortMappingsSpec) + MesosSchedulerBackendUtil.addDockerInfo( + container, image, volumes = volumes, portmaps = portmaps) + taskInfo.setContainer(container.build()) + } val queuedTasks = tasks.getOrElseUpdate(offer.offer.getId, new ArrayBuffer[TaskInfo]) - queuedTasks += taskInfo + queuedTasks += taskInfo.build() logTrace(s"Using offer ${offer.offer.getId.getValue} to launch driver " + submission.submissionId) val newState = new MesosClusterSubmissionState(submission, taskId, offer.offer.getSlaveId, diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala index db0a080b3b0c0..49de85ef48ada 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala @@ -146,7 +146,7 @@ private[spark] class MesosSchedulerBackend( private def createExecArg(): Array[Byte] = { if (execArgs == null) { val props = new HashMap[String, String] - for ((key,value) <- sc.conf.getAll) { + for ((key, value) <- sc.conf.getAll) { props(key) = value } // Serialize the map as an array of (String, String) pairs diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala index 928c5cfed417a..e79c543a9de27 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala @@ -37,14 +37,14 @@ private[mesos] object MesosSchedulerBackendUtil extends Logging { .newBuilder() .setMode(Volume.Mode.RW) spec match { - case Array(container_path) => + case Array(container_path) => 
Some(vol.setContainerPath(container_path)) case Array(container_path, "rw") => Some(vol.setContainerPath(container_path)) case Array(container_path, "ro") => Some(vol.setContainerPath(container_path) .setMode(Volume.Mode.RO)) - case Array(host_path, container_path) => + case Array(host_path, container_path) => Some(vol.setContainerPath(container_path) .setHostPath(host_path)) case Array(host_path, container_path, "rw") => @@ -108,7 +108,7 @@ private[mesos] object MesosSchedulerBackendUtil extends Logging { image: String, volumes: Option[List[Volume]] = None, network: Option[ContainerInfo.DockerInfo.Network] = None, - portmaps: Option[List[ContainerInfo.DockerInfo.PortMapping]] = None):Unit = { + portmaps: Option[List[ContainerInfo.DockerInfo.PortMapping]] = None): Unit = { val docker = ContainerInfo.DockerInfo.newBuilder().setImage(image) diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala index e64d06c4d3cfc..3078a1b10be8b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalBackend.scala @@ -18,14 +18,12 @@ package org.apache.spark.scheduler.local import java.nio.ByteBuffer -import java.util.concurrent.TimeUnit import org.apache.spark.{Logging, SparkConf, SparkContext, SparkEnv, TaskState} import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.{Executor, ExecutorBackend} -import org.apache.spark.rpc.{ThreadSafeRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler.{SchedulerBackend, TaskSchedulerImpl, WorkerOffer} -import org.apache.spark.util.{ThreadUtils, Utils} private case class ReviveOffers() @@ -47,9 +45,6 @@ private[spark] class LocalEndpoint( private val totalCores: Int) extends ThreadSafeRpcEndpoint with Logging { - private val reviveThread = - ThreadUtils.newDaemonSingleThreadScheduledExecutor("local-revive-thread") - private var freeCores = totalCores private val localExecutorId = SparkContext.DRIVER_IDENTIFIER @@ -79,27 +74,13 @@ private[spark] class LocalEndpoint( context.reply(true) } - def reviveOffers() { val offers = Seq(new WorkerOffer(localExecutorId, localExecutorHostname, freeCores)) - val tasks = scheduler.resourceOffers(offers).flatten - for (task <- tasks) { + for (task <- scheduler.resourceOffers(offers).flatten) { freeCores -= scheduler.CPUS_PER_TASK executor.launchTask(executorBackend, taskId = task.taskId, attemptNumber = task.attemptNumber, task.name, task.serializedTask) } - if (tasks.isEmpty && scheduler.activeTaskSets.nonEmpty) { - // Try to reviveOffer after 1 second, because scheduler may wait for locality timeout - reviveThread.schedule(new Runnable { - override def run(): Unit = Utils.tryLogNonFatalError { - Option(self).foreach(_.send(ReviveOffers)) - } - }, 1000, TimeUnit.MILLISECONDS) - } - } - - override def onStop(): Unit = { - reviveThread.shutdownNow() } } diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index dfbde7c8a1b0d..698d1384d580d 100644 --- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -121,6 +121,8 @@ class JavaSerializer(conf: SparkConf) extends Serializer with Externalizable { private 
var counterReset = conf.getInt("spark.serializer.objectStreamReset", 100) private var extraDebugInfo = conf.getBoolean("spark.serializer.extraDebugInfo", true) + protected def this() = this(new SparkConf()) // For deserialization only + override def newInstance(): SerializerInstance = { val classLoader = defaultClassLoader.getOrElse(Thread.currentThread.getContextClassLoader) new JavaSerializerInstance(counterReset, extraDebugInfo, classLoader) diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index b7bc087855b9f..cd8a82347a1e9 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -17,8 +17,9 @@ package org.apache.spark.serializer -import java.io.{EOFException, InputStream, OutputStream} +import java.io.{EOFException, IOException, InputStream, OutputStream} import java.nio.ByteBuffer +import javax.annotation.Nullable import scala.reflect.ClassTag @@ -32,6 +33,7 @@ import org.apache.spark._ import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.HttpBroadcast import org.apache.spark.network.nio.{GetBlock, GotBlock, PutBlock} +import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus} import org.apache.spark.storage._ import org.apache.spark.util.BoundedPriorityQueue @@ -50,19 +52,19 @@ class KryoSerializer(conf: SparkConf) with Serializable { private val bufferSizeKb = conf.getSizeAsKb("spark.kryoserializer.buffer", "64k") - - if (bufferSizeKb >= 2048) { + + if (bufferSizeKb >= ByteUnit.GiB.toKiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer must be less than " + - s"2048 mb, got: + $bufferSizeKb mb.") + s"2048 mb, got: + ${ByteUnit.KiB.toMiB(bufferSizeKb)} mb.") } - private val bufferSize = (bufferSizeKb * 1024).toInt + private val bufferSize = ByteUnit.KiB.toBytes(bufferSizeKb).toInt val maxBufferSizeMb = conf.getSizeAsMb("spark.kryoserializer.buffer.max", "64m").toInt - if (maxBufferSizeMb >= 2048) { + if (maxBufferSizeMb >= ByteUnit.GiB.toMiB(2)) { throw new IllegalArgumentException("spark.kryoserializer.buffer.max must be less than " + s"2048 mb, got: + $maxBufferSizeMb mb.") } - private val maxBufferSize = maxBufferSizeMb * 1024 * 1024 + private val maxBufferSize = ByteUnit.MiB.toBytes(maxBufferSizeMb).toInt private val referenceTracking = conf.getBoolean("spark.kryo.referenceTracking", true) private val registrationRequired = conf.getBoolean("spark.kryo.registrationRequired", false) @@ -125,24 +127,55 @@ class KryoSerializer(conf: SparkConf) override def newInstance(): SerializerInstance = { new KryoSerializerInstance(this) } + + private[spark] override lazy val supportsRelocationOfSerializedObjects: Boolean = { + // If auto-reset is disabled, then Kryo may store references to duplicate occurrences of objects + // in the stream rather than writing those objects' serialized bytes, breaking relocation. See + // https://groups.google.com/d/msg/kryo-users/6ZUSyfjjtdo/FhGG1KHDXPgJ for more details. 
+ newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset() + } } private[spark] -class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { - val output = new KryoOutput(outStream) +class KryoSerializationStream( + serInstance: KryoSerializerInstance, + outStream: OutputStream) extends SerializationStream { + + private[this] var output: KryoOutput = new KryoOutput(outStream) + private[this] var kryo: Kryo = serInstance.borrowKryo() override def writeObject[T: ClassTag](t: T): SerializationStream = { kryo.writeClassAndObject(output, t) this } - override def flush() { output.flush() } - override def close() { output.close() } + override def flush() { + if (output == null) { + throw new IOException("Stream is closed") + } + output.flush() + } + + override def close() { + if (output != null) { + try { + output.close() + } finally { + serInstance.releaseKryo(kryo) + kryo = null + output = null + } + } + } } private[spark] -class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream { - private val input = new KryoInput(inStream) +class KryoDeserializationStream( + serInstance: KryoSerializerInstance, + inStream: InputStream) extends DeserializationStream { + + private[this] var input: KryoInput = new KryoInput(inStream) + private[this] var kryo: Kryo = serInstance.borrowKryo() override def readObject[T: ClassTag](): T = { try { @@ -155,50 +188,105 @@ class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends Deser } override def close() { - // Kryo's Input automatically closes the input stream it is using. - input.close() + if (input != null) { + try { + // Kryo's Input automatically closes the input stream it is using. + input.close() + } finally { + serInstance.releaseKryo(kryo) + kryo = null + input = null + } + } } } private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance { - private val kryo = ks.newKryo() - // Make these lazy vals to avoid creating a buffer unless we use them + /** + * A re-used [[Kryo]] instance. Methods will borrow this instance by calling `borrowKryo()`, do + * their work, then release the instance by calling `releaseKryo()`. Logically, this is a caching + * pool of size one. SerializerInstances are not thread-safe, hence accesses to this field are + * not synchronized. + */ + @Nullable private[this] var cachedKryo: Kryo = borrowKryo() + + /** + * Borrows a [[Kryo]] instance. If possible, this tries to re-use a cached Kryo instance; + * otherwise, it allocates a new instance. + */ + private[serializer] def borrowKryo(): Kryo = { + if (cachedKryo != null) { + val kryo = cachedKryo + // As a defensive measure, call reset() to clear any Kryo state that might have been modified + // by the last operation to borrow this instance (see SPARK-7766 for discussion of this issue) + kryo.reset() + cachedKryo = null + kryo + } else { + ks.newKryo() + } + } + + /** + * Release a borrowed [[Kryo]] instance. If this serializer instance already has a cached Kryo + * instance, then the given Kryo instance is discarded; otherwise, the Kryo is stored for later + * re-use. + */ + private[serializer] def releaseKryo(kryo: Kryo): Unit = { + if (cachedKryo == null) { + cachedKryo = kryo + } + } + + // Make these lazy vals to avoid creating a buffer unless we use them. 
private lazy val output = ks.newKryoOutput() private lazy val input = new KryoInput() override def serialize[T: ClassTag](t: T): ByteBuffer = { output.clear() + val kryo = borrowKryo() try { kryo.writeClassAndObject(output, t) } catch { case e: KryoException if e.getMessage.startsWith("Buffer overflow") => throw new SparkException(s"Kryo serialization failed: ${e.getMessage}. To avoid this, " + "increase spark.kryoserializer.buffer.max value.") + } finally { + releaseKryo(kryo) } ByteBuffer.wrap(output.toBytes) } override def deserialize[T: ClassTag](bytes: ByteBuffer): T = { - input.setBuffer(bytes.array) - kryo.readClassAndObject(input).asInstanceOf[T] + val kryo = borrowKryo() + try { + input.setBuffer(bytes.array) + kryo.readClassAndObject(input).asInstanceOf[T] + } finally { + releaseKryo(kryo) + } } override def deserialize[T: ClassTag](bytes: ByteBuffer, loader: ClassLoader): T = { + val kryo = borrowKryo() val oldClassLoader = kryo.getClassLoader - kryo.setClassLoader(loader) - input.setBuffer(bytes.array) - val obj = kryo.readClassAndObject(input).asInstanceOf[T] - kryo.setClassLoader(oldClassLoader) - obj + try { + kryo.setClassLoader(loader) + input.setBuffer(bytes.array) + kryo.readClassAndObject(input).asInstanceOf[T] + } finally { + kryo.setClassLoader(oldClassLoader) + releaseKryo(kryo) + } } override def serializeStream(s: OutputStream): SerializationStream = { - new KryoSerializationStream(kryo, s) + new KryoSerializationStream(this, s) } override def deserializeStream(s: InputStream): DeserializationStream = { - new KryoDeserializationStream(kryo, s) + new KryoDeserializationStream(this, s) } /** @@ -208,7 +296,12 @@ private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends Serializ def getAutoReset(): Boolean = { val field = classOf[Kryo].getDeclaredField("autoReset") field.setAccessible(true) - field.get(kryo).asInstanceOf[Boolean] + val kryo = borrowKryo() + try { + field.get(kryo).asInstanceOf[Boolean] + } finally { + releaseKryo(kryo) + } } } diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala index 5abfa467c0ec8..bb5db545531d2 100644 --- a/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializationDebugger.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark.Logging -private[serializer] object SerializationDebugger extends Logging { +private[spark] object SerializationDebugger extends Logging { /** * Improve the given NotSerializableException with the serialization path leading from the given diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index c381672a4f588..f1bdff96d3df1 100644 --- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -19,11 +19,12 @@ package org.apache.spark.serializer import java.io._ import java.nio.ByteBuffer +import javax.annotation.concurrent.NotThreadSafe import scala.reflect.ClassTag import org.apache.spark.{SparkConf, SparkEnv} -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Private} import org.apache.spark.util.{Utils, ByteBufferInputStream, NextIterator} /** @@ -63,6 +64,39 @@ abstract class Serializer { /** Creates a new [[SerializerInstance]]. 
*/ def newInstance(): SerializerInstance + + /** + * :: Private :: + * Returns true if this serializer supports relocation of its serialized objects and false + * otherwise. This should return true if and only if reordering the bytes of serialized objects + * in serialization stream output is equivalent to having re-ordered those elements prior to + * serializing them. More specifically, the following should hold if a serializer supports + * relocation: + * + * {{{ + * serOut.open() + * position = 0 + * serOut.write(obj1) + * serOut.flush() + * position = # of bytes writen to stream so far + * obj1Bytes = output[0:position-1] + * serOut.write(obj2) + * serOut.flush() + * position2 = # of bytes written to stream so far + * obj2Bytes = output[position:position2-1] + * serIn.open([obj2bytes] concatenate [obj1bytes]) should return (obj2, obj1) + * }}} + * + * In general, this property should hold for serializers that are stateless and that do not + * write special metadata at the beginning or end of the serialization stream. + * + * This API is private to Spark; this method should not be overridden in third-party subclasses + * or called in user code and is subject to removal in future Spark releases. + * + * See SPARK-7311 for more details. + */ + @Private + private[spark] def supportsRelocationOfSerializedObjects: Boolean = false } @@ -81,8 +115,12 @@ object Serializer { /** * :: DeveloperApi :: * An instance of a serializer, for use by one thread at a time. + * + * It is legal to create multiple serialization / deserialization streams from the same + * SerializerInstance as long as those streams are all used within the same thread. */ @DeveloperApi +@NotThreadSafe abstract class SerializerInstance { def serialize[T: ClassTag](t: T): ByteBuffer diff --git a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala similarity index 97% rename from core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala rename to core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala index e9b4e2b955dc8..6c3b3080d2605 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FileShuffleBlockResolver.scala @@ -18,7 +18,6 @@ package org.apache.spark.shuffle import java.io.File -import java.nio.ByteBuffer import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicInteger @@ -29,7 +28,7 @@ import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.serializer.Serializer -import org.apache.spark.shuffle.FileShuffleBlockManager.ShuffleFileGroup +import org.apache.spark.shuffle.FileShuffleBlockResolver.ShuffleFileGroup import org.apache.spark.storage._ import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashMap} import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector} @@ -64,9 +63,8 @@ private[spark] trait ShuffleWriterGroup { * files within a ShuffleFileGroups associated with the block's reducer. */ // Note: Changes to the format in this file should be kept in sync with -// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getHashBasedShuffleBlockData(). 
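The KryoSerializerInstance changes above replace a single long-lived Kryo field with a borrow/release pool of size one, but the calling convention is unchanged: a SerializerInstance is still confined to one thread and reused across calls. A minimal usage sketch, not taken from the patch, assuming Kryo's default registrations cover the value being written:

    import org.apache.spark.SparkConf
    import org.apache.spark.serializer.KryoSerializer

    // One instance per thread; serialize/deserialize borrow the cached Kryo
    // internally and release it when done (see borrowKryo/releaseKryo above).
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val bytes = ser.serialize("some value")
    val back  = ser.deserialize[String](bytes)
    assert(back == "some value")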
-private[spark] -class FileShuffleBlockManager(conf: SparkConf) +// org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getHashBasedShuffleBlockData(). +private[spark] class FileShuffleBlockResolver(conf: SparkConf) extends ShuffleBlockResolver with Logging { private val transportConf = SparkTransportConf.fromSparkConf(conf) @@ -78,7 +76,7 @@ class FileShuffleBlockManager(conf: SparkConf) private val consolidateShuffleFiles = conf.getBoolean("spark.shuffle.consolidateFiles", false) - // Use getSizeAsKb (not bytes) to maintain backwards compatibility of on units are provided + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided private val bufferSize = conf.getSizeAsKb("spark.shuffle.file.buffer", "32k").toInt * 1024 /** @@ -242,8 +240,7 @@ class FileShuffleBlockManager(conf: SparkConf) } } -private[spark] -object FileShuffleBlockManager { +private[spark] object FileShuffleBlockResolver { /** * A group of shuffle files, one per reducer. * A particular mapper will be assigned a single ShuffleFileGroup to write its output to. diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala similarity index 93% rename from core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala rename to core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index a1741e2875c16..d9c63b6e7bbb9 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -18,7 +18,6 @@ package org.apache.spark.shuffle import java.io._ -import java.nio.ByteBuffer import com.google.common.io.ByteStreams @@ -28,7 +27,7 @@ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.storage._ import org.apache.spark.util.Utils -import IndexShuffleBlockManager.NOOP_REDUCE_ID +import IndexShuffleBlockResolver.NOOP_REDUCE_ID /** * Create and maintain the shuffle blocks' mapping between logic block and physical file location. @@ -40,9 +39,8 @@ import IndexShuffleBlockManager.NOOP_REDUCE_ID * */ // Note: Changes to the format in this file should be kept in sync with -// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getSortBasedShuffleBlockData(). -private[spark] -class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockResolver { +// org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getSortBasedShuffleBlockData(). +private[spark] class IndexShuffleBlockResolver(conf: SparkConf) extends ShuffleBlockResolver { private lazy val blockManager = SparkEnv.get.blockManager @@ -115,7 +113,7 @@ class IndexShuffleBlockManager(conf: SparkConf) extends ShuffleBlockResolver { override def stop(): Unit = {} } -private[spark] object IndexShuffleBlockManager { +private[spark] object IndexShuffleBlockResolver { // No-op reduce ID used in interactions with disk store and BlockObjectWriter. // The disk store currently expects puts to relate to a (map, reduce) pair, but in the sort // shuffle outputs for several reduces are glommed into a single file. 
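The supportsRelocationOfSerializedObjects contract documented in Serializer.scala above can be exercised directly: write two objects to one serialization stream, swap their byte ranges, and a relocation-safe serializer should read them back in the swapped order. A sketch of that check, not a real Spark test, assuming KryoSerializer with its default auto-reset behaviour:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
    import org.apache.spark.SparkConf
    import org.apache.spark.serializer.KryoSerializer

    val instance = new KryoSerializer(new SparkConf()).newInstance()
    val out = new ByteArrayOutputStream()
    val serStream = instance.serializeStream(out)
    serStream.writeObject("obj1")
    serStream.flush()
    val cut = out.size()                              // end of obj1's bytes
    serStream.writeObject("obj2")
    serStream.flush()
    serStream.close()
    val bytes = out.toByteArray
    val swapped = bytes.drop(cut) ++ bytes.take(cut)  // obj2Bytes then obj1Bytes
    val deStream = instance.deserializeStream(new ByteArrayInputStream(swapped))
    assert(deStream.readObject[String]() == "obj2")   // order follows the byte layout
    assert(deStream.readObject[String]() == "obj1")
    deStream.close()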
diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala index f6e6fe5defe09..4cc4ef5f1886e 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala @@ -17,14 +17,17 @@ package org.apache.spark.shuffle +import java.io.IOException + import org.apache.spark.scheduler.MapStatus /** * Obtained inside a map task to write out records to the shuffle system. */ -private[spark] trait ShuffleWriter[K, V] { +private[spark] abstract class ShuffleWriter[K, V] { /** Write a sequence of records to this task's output */ - def write(records: Iterator[_ <: Product2[K, V]]): Unit + @throws[IOException] + def write(records: Iterator[Product2[K, V]]): Unit /** Close this writer, passing along whether the map completed */ def stop(success: Boolean): Option[MapStatus] diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala index 80374adc44296..597d46a3d2223 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/BlockStoreShuffleFetcher.scala @@ -80,7 +80,7 @@ private[hash] object BlockStoreShuffleFetcher extends Logging { blocksByAddress, serializer, // Note: we use getSizeAsMb when no suffix is provided for backwards compatibility - SparkEnv.get.conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024) + SparkEnv.get.conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024) val itr = blockFetcherItr.flatMap(unpackBlock) val completionIter = CompletionIterator[T, Iterator[T]](itr, { diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala index 2a7df8dd5bd83..c089088f409dd 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala @@ -26,7 +26,7 @@ import org.apache.spark.shuffle._ */ private[spark] class HashShuffleManager(conf: SparkConf) extends ShuffleManager { - private val fileShuffleBlockManager = new FileShuffleBlockManager(conf) + private val fileShuffleBlockResolver = new FileShuffleBlockResolver(conf) /* Register a shuffle with the manager and obtain a handle for it to pass to tasks. */ override def registerShuffle[K, V, C]( @@ -61,8 +61,8 @@ private[spark] class HashShuffleManager(conf: SparkConf) extends ShuffleManager shuffleBlockResolver.removeShuffle(shuffleId) } - override def shuffleBlockResolver: FileShuffleBlockManager = { - fileShuffleBlockManager + override def shuffleBlockResolver: FileShuffleBlockResolver = { + fileShuffleBlockResolver } /** Shut down this ShuffleManager. 
*/ diff --git a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala index cd27c9e07a3cd..eb87cee15903c 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleWriter.scala @@ -25,7 +25,7 @@ import org.apache.spark.shuffle._ import org.apache.spark.storage.BlockObjectWriter private[spark] class HashShuffleWriter[K, V]( - shuffleBlockManager: FileShuffleBlockManager, + shuffleBlockResolver: FileShuffleBlockResolver, handle: BaseShuffleHandle[K, V, _], mapId: Int, context: TaskContext) @@ -45,11 +45,11 @@ private[spark] class HashShuffleWriter[K, V]( private val blockManager = SparkEnv.get.blockManager private val ser = Serializer.getSerializer(dep.serializer.getOrElse(null)) - private val shuffle = shuffleBlockManager.forMapTask(dep.shuffleId, mapId, numOutputSplits, ser, + private val shuffle = shuffleBlockResolver.forMapTask(dep.shuffleId, mapId, numOutputSplits, ser, writeMetrics) /** Write a bunch of records to this task's output */ - override def write(records: Iterator[_ <: Product2[K, V]]): Unit = { + override def write(records: Iterator[Product2[K, V]]): Unit = { val iter = if (dep.aggregator.isDefined) { if (dep.mapSideCombine) { dep.aggregator.get.combineValuesByKey(records, context) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 0497036192154..d7fab351ca3b8 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -25,7 +25,7 @@ import org.apache.spark.shuffle.hash.HashShuffleReader private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager { - private val indexShuffleBlockManager = new IndexShuffleBlockManager(conf) + private val indexShuffleBlockResolver = new IndexShuffleBlockResolver(conf) private val shuffleMapNumber = new ConcurrentHashMap[Int, Int]() /** @@ -72,8 +72,8 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager true } - override def shuffleBlockResolver: IndexShuffleBlockManager = { - indexShuffleBlockManager + override val shuffleBlockResolver: IndexShuffleBlockResolver = { + indexShuffleBlockResolver } /** Shut down this ShuffleManager. 
*/ diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index a066435df6fb0..c9dd6bfc4c219 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -20,12 +20,12 @@ package org.apache.spark.shuffle.sort import org.apache.spark.{MapOutputTracker, SparkEnv, Logging, TaskContext} import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.scheduler.MapStatus -import org.apache.spark.shuffle.{IndexShuffleBlockManager, ShuffleWriter, BaseShuffleHandle} +import org.apache.spark.shuffle.{IndexShuffleBlockResolver, ShuffleWriter, BaseShuffleHandle} import org.apache.spark.storage.ShuffleBlockId import org.apache.spark.util.collection.ExternalSorter private[spark] class SortShuffleWriter[K, V, C]( - shuffleBlockManager: IndexShuffleBlockManager, + shuffleBlockResolver: IndexShuffleBlockResolver, handle: BaseShuffleHandle[K, V, C], mapId: Int, context: TaskContext) @@ -48,7 +48,7 @@ private[spark] class SortShuffleWriter[K, V, C]( context.taskMetrics.shuffleWriteMetrics = Some(writeMetrics) /** Write a bunch of records to this task's output */ - override def write(records: Iterator[_ <: Product2[K, V]]): Unit = { + override def write(records: Iterator[Product2[K, V]]): Unit = { if (dep.mapSideCombine) { require(dep.aggregator.isDefined, "Map-side combine without Aggregator specified!") sorter = new ExternalSorter[K, V, C]( @@ -65,10 +65,10 @@ private[spark] class SortShuffleWriter[K, V, C]( // Don't bother including the time to open the merged output file in the shuffle write time, // because it just opens a single file, so is typically too fast to measure accurately // (see SPARK-3570). - val outputFile = shuffleBlockManager.getDataFile(dep.shuffleId, mapId) - val blockId = ShuffleBlockId(dep.shuffleId, mapId, IndexShuffleBlockManager.NOOP_REDUCE_ID) + val outputFile = shuffleBlockResolver.getDataFile(dep.shuffleId, mapId) + val blockId = ShuffleBlockId(dep.shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID) val partitionLengths = sorter.writePartitionedFile(blockId, context, outputFile) - shuffleBlockManager.writeIndexFile(dep.shuffleId, mapId, partitionLengths) + shuffleBlockResolver.writeIndexFile(dep.shuffleId, mapId, partitionLengths) mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths) } @@ -84,7 +84,7 @@ private[spark] class SortShuffleWriter[K, V, C]( return Option(mapStatus) } else { // The map task failed, so delete our output data. - shuffleBlockManager.removeDataByMap(dep.shuffleId, mapId) + shuffleBlockResolver.removeDataByMap(dep.shuffleId, mapId) return None } } finally { diff --git a/core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala new file mode 100644 index 0000000000000..f2bfef376d3ca --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManager.scala @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe + +import java.util.Collections +import java.util.concurrent.ConcurrentHashMap + +import org.apache.spark._ +import org.apache.spark.serializer.Serializer +import org.apache.spark.shuffle._ +import org.apache.spark.shuffle.sort.SortShuffleManager + +/** + * Subclass of [[BaseShuffleHandle]], used to identify when we've chosen to use the new shuffle. + */ +private[spark] class UnsafeShuffleHandle[K, V]( + shuffleId: Int, + numMaps: Int, + dependency: ShuffleDependency[K, V, V]) + extends BaseShuffleHandle(shuffleId, numMaps, dependency) { +} + +private[spark] object UnsafeShuffleManager extends Logging { + + /** + * The maximum number of shuffle output partitions that UnsafeShuffleManager supports. + */ + val MAX_SHUFFLE_OUTPUT_PARTITIONS = PackedRecordPointer.MAXIMUM_PARTITION_ID + 1 + + /** + * Helper method for determining whether a shuffle should use the optimized unsafe shuffle + * path or whether it should fall back to the original sort-based shuffle. + */ + def canUseUnsafeShuffle[K, V, C](dependency: ShuffleDependency[K, V, C]): Boolean = { + val shufId = dependency.shuffleId + val serializer = Serializer.getSerializer(dependency.serializer) + if (!serializer.supportsRelocationOfSerializedObjects) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because the serializer, " + + s"${serializer.getClass.getName}, does not support object relocation") + false + } else if (dependency.aggregator.isDefined) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because an aggregator is defined") + false + } else if (dependency.keyOrdering.isDefined) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because a key ordering is defined") + false + } else if (dependency.partitioner.numPartitions > MAX_SHUFFLE_OUTPUT_PARTITIONS) { + log.debug(s"Can't use UnsafeShuffle for shuffle $shufId because it has more than " + + s"$MAX_SHUFFLE_OUTPUT_PARTITIONS partitions") + false + } else { + log.debug(s"Can use UnsafeShuffle for shuffle $shufId") + true + } + } +} + +/** + * A shuffle implementation that uses directly-managed memory to implement several performance + * optimizations for certain types of shuffles. In cases where the new performance optimizations + * cannot be applied, this shuffle manager delegates to [[SortShuffleManager]] to handle those + * shuffles. + * + * UnsafeShuffleManager's optimizations will apply when _all_ of the following conditions hold: + * + * - The shuffle dependency specifies no aggregation or output ordering. + * - The shuffle serializer supports relocation of serialized values (this is currently supported + * by KryoSerializer and Spark SQL's custom serializers). + * - The shuffle produces fewer than 16777216 output partitions. + * - No individual record is larger than 128 MB when serialized. + * + * In addition, extra spill-merging optimizations are automatically applied when the shuffle + * compression codec supports concatenation of serialized streams. This is currently supported by + * Spark's LZF serializer. 
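Before the design notes continue, a configuration sketch for opting into this shuffle path. The "tungsten-sort" short name comes from the warning message later in this file; the name-to-class mapping itself is not part of this hunk, so treat it as an assumption:

    // Illustrative only; the conditions listed above (no aggregation, no key
    // ordering, a relocation-friendly serializer, and fewer than
    // MAX_SHUFFLE_OUTPUT_PARTITIONS partitions) still decide per shuffle whether
    // the optimized writer or the SortShuffleManager fallback is used.
    val conf = new org.apache.spark.SparkConf()
      .set("spark.shuffle.manager", "tungsten-sort")                          // assumed alias for UnsafeShuffleManager
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")  // supports relocation of serialized objects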
+ * + * At a high-level, UnsafeShuffleManager's design is similar to Spark's existing SortShuffleManager. + * In sort-based shuffle, incoming records are sorted according to their target partition ids, then + * written to a single map output file. Reducers fetch contiguous regions of this file in order to + * read their portion of the map output. In cases where the map output data is too large to fit in + * memory, sorted subsets of the output can are spilled to disk and those on-disk files are merged + * to produce the final output file. + * + * UnsafeShuffleManager optimizes this process in several ways: + * + * - Its sort operates on serialized binary data rather than Java objects, which reduces memory + * consumption and GC overheads. This optimization requires the record serializer to have certain + * properties to allow serialized records to be re-ordered without requiring deserialization. + * See SPARK-4550, where this optimization was first proposed and implemented, for more details. + * + * - It uses a specialized cache-efficient sorter ([[UnsafeShuffleExternalSorter]]) that sorts + * arrays of compressed record pointers and partition ids. By using only 8 bytes of space per + * record in the sorting array, this fits more of the array into cache. + * + * - The spill merging procedure operates on blocks of serialized records that belong to the same + * partition and does not need to deserialize records during the merge. + * + * - When the spill compression codec supports concatenation of compressed data, the spill merge + * simply concatenates the serialized and compressed spill partitions to produce the final output + * partition. This allows efficient data copying methods, like NIO's `transferTo`, to be used + * and avoids the need to allocate decompression or copying buffers during the merge. + * + * For more details on UnsafeShuffleManager's design, see SPARK-7081. + */ +private[spark] class UnsafeShuffleManager(conf: SparkConf) extends ShuffleManager with Logging { + + if (!conf.getBoolean("spark.shuffle.spill", true)) { + logWarning( + "spark.shuffle.spill was set to false, but this is ignored by the tungsten-sort shuffle " + + "manager; its optimized shuffles will continue to spill to disk when necessary.") + } + + private[this] val sortShuffleManager: SortShuffleManager = new SortShuffleManager(conf) + private[this] val shufflesThatFellBackToSortShuffle = + Collections.newSetFromMap(new ConcurrentHashMap[Int, java.lang.Boolean]()) + private[this] val numMapsForShufflesThatUsedNewPath = new ConcurrentHashMap[Int, Int]() + + /** + * Register a shuffle with the manager and obtain a handle for it to pass to tasks. + */ + override def registerShuffle[K, V, C]( + shuffleId: Int, + numMaps: Int, + dependency: ShuffleDependency[K, V, C]): ShuffleHandle = { + if (UnsafeShuffleManager.canUseUnsafeShuffle(dependency)) { + new UnsafeShuffleHandle[K, V]( + shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]]) + } else { + new BaseShuffleHandle(shuffleId, numMaps, dependency) + } + } + + /** + * Get a reader for a range of reduce partitions (startPartition to endPartition-1, inclusive). + * Called on executors by reduce tasks. + */ + override def getReader[K, C]( + handle: ShuffleHandle, + startPartition: Int, + endPartition: Int, + context: TaskContext): ShuffleReader[K, C] = { + sortShuffleManager.getReader(handle, startPartition, endPartition, context) + } + + /** Get a writer for a given partition. Called on executors by map tasks. 
*/ + override def getWriter[K, V]( + handle: ShuffleHandle, + mapId: Int, + context: TaskContext): ShuffleWriter[K, V] = { + handle match { + case unsafeShuffleHandle: UnsafeShuffleHandle[K, V] => + numMapsForShufflesThatUsedNewPath.putIfAbsent(handle.shuffleId, unsafeShuffleHandle.numMaps) + val env = SparkEnv.get + new UnsafeShuffleWriter( + env.blockManager, + shuffleBlockResolver.asInstanceOf[IndexShuffleBlockResolver], + context.taskMemoryManager(), + env.shuffleMemoryManager, + unsafeShuffleHandle, + mapId, + context, + env.conf) + case other => + shufflesThatFellBackToSortShuffle.add(handle.shuffleId) + sortShuffleManager.getWriter(handle, mapId, context) + } + } + + /** Remove a shuffle's metadata from the ShuffleManager. */ + override def unregisterShuffle(shuffleId: Int): Boolean = { + if (shufflesThatFellBackToSortShuffle.remove(shuffleId)) { + sortShuffleManager.unregisterShuffle(shuffleId) + } else { + Option(numMapsForShufflesThatUsedNewPath.remove(shuffleId)).foreach { numMaps => + (0 until numMaps).foreach { mapId => + shuffleBlockResolver.removeDataByMap(shuffleId, mapId) + } + } + true + } + } + + override val shuffleBlockResolver: IndexShuffleBlockResolver = { + sortShuffleManager.shuffleBlockResolver + } + + /** Shut down this ShuffleManager. */ + override def stop(): Unit = { + sortShuffleManager.stop() + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala new file mode 100644 index 0000000000000..5783df5d8220c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllJobsResource.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.ui.jobs.UIData.JobUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllJobsResource(ui: SparkUI) { + + @GET + def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = { + val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] = + AllJobsResource.getStatusToJobs(ui) + val adjStatuses: JList[JobExecutionStatus] = { + if (statuses.isEmpty) { + Arrays.asList(JobExecutionStatus.values(): _*) + } else { + statuses + } + } + val jobInfos = for { + (status, jobs) <- statusToJobs + job <- jobs if adjStatuses.contains(status) + } yield { + AllJobsResource.convertJobData(job, ui.jobProgressListener, false) + } + jobInfos.sortBy{- _.jobId} + } + +} + +private[v1] object AllJobsResource { + + def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = { + val statusToJobs = ui.jobProgressListener.synchronized { + Seq( + JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq, + JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq, + JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq + ) + } + statusToJobs + } + + def convertJobData( + job: JobUIData, + listener: JobProgressListener, + includeStageDetails: Boolean): JobData = { + listener.synchronized { + val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max) + val lastStageData = lastStageInfo.flatMap { s => + listener.stageIdToData.get((s.stageId, s.attemptId)) + } + val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)") + val lastStageDescription = lastStageData.flatMap { _.description } + new JobData( + jobId = job.jobId, + name = lastStageName, + description = lastStageDescription, + submissionTime = job.submissionTime.map{new Date(_)}, + completionTime = job.completionTime.map{new Date(_)}, + stageIds = job.stageIds, + jobGroup = job.jobGroup, + status = job.status, + numTasks = job.numTasks, + numActiveTasks = job.numActiveTasks, + numCompletedTasks = job.numCompletedTasks, + numSkippedTasks = job.numCompletedTasks, + numFailedTasks = job.numFailedTasks, + numActiveStages = job.numActiveStages, + numCompletedStages = job.completedStageIndices.size, + numSkippedStages = job.numSkippedStages, + numFailedStages = job.numFailedStages + ) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala new file mode 100644 index 0000000000000..645ede26a0879 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllRDDResource.scala @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.storage.{RDDInfo, StorageStatus, StorageUtils} +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.storage.StorageListener + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllRDDResource(ui: SparkUI) { + + @GET + def rddList(): Seq[RDDStorageInfo] = { + val storageStatusList = ui.storageListener.storageStatusList + val rddInfos = ui.storageListener.rddInfoList + rddInfos.map{rddInfo => + AllRDDResource.getRDDStorageInfo(rddInfo.id, rddInfo, storageStatusList, + includeDetails = false) + } + } + +} + +private[spark] object AllRDDResource { + + def getRDDStorageInfo( + rddId: Int, + listener: StorageListener, + includeDetails: Boolean): Option[RDDStorageInfo] = { + val storageStatusList = listener.storageStatusList + listener.rddInfoList.find { _.id == rddId }.map { rddInfo => + getRDDStorageInfo(rddId, rddInfo, storageStatusList, includeDetails) + } + } + + def getRDDStorageInfo( + rddId: Int, + rddInfo: RDDInfo, + storageStatusList: Seq[StorageStatus], + includeDetails: Boolean): RDDStorageInfo = { + val workers = storageStatusList.map { (rddId, _) } + val blockLocations = StorageUtils.getRddBlockLocations(rddId, storageStatusList) + val blocks = storageStatusList + .flatMap { _.rddBlocksById(rddId) } + .sortWith { _._1.name < _._1.name } + .map { case (blockId, status) => + (blockId, status, blockLocations.get(blockId).getOrElse(Seq[String]("Unknown"))) + } + + val dataDistribution = if (includeDetails) { + Some(storageStatusList.map { status => + new RDDDataDistribution( + address = status.blockManagerId.hostPort, + memoryUsed = status.memUsedByRdd(rddId), + memoryRemaining = status.memRemaining, + diskUsed = status.diskUsedByRdd(rddId) + ) } ) + } else { + None + } + val partitions = if (includeDetails) { + Some(blocks.map { case (id, block, locations) => + new RDDPartitionInfo( + blockName = id.name, + storageLevel = block.storageLevel.description, + memoryUsed = block.memSize, + diskUsed = block.diskSize, + executors = locations + ) + } ) + } else { + None + } + + new RDDStorageInfo( + id = rddId, + name = rddInfo.name, + numPartitions = rddInfo.numPartitions, + numCachedPartitions = rddInfo.numCachedPartitions, + storageLevel = rddInfo.storageLevel.description, + memoryUsed = rddInfo.memSize, + diskUsed = rddInfo.diskSize, + dataDistribution = dataDistribution, + partitions = partitions + ) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala new file mode 100644 index 0000000000000..390c136df79b3 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/AllStagesResource.scala @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs.{GET, PathParam, Produces, QueryParam} +import javax.ws.rs.core.MediaType + +import org.apache.spark.executor.{InputMetrics => InternalInputMetrics, OutputMetrics => InternalOutputMetrics, ShuffleReadMetrics => InternalShuffleReadMetrics, ShuffleWriteMetrics => InternalShuffleWriteMetrics, TaskMetrics => InternalTaskMetrics} +import org.apache.spark.scheduler.{AccumulableInfo => InternalAccumulableInfo, StageInfo} +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.UIData.{StageUIData, TaskUIData} +import org.apache.spark.util.Distribution + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class AllStagesResource(ui: SparkUI) { + + @GET + def stageList(@QueryParam("status") statuses: JList[StageStatus]): Seq[StageData] = { + val listener = ui.jobProgressListener + val stageAndStatus = AllStagesResource.stagesAndStatus(ui) + val adjStatuses = { + if (statuses.isEmpty()) { + Arrays.asList(StageStatus.values(): _*) + } else { + statuses + } + } + for { + (status, stageList) <- stageAndStatus + stageInfo: StageInfo <- stageList if adjStatuses.contains(status) + stageUiData: StageUIData <- listener.synchronized { + listener.stageIdToData.get((stageInfo.stageId, stageInfo.attemptId)) + } + } yield { + AllStagesResource.stageUiToStageData(status, stageInfo, stageUiData, includeDetails = false) + } + } +} + +private[v1] object AllStagesResource { + def stageUiToStageData( + status: StageStatus, + stageInfo: StageInfo, + stageUiData: StageUIData, + includeDetails: Boolean): StageData = { + + val taskData = if (includeDetails) { + Some(stageUiData.taskData.map { case (k, v) => k -> convertTaskData(v) } ) + } else { + None + } + val executorSummary = if (includeDetails) { + Some(stageUiData.executorSummary.map { case (k, summary) => + k -> new ExecutorStageSummary( + taskTime = summary.taskTime, + failedTasks = summary.failedTasks, + succeededTasks = summary.succeededTasks, + inputBytes = summary.inputBytes, + outputBytes = summary.outputBytes, + shuffleRead = summary.shuffleRead, + shuffleWrite = summary.shuffleWrite, + memoryBytesSpilled = summary.memoryBytesSpilled, + diskBytesSpilled = summary.diskBytesSpilled + ) + }) + } else { + None + } + + val accumulableInfo = stageUiData.accumulables.values.map { convertAccumulableInfo }.toSeq + + new StageData( + status = status, + stageId = stageInfo.stageId, + attemptId = stageInfo.attemptId, + numActiveTasks = stageUiData.numActiveTasks, + numCompleteTasks = stageUiData.numCompleteTasks, + numFailedTasks = stageUiData.numFailedTasks, + executorRunTime = stageUiData.executorRunTime, + inputBytes = stageUiData.inputBytes, + inputRecords = stageUiData.inputRecords, + outputBytes = stageUiData.outputBytes, + outputRecords = stageUiData.outputRecords, + shuffleReadBytes = stageUiData.shuffleReadTotalBytes, + shuffleReadRecords = 
stageUiData.shuffleReadRecords, + shuffleWriteBytes = stageUiData.shuffleWriteBytes, + shuffleWriteRecords = stageUiData.shuffleWriteRecords, + memoryBytesSpilled = stageUiData.memoryBytesSpilled, + diskBytesSpilled = stageUiData.diskBytesSpilled, + schedulingPool = stageUiData.schedulingPool, + name = stageInfo.name, + details = stageInfo.details, + accumulatorUpdates = accumulableInfo, + tasks = taskData, + executorSummary = executorSummary + ) + } + + def stagesAndStatus(ui: SparkUI): Seq[(StageStatus, Seq[StageInfo])] = { + val listener = ui.jobProgressListener + listener.synchronized { + Seq( + StageStatus.ACTIVE -> listener.activeStages.values.toSeq, + StageStatus.COMPLETE -> listener.completedStages.reverse.toSeq, + StageStatus.FAILED -> listener.failedStages.reverse.toSeq, + StageStatus.PENDING -> listener.pendingStages.values.toSeq + ) + } + } + + def convertTaskData(uiData: TaskUIData): TaskData = { + new TaskData( + taskId = uiData.taskInfo.taskId, + index = uiData.taskInfo.index, + attempt = uiData.taskInfo.attempt, + launchTime = new Date(uiData.taskInfo.launchTime), + executorId = uiData.taskInfo.executorId, + host = uiData.taskInfo.host, + taskLocality = uiData.taskInfo.taskLocality.toString(), + speculative = uiData.taskInfo.speculative, + accumulatorUpdates = uiData.taskInfo.accumulables.map { convertAccumulableInfo }, + errorMessage = uiData.errorMessage, + taskMetrics = uiData.taskMetrics.map { convertUiTaskMetrics } + ) + } + + def taskMetricDistributions( + allTaskData: Iterable[TaskUIData], + quantiles: Array[Double]): TaskMetricDistributions = { + + val rawMetrics = allTaskData.flatMap{_.taskMetrics}.toSeq + + def metricQuantiles(f: InternalTaskMetrics => Double): IndexedSeq[Double] = + Distribution(rawMetrics.map { d => f(d) }).get.getQuantiles(quantiles) + + // We need to do a lot of similar munging to nested metrics here. For each one, + // we want (a) extract the values for nested metrics (b) make a distribution for each metric + // (c) shove the distribution into the right field in our return type and (d) only return + // a result if the option is defined for any of the tasks. MetricHelper is a little util + // to make it a little easier to deal w/ all of the nested options. Mostly it lets us just + // implement one "build" method, which just builds the quantiles for each field. 
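The quantile munging described above boils down to one call into Spark's internal Distribution utility per metric. A toy illustration of that step, with made-up task values; Distribution is private[spark], so this only compiles inside an org.apache.spark package and is shown purely to clarify the shape of metricQuantiles and MetricHelper.submetricQuantiles:

    import org.apache.spark.util.Distribution

    val runTimes  = Seq(10.0, 12.0, 48.0, 300.0)   // hypothetical executorRunTime values, one per task
    val quantiles = Array(0.05, 0.25, 0.5, 0.75, 0.95)
    // summary(i) is the quantiles(i) percentile of runTimes, which is exactly
    // what the REST API returns for each metric field.
    val summary = Distribution(runTimes).get.getQuantiles(quantiles)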
+ + val inputMetrics: Option[InputMetricDistributions] = + new MetricHelper[InternalInputMetrics, InputMetricDistributions](rawMetrics, quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalInputMetrics] = { + raw.inputMetrics + } + + def build: InputMetricDistributions = new InputMetricDistributions( + bytesRead = submetricQuantiles(_.bytesRead), + recordsRead = submetricQuantiles(_.recordsRead) + ) + }.metricOption + + val outputMetrics: Option[OutputMetricDistributions] = + new MetricHelper[InternalOutputMetrics, OutputMetricDistributions](rawMetrics, quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalOutputMetrics] = { + raw.outputMetrics + } + def build: OutputMetricDistributions = new OutputMetricDistributions( + bytesWritten = submetricQuantiles(_.bytesWritten), + recordsWritten = submetricQuantiles(_.recordsWritten) + ) + }.metricOption + + val shuffleReadMetrics: Option[ShuffleReadMetricDistributions] = + new MetricHelper[InternalShuffleReadMetrics, ShuffleReadMetricDistributions](rawMetrics, + quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalShuffleReadMetrics] = { + raw.shuffleReadMetrics + } + def build: ShuffleReadMetricDistributions = new ShuffleReadMetricDistributions( + readBytes = submetricQuantiles(_.totalBytesRead), + readRecords = submetricQuantiles(_.recordsRead), + remoteBytesRead = submetricQuantiles(_.remoteBytesRead), + remoteBlocksFetched = submetricQuantiles(_.remoteBlocksFetched), + localBlocksFetched = submetricQuantiles(_.localBlocksFetched), + totalBlocksFetched = submetricQuantiles(_.totalBlocksFetched), + fetchWaitTime = submetricQuantiles(_.fetchWaitTime) + ) + }.metricOption + + val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions] = + new MetricHelper[InternalShuffleWriteMetrics, ShuffleWriteMetricDistributions](rawMetrics, + quantiles) { + def getSubmetrics(raw: InternalTaskMetrics): Option[InternalShuffleWriteMetrics] = { + raw.shuffleWriteMetrics + } + def build: ShuffleWriteMetricDistributions = new ShuffleWriteMetricDistributions( + writeBytes = submetricQuantiles(_.shuffleBytesWritten), + writeRecords = submetricQuantiles(_.shuffleRecordsWritten), + writeTime = submetricQuantiles(_.shuffleWriteTime) + ) + }.metricOption + + new TaskMetricDistributions( + quantiles = quantiles, + executorDeserializeTime = metricQuantiles(_.executorDeserializeTime), + executorRunTime = metricQuantiles(_.executorRunTime), + resultSize = metricQuantiles(_.resultSize), + jvmGcTime = metricQuantiles(_.jvmGCTime), + resultSerializationTime = metricQuantiles(_.resultSerializationTime), + memoryBytesSpilled = metricQuantiles(_.memoryBytesSpilled), + diskBytesSpilled = metricQuantiles(_.diskBytesSpilled), + inputMetrics = inputMetrics, + outputMetrics = outputMetrics, + shuffleReadMetrics = shuffleReadMetrics, + shuffleWriteMetrics = shuffleWriteMetrics + ) + } + + def convertAccumulableInfo(acc: InternalAccumulableInfo): AccumulableInfo = { + new AccumulableInfo(acc.id, acc.name, acc.update, acc.value) + } + + def convertUiTaskMetrics(internal: InternalTaskMetrics): TaskMetrics = { + new TaskMetrics( + executorDeserializeTime = internal.executorDeserializeTime, + executorRunTime = internal.executorRunTime, + resultSize = internal.resultSize, + jvmGcTime = internal.jvmGCTime, + resultSerializationTime = internal.resultSerializationTime, + memoryBytesSpilled = internal.memoryBytesSpilled, + diskBytesSpilled = internal.diskBytesSpilled, + inputMetrics = internal.inputMetrics.map { 
convertInputMetrics }, + outputMetrics = Option(internal.outputMetrics).flatten.map { convertOutputMetrics }, + shuffleReadMetrics = internal.shuffleReadMetrics.map { convertShuffleReadMetrics }, + shuffleWriteMetrics = internal.shuffleWriteMetrics.map { convertShuffleWriteMetrics } + ) + } + + def convertInputMetrics(internal: InternalInputMetrics): InputMetrics = { + new InputMetrics( + bytesRead = internal.bytesRead, + recordsRead = internal.recordsRead + ) + } + + def convertOutputMetrics(internal: InternalOutputMetrics): OutputMetrics = { + new OutputMetrics( + bytesWritten = internal.bytesWritten, + recordsWritten = internal.recordsWritten + ) + } + + def convertShuffleReadMetrics(internal: InternalShuffleReadMetrics): ShuffleReadMetrics = { + new ShuffleReadMetrics( + remoteBlocksFetched = internal.remoteBlocksFetched, + localBlocksFetched = internal.localBlocksFetched, + fetchWaitTime = internal.fetchWaitTime, + remoteBytesRead = internal.remoteBytesRead, + totalBlocksFetched = internal.totalBlocksFetched, + recordsRead = internal.recordsRead + ) + } + + def convertShuffleWriteMetrics(internal: InternalShuffleWriteMetrics): ShuffleWriteMetrics = { + new ShuffleWriteMetrics( + bytesWritten = internal.shuffleBytesWritten, + writeTime = internal.shuffleWriteTime, + recordsWritten = internal.shuffleRecordsWritten + ) + } +} + +/** + * Helper for getting distributions from nested metric types. Many of the metrics we want are + * contained in options inside TaskMetrics (eg., ShuffleWriteMetrics). This makes it easy to handle + * the options (returning None if the metrics are all empty), and extract the quantiles for each + * metric. After creating an instance, call metricOption to get the result type. + */ +private[v1] abstract class MetricHelper[I, O]( + rawMetrics: Seq[InternalTaskMetrics], + quantiles: Array[Double]) { + + def getSubmetrics(raw: InternalTaskMetrics): Option[I] + + def build: O + + val data: Seq[I] = rawMetrics.flatMap(getSubmetrics) + + /** applies the given function to all input metrics, and returns the quantiles */ + def submetricQuantiles(f: I => Double): IndexedSeq[Double] = { + Distribution(data.map { d => f(d) }).get.getQuantiles(quantiles) + } + + def metricOption: Option[O] = { + if (data.isEmpty) { + None + } else { + Some(build) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala new file mode 100644 index 0000000000000..f73c742732dec --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import javax.servlet.ServletContext +import javax.ws.rs._ +import javax.ws.rs.core.{Context, Response} + +import com.sun.jersey.api.core.ResourceConfig +import com.sun.jersey.spi.container.servlet.ServletContainer +import org.eclipse.jetty.server.handler.ContextHandler +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} + +import org.apache.spark.SecurityManager +import org.apache.spark.ui.SparkUI + +/** + * Main entry point for serving spark application metrics as json, using JAX-RS. + * + * Each resource should have endpoints that return **public** classes defined in api.scala. Mima + * binary compatibility checks ensure that we don't inadvertently make changes that break the api. + * The returned objects are automatically converted to json by jackson with JacksonMessageWriter. + * In addition, there are a number of tests in HistoryServerSuite that compare the json to "golden + * files". Any changes and additions should be reflected there as well -- see the notes in + * HistoryServerSuite. + */ +@Path("/v1") +private[v1] class ApiRootResource extends UIRootFromServletContext { + + @Path("applications") + def getApplicationList(): ApplicationListResource = { + new ApplicationListResource(uiRoot) + } + + @Path("applications/{appId}") + def getApplication(): OneApplicationResource = { + new OneApplicationResource(uiRoot) + } + + @Path("applications/{appId}/{attemptId}/jobs") + def getJobs( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllJobsResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllJobsResource(ui) + } + } + + @Path("applications/{appId}/jobs") + def getJobs(@PathParam("appId") appId: String): AllJobsResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllJobsResource(ui) + } + } + + @Path("applications/{appId}/jobs/{jobId: \\d+}") + def getJob(@PathParam("appId") appId: String): OneJobResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneJobResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/jobs/{jobId: \\d+}") + def getJob( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneJobResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneJobResource(ui) + } + } + + @Path("applications/{appId}/executors") + def getExecutors(@PathParam("appId") appId: String): ExecutorListResource = { + uiRoot.withSparkUI(appId, None) { ui => + new ExecutorListResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/executors") + def getExecutors( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): ExecutorListResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new ExecutorListResource(ui) + } + } + + + @Path("applications/{appId}/stages") + def getStages(@PathParam("appId") appId: String): AllStagesResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllStagesResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/stages") + def getStages( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllStagesResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllStagesResource(ui) + } + } + + @Path("applications/{appId}/stages/{stageId: \\d+}") + def getStage(@PathParam("appId") appId: String): OneStageResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneStageResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/stages/{stageId: \\d+}") + def getStage( + 
@PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneStageResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneStageResource(ui) + } + } + + @Path("applications/{appId}/storage/rdd") + def getRdds(@PathParam("appId") appId: String): AllRDDResource = { + uiRoot.withSparkUI(appId, None) { ui => + new AllRDDResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/storage/rdd") + def getRdds( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): AllRDDResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new AllRDDResource(ui) + } + } + + @Path("applications/{appId}/storage/rdd/{rddId: \\d+}") + def getRdd(@PathParam("appId") appId: String): OneRDDResource = { + uiRoot.withSparkUI(appId, None) { ui => + new OneRDDResource(ui) + } + } + + @Path("applications/{appId}/{attemptId}/storage/rdd/{rddId: \\d+}") + def getRdd( + @PathParam("appId") appId: String, + @PathParam("attemptId") attemptId: String): OneRDDResource = { + uiRoot.withSparkUI(appId, Some(attemptId)) { ui => + new OneRDDResource(ui) + } + } + +} + +private[spark] object ApiRootResource { + + def getServletHandler(uiRoot: UIRoot): ServletContextHandler = { + val jerseyContext = new ServletContextHandler(ServletContextHandler.NO_SESSIONS) + jerseyContext.setContextPath("/api") + val holder: ServletHolder = new ServletHolder(classOf[ServletContainer]) + holder.setInitParameter("com.sun.jersey.config.property.resourceConfigClass", + "com.sun.jersey.api.core.PackagesResourceConfig") + holder.setInitParameter("com.sun.jersey.config.property.packages", + "org.apache.spark.status.api.v1") + holder.setInitParameter(ResourceConfig.PROPERTY_CONTAINER_REQUEST_FILTERS, + classOf[SecurityFilter].getCanonicalName) + UIRootFromServletContext.setUiRoot(jerseyContext, uiRoot) + jerseyContext.addServlet(holder, "/*") + jerseyContext + } +} + +/** + * This trait is shared by all the root containers for application UI information -- + * the HistoryServer, the Master UI, and the application UI. This provides the common + * interface needed for them all to expose application info as JSON. + */ +private[spark] trait UIRoot { + def getSparkUI(appKey: String): Option[SparkUI] + def getApplicationInfoList: Iterator[ApplicationInfo] + + /** + * Get the Spark UI with the given appId, and apply a function + * to it.
If there is no such app, throw an appropriate exception + */ + def withSparkUI[T](appId: String, attemptId: Option[String])(f: SparkUI => T): T = { + val appKey = attemptId.map(appId + "/" + _).getOrElse(appId) + getSparkUI(appKey) match { + case Some(ui) => + f(ui) + case None => throw new NotFoundException("no such app: " + appId) + } + } + def securityManager: SecurityManager +} + +private[v1] object UIRootFromServletContext { + + private val attribute = getClass.getCanonicalName + + def setUiRoot(contextHandler: ContextHandler, uiRoot: UIRoot): Unit = { + contextHandler.setAttribute(attribute, uiRoot) + } + + def getUiRoot(context: ServletContext): UIRoot = { + context.getAttribute(attribute).asInstanceOf[UIRoot] + } +} + +private[v1] trait UIRootFromServletContext { + @Context + var servletContext: ServletContext = _ + + def uiRoot: UIRoot = UIRootFromServletContext.getUiRoot(servletContext) +} + +private[v1] class NotFoundException(msg: String) extends WebApplicationException( + new NoSuchElementException(msg), + Response + .status(Response.Status.NOT_FOUND) + .entity(ErrorWrapper(msg)) + .build() +) + +private[v1] class BadParameterException(msg: String) extends WebApplicationException( + new IllegalArgumentException(msg), + Response + .status(Response.Status.BAD_REQUEST) + .entity(ErrorWrapper(msg)) + .build() +) { + def this(param: String, exp: String, actual: String) = { + this(raw"""Bad value for parameter "$param". Expected a $exp, got "$actual"""") + } +} + +/** + * Signal to JacksonMessageWriter to not convert the message into json (which would result in an + * extra set of quotes). + */ +private[v1] case class ErrorWrapper(s: String) diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala new file mode 100644 index 0000000000000..17b521f3e1d41 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApplicationListResource.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
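The servlet handler above mounts Jersey under "/api", ApiRootResource adds the "/v1" prefix, and each resource registers its own sub-path, so a running application serves its application list at /api/v1/applications. A minimal client sketch in plain Scala; the object name, host, and port (the default UI port 4040) are assumptions rather than anything defined by this patch.

    import scala.io.Source

    object ApiClientSketch {
      def main(args: Array[String]): Unit = {
        // "/api" comes from jerseyContext.setContextPath, "/v1" from @Path on ApiRootResource,
        // and "applications" from the resource wired up in getApplicationList().
        val base = "http://localhost:4040/api/v1"
        println(Source.fromURL(s"$base/applications").mkString)
      }
    }

The same endpoint accepts the minDate/maxDate and status query parameters handled by ApplicationListResource, which follows.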
+ */ +package org.apache.spark.status.api.v1 + +import java.util.{Arrays, Date, List => JList} +import javax.ws.rs.{DefaultValue, GET, Produces, QueryParam} +import javax.ws.rs.core.MediaType + +import org.apache.spark.deploy.history.ApplicationHistoryInfo +import org.apache.spark.deploy.master.{ApplicationInfo => InternalApplicationInfo} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class ApplicationListResource(uiRoot: UIRoot) { + + @GET + def appList( + @QueryParam("status") status: JList[ApplicationStatus], + @DefaultValue("2010-01-01") @QueryParam("minDate") minDate: SimpleDateParam, + @DefaultValue("3000-01-01") @QueryParam("maxDate") maxDate: SimpleDateParam) + : Iterator[ApplicationInfo] = { + val allApps = uiRoot.getApplicationInfoList + val adjStatus = { + if (status.isEmpty) { + Arrays.asList(ApplicationStatus.values(): _*) + } else { + status + } + } + val includeCompleted = adjStatus.contains(ApplicationStatus.COMPLETED) + val includeRunning = adjStatus.contains(ApplicationStatus.RUNNING) + allApps.filter { app => + val anyRunning = app.attempts.exists(!_.completed) + // if any attempt is still running, we consider the app to also still be running + val statusOk = (!anyRunning && includeCompleted) || + (anyRunning && includeRunning) + // keep the app if *any* attempts fall in the right time window + val dateOk = app.attempts.exists { attempt => + attempt.startTime.getTime >= minDate.timestamp && + attempt.startTime.getTime <= maxDate.timestamp + } + statusOk && dateOk + } + } +} + +private[spark] object ApplicationsListResource { + def appHistoryInfoToPublicAppInfo(app: ApplicationHistoryInfo): ApplicationInfo = { + new ApplicationInfo( + id = app.id, + name = app.name, + attempts = app.attempts.map { internalAttemptInfo => + new ApplicationAttemptInfo( + attemptId = internalAttemptInfo.attemptId, + startTime = new Date(internalAttemptInfo.startTime), + endTime = new Date(internalAttemptInfo.endTime), + sparkUser = internalAttemptInfo.sparkUser, + completed = internalAttemptInfo.completed + ) + } + ) + } + + def convertApplicationInfo( + internal: InternalApplicationInfo, + completed: Boolean): ApplicationInfo = { + // standalone application info always has just one attempt + new ApplicationInfo( + id = internal.id, + name = internal.desc.name, + attempts = Seq(new ApplicationAttemptInfo( + attemptId = None, + startTime = new Date(internal.startTime), + endTime = new Date(internal.endTime), + sparkUser = internal.desc.user, + completed = completed + )) + ) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala new file mode 100644 index 0000000000000..8ad4656b4dada --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ExecutorListResource.scala @@ -0,0 +1,36 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{GET, PathParam, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.exec.ExecutorsPage + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class ExecutorListResource(ui: SparkUI) { + + @GET + def executorList(): Seq[ExecutorSummary] = { + val listener = ui.executorsListener + val storageStatusList = listener.storageStatusList + (0 until storageStatusList.size).map { statusId => + ExecutorsPage.getExecInfo(listener, statusId) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala new file mode 100644 index 0000000000000..202a5191ad57d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/JacksonMessageWriter.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.io.OutputStream +import java.lang.annotation.Annotation +import java.lang.reflect.Type +import java.text.SimpleDateFormat +import java.util.{Calendar, SimpleTimeZone} +import javax.ws.rs.Produces +import javax.ws.rs.core.{MediaType, MultivaluedMap} +import javax.ws.rs.ext.{MessageBodyWriter, Provider} + +import com.fasterxml.jackson.annotation.JsonInclude +import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature} + +/** + * This class converts the POJO metric responses into json, using jackson. + * + * This doesn't follow the standard jersey-jackson plugin options, because we want to stick + * with an old version of jersey (since we have it from yarn anyway) and don't want to pull in lots + * of dependencies from a new plugin. + * + * Note that jersey automatically discovers this class based on its package and its annotations. 
+ */ +@Provider +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class JacksonMessageWriter extends MessageBodyWriter[Object]{ + + val mapper = new ObjectMapper() { + override def writeValueAsString(t: Any): String = { + super.writeValueAsString(t) + } + } + mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule) + mapper.enable(SerializationFeature.INDENT_OUTPUT) + mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL) + mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat) + + override def isWriteable( + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType): Boolean = { + true + } + + override def writeTo( + t: Object, + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType, + multivaluedMap: MultivaluedMap[String, AnyRef], + outputStream: OutputStream): Unit = { + t match { + case ErrorWrapper(err) => outputStream.write(err.getBytes("utf-8")) + case _ => mapper.writeValue(outputStream, t) + } + } + + override def getSize( + t: Object, + aClass: Class[_], + `type`: Type, + annotations: Array[Annotation], + mediaType: MediaType): Long = { + -1L + } +} + +private[spark] object JacksonMessageWriter { + def makeISODateFormat: SimpleDateFormat = { + val iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'GMT'") + val cal = Calendar.getInstance(new SimpleTimeZone(0, "GMT")) + iso8601.setCalendar(cal) + iso8601 + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala new file mode 100644 index 0000000000000..b5ef72649e295 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.core.MediaType +import javax.ws.rs.{Produces, PathParam, GET} + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneApplicationResource(uiRoot: UIRoot) { + + @GET + def getApp(@PathParam("appId") appId: String): ApplicationInfo = { + val apps = uiRoot.getApplicationInfoList.find { _.id == appId } + apps.getOrElse(throw new NotFoundException("unknown app: " + appId)) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala new file mode 100644 index 0000000000000..6d8a60d480aed --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneJobResource.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
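A minimal sketch, not part of the patch, of how the mapper configured in JacksonMessageWriter renders the public classes declared in api.scala later in this change. The object name and all sample field values are invented, and the file has to sit in the v1 package because the api.scala constructors are private[spark].

    package org.apache.spark.status.api.v1

    import java.util.Date

    import com.fasterxml.jackson.annotation.JsonInclude
    import com.fasterxml.jackson.databind.{ObjectMapper, SerializationFeature}

    object ApiJsonSketch {
      def main(args: Array[String]): Unit = {
        // Mirror JacksonMessageWriter's configuration.
        val mapper = new ObjectMapper()
        mapper.registerModule(com.fasterxml.jackson.module.scala.DefaultScalaModule)
        mapper.enable(SerializationFeature.INDENT_OUTPUT)
        mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL)
        mapper.setDateFormat(JacksonMessageWriter.makeISODateFormat)

        // Invented sample values, only to show the shape of the JSON.
        val app = new ApplicationInfo(
          id = "app-00000000000000-0000",
          name = "sample app",
          attempts = Seq(new ApplicationAttemptInfo(
            attemptId = None,
            startTime = new Date(0L),
            endTime = new Date(-1L),
            sparkUser = "someone",
            completed = false)))
        println(mapper.writeValueAsString(app))
      }
    }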
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{PathParam, GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.JobExecutionStatus +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.UIData.JobUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneJobResource(ui: SparkUI) { + + @GET + def oneJob(@PathParam("jobId") jobId: Int): JobData = { + val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] = + AllJobsResource.getStatusToJobs(ui) + val jobOpt = statusToJobs.map {_._2} .flatten.find { jobInfo => jobInfo.jobId == jobId} + jobOpt.map { job => + AllJobsResource.convertJobData(job, ui.jobProgressListener, false) + }.getOrElse { + throw new NotFoundException("unknown job: " + jobId) + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala new file mode 100644 index 0000000000000..dfdc09c6caf3b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneRDDResource.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.{PathParam, GET, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.ui.SparkUI + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneRDDResource(ui: SparkUI) { + + @GET + def rddData(@PathParam("rddId") rddId: Int): RDDStorageInfo = { + AllRDDResource.getRDDStorageInfo(rddId, ui.storageListener, true).getOrElse( + throw new NotFoundException(s"no rdd found w/ id $rddId") + ) + } + +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala new file mode 100644 index 0000000000000..f9812f06cf527 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneStageResource.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.SparkException +import org.apache.spark.scheduler.StageInfo +import org.apache.spark.status.api.v1.StageStatus._ +import org.apache.spark.status.api.v1.TaskSorting._ +import org.apache.spark.ui.SparkUI +import org.apache.spark.ui.jobs.JobProgressListener +import org.apache.spark.ui.jobs.UIData.StageUIData + +@Produces(Array(MediaType.APPLICATION_JSON)) +private[v1] class OneStageResource(ui: SparkUI) { + + @GET + @Path("") + def stageData(@PathParam("stageId") stageId: Int): Seq[StageData] = { + withStage(stageId){ stageAttempts => + stageAttempts.map { stage => + AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui, + includeDetails = true) + } + } + } + + @GET + @Path("/{stageAttemptId: \\d+}") + def oneAttemptData( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int): StageData = { + withStageAttempt(stageId, stageAttemptId) { stage => + AllStagesResource.stageUiToStageData(stage.status, stage.info, stage.ui, + includeDetails = true) + } + } + + @GET + @Path("/{stageAttemptId: \\d+}/taskSummary") + def taskSummary( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int, + @DefaultValue("0.05,0.25,0.5,0.75,0.95") @QueryParam("quantiles") quantileString: String) + : TaskMetricDistributions = { + withStageAttempt(stageId, stageAttemptId) { stage => + val quantiles = quantileString.split(",").map { s => + try { + s.toDouble + } catch { + case nfe: NumberFormatException => + throw new BadParameterException("quantiles", "double", s) + } + } + AllStagesResource.taskMetricDistributions(stage.ui.taskData.values, quantiles) + } + } + + @GET + @Path("/{stageAttemptId: \\d+}/taskList") + def taskList( + @PathParam("stageId") stageId: Int, + @PathParam("stageAttemptId") stageAttemptId: Int, + @DefaultValue("0") @QueryParam("offset") offset: Int, + @DefaultValue("20") @QueryParam("length") length: Int, + @DefaultValue("ID") @QueryParam("sortBy") sortBy: TaskSorting): Seq[TaskData] = { + withStageAttempt(stageId, stageAttemptId) { stage => + val tasks = stage.ui.taskData.values.map{AllStagesResource.convertTaskData}.toIndexedSeq + .sorted(OneStageResource.ordering(sortBy)) + tasks.slice(offset, offset + length) + } + } + + private case class StageStatusInfoUi(status: StageStatus, info: StageInfo, ui: StageUIData) + + private def withStage[T](stageId: Int)(f: Seq[StageStatusInfoUi] => T): T = { + val stageAttempts = findStageStatusUIData(ui.jobProgressListener, stageId) + if (stageAttempts.isEmpty) { + throw new NotFoundException("unknown stage: " + stageId) + } else { + f(stageAttempts) + } + } + + private def findStageStatusUIData( + listener: JobProgressListener, + stageId: Int): 
Seq[StageStatusInfoUi] = { + listener.synchronized { + def getStatusInfoUi(status: StageStatus, infos: Seq[StageInfo]): Seq[StageStatusInfoUi] = { + infos.filter { _.stageId == stageId }.map { info => + val ui = listener.stageIdToData.getOrElse((info.stageId, info.attemptId), + // this is an internal error -- we should always have uiData + throw new SparkException( + s"no stage ui data found for stage: ${info.stageId}:${info.attemptId}") + ) + StageStatusInfoUi(status, info, ui) + } + } + getStatusInfoUi(ACTIVE, listener.activeStages.values.toSeq) ++ + getStatusInfoUi(COMPLETE, listener.completedStages) ++ + getStatusInfoUi(FAILED, listener.failedStages) ++ + getStatusInfoUi(PENDING, listener.pendingStages.values.toSeq) + } + } + + private def withStageAttempt[T]( + stageId: Int, + stageAttemptId: Int) + (f: StageStatusInfoUi => T): T = { + withStage(stageId) { attempts => + val oneAttempt = attempts.find { stage => stage.info.attemptId == stageAttemptId } + oneAttempt match { + case Some(stage) => + f(stage) + case None => + val stageAttempts = attempts.map { _.info.attemptId } + throw new NotFoundException(s"unknown attempt for stage $stageId. " + + s"Found attempts: ${stageAttempts.mkString("[", ",", "]")}") + } + } + } +} + +object OneStageResource { + def ordering(taskSorting: TaskSorting): Ordering[TaskData] = { + val extractor: (TaskData => Long) = td => + taskSorting match { + case ID => td.taskId + case INCREASING_RUNTIME => td.taskMetrics.map{_.executorRunTime}.getOrElse(-1L) + case DECREASING_RUNTIME => -td.taskMetrics.map{_.executorRunTime}.getOrElse(-1L) + } + Ordering.by(extractor) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala new file mode 100644 index 0000000000000..95fbd96ade5ab --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SecurityFilter.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
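A small self-contained sketch of the trick ordering() uses above: DECREASING_RUNTIME negates the extracted run time, so a plain ascending sort puts the longest-running tasks first. The Task case class and the values are invented for illustration.

    object TaskSortingSketch {
      def main(args: Array[String]): Unit = {
        case class Task(taskId: Long, runTime: Long)
        // Mirrors Ordering.by(extractor) with the negated extractor.
        val decreasingRunTime: Ordering[Task] = Ordering.by(t => -t.runTime)
        val tasks = Seq(Task(1, 300), Task(2, 100), Task(3, 200))
        println(tasks.sorted(decreasingRunTime).map(_.taskId)) // List(1, 3, 2)
      }
    }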
+ */ +package org.apache.spark.status.api.v1 + +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.Response + +import com.sun.jersey.spi.container.{ContainerRequest, ContainerRequestFilter} + +private[v1] class SecurityFilter extends ContainerRequestFilter with UIRootFromServletContext { + def filter(req: ContainerRequest): ContainerRequest = { + val user = Option(req.getUserPrincipal).map { _.getName }.orNull + if (uiRoot.securityManager.checkUIViewPermissions(user)) { + req + } else { + throw new WebApplicationException( + Response + .status(Response.Status.FORBIDDEN) + .entity(raw"""user "$user" is not authorized""") + .build() + ) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala new file mode 100644 index 0000000000000..cee29786c3019 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/SimpleDateParam.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.text.SimpleDateFormat +import java.util.TimeZone +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.Response +import javax.ws.rs.core.Response.Status + +import scala.util.Try + +private[v1] class SimpleDateParam(val originalValue: String) { + val timestamp: Long = { + SimpleDateParam.formats.collectFirst { + case fmt if Try(fmt.parse(originalValue)).isSuccess => + fmt.parse(originalValue).getTime() + }.getOrElse( + throw new WebApplicationException( + Response + .status(Status.BAD_REQUEST) + .entity("Couldn't parse date: " + originalValue) + .build() + ) + ) + } +} + +private[v1] object SimpleDateParam { + + val formats: Seq[SimpleDateFormat] = { + + val gmtDay = new SimpleDateFormat("yyyy-MM-dd") + gmtDay.setTimeZone(TimeZone.getTimeZone("GMT")) + + Seq( + new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSz"), + gmtDay + ) + } +} diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala new file mode 100644 index 0000000000000..2bec64f2ef02b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
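A quick sketch of the two date formats SimpleDateParam (above) accepts for the minDate/maxDate query parameters; the object name and the concrete dates are invented, and the snippet assumes it lives in the v1 package so the private[v1] class is visible.

    package org.apache.spark.status.api.v1

    object DateParamSketch {
      def main(args: Array[String]): Unit = {
        // Day precision, interpreted in GMT.
        println(new SimpleDateParam("2015-02-10").timestamp)
        // Full timestamp matching "yyyy-MM-dd'T'HH:mm:ss.SSSz".
        println(new SimpleDateParam("2015-02-03T16:42:40.000GMT").timestamp)
      }
    }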
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import java.util.Date + +import scala.collection.Map + +import org.apache.spark.JobExecutionStatus + +class ApplicationInfo private[spark]( + val id: String, + val name: String, + val attempts: Seq[ApplicationAttemptInfo]) + +class ApplicationAttemptInfo private[spark]( + val attemptId: Option[String], + val startTime: Date, + val endTime: Date, + val sparkUser: String, + val completed: Boolean = false) + +class ExecutorStageSummary private[spark]( + val taskTime : Long, + val failedTasks : Int, + val succeededTasks : Int, + val inputBytes : Long, + val outputBytes : Long, + val shuffleRead : Long, + val shuffleWrite : Long, + val memoryBytesSpilled : Long, + val diskBytesSpilled : Long) + +class ExecutorSummary private[spark]( + val id: String, + val hostPort: String, + val rddBlocks: Int, + val memoryUsed: Long, + val diskUsed: Long, + val activeTasks: Int, + val failedTasks: Int, + val completedTasks: Int, + val totalTasks: Int, + val totalDuration: Long, + val totalInputBytes: Long, + val totalShuffleRead: Long, + val totalShuffleWrite: Long, + val maxMemory: Long, + val executorLogs: Map[String, String]) + +class JobData private[spark]( + val jobId: Int, + val name: String, + val description: Option[String], + val submissionTime: Option[Date], + val completionTime: Option[Date], + val stageIds: Seq[Int], + val jobGroup: Option[String], + val status: JobExecutionStatus, + val numTasks: Int, + val numActiveTasks: Int, + val numCompletedTasks: Int, + val numSkippedTasks: Int, + val numFailedTasks: Int, + val numActiveStages: Int, + val numCompletedStages: Int, + val numSkippedStages: Int, + val numFailedStages: Int) + +// Q: should Tachyon size go in here as well? currently the UI only shows it on the overall storage +// page ... does anybody pay attention to it? 
+class RDDStorageInfo private[spark]( + val id: Int, + val name: String, + val numPartitions: Int, + val numCachedPartitions: Int, + val storageLevel: String, + val memoryUsed: Long, + val diskUsed: Long, + val dataDistribution: Option[Seq[RDDDataDistribution]], + val partitions: Option[Seq[RDDPartitionInfo]]) + +class RDDDataDistribution private[spark]( + val address: String, + val memoryUsed: Long, + val memoryRemaining: Long, + val diskUsed: Long) + +class RDDPartitionInfo private[spark]( + val blockName: String, + val storageLevel: String, + val memoryUsed: Long, + val diskUsed: Long, + val executors: Seq[String]) + +class StageData private[spark]( + val status: StageStatus, + val stageId: Int, + val attemptId: Int, + val numActiveTasks: Int , + val numCompleteTasks: Int, + val numFailedTasks: Int, + + val executorRunTime: Long, + + val inputBytes: Long, + val inputRecords: Long, + val outputBytes: Long, + val outputRecords: Long, + val shuffleReadBytes: Long, + val shuffleReadRecords: Long, + val shuffleWriteBytes: Long, + val shuffleWriteRecords: Long, + val memoryBytesSpilled: Long, + val diskBytesSpilled: Long, + + val name: String, + val details: String, + val schedulingPool: String, + + val accumulatorUpdates: Seq[AccumulableInfo], + val tasks: Option[Map[Long, TaskData]], + val executorSummary: Option[Map[String, ExecutorStageSummary]]) + +class TaskData private[spark]( + val taskId: Long, + val index: Int, + val attempt: Int, + val launchTime: Date, + val executorId: String, + val host: String, + val taskLocality: String, + val speculative: Boolean, + val accumulatorUpdates: Seq[AccumulableInfo], + val errorMessage: Option[String] = None, + val taskMetrics: Option[TaskMetrics] = None) + +class TaskMetrics private[spark]( + val executorDeserializeTime: Long, + val executorRunTime: Long, + val resultSize: Long, + val jvmGcTime: Long, + val resultSerializationTime: Long, + val memoryBytesSpilled: Long, + val diskBytesSpilled: Long, + val inputMetrics: Option[InputMetrics], + val outputMetrics: Option[OutputMetrics], + val shuffleReadMetrics: Option[ShuffleReadMetrics], + val shuffleWriteMetrics: Option[ShuffleWriteMetrics]) + +class InputMetrics private[spark]( + val bytesRead: Long, + val recordsRead: Long) + +class OutputMetrics private[spark]( + val bytesWritten: Long, + val recordsWritten: Long) + +class ShuffleReadMetrics private[spark]( + val remoteBlocksFetched: Int, + val localBlocksFetched: Int, + val fetchWaitTime: Long, + val remoteBytesRead: Long, + val totalBlocksFetched: Int, + val recordsRead: Long) + +class ShuffleWriteMetrics private[spark]( + val bytesWritten: Long, + val writeTime: Long, + val recordsWritten: Long) + +class TaskMetricDistributions private[spark]( + val quantiles: IndexedSeq[Double], + + val executorDeserializeTime: IndexedSeq[Double], + val executorRunTime: IndexedSeq[Double], + val resultSize: IndexedSeq[Double], + val jvmGcTime: IndexedSeq[Double], + val resultSerializationTime: IndexedSeq[Double], + val memoryBytesSpilled: IndexedSeq[Double], + val diskBytesSpilled: IndexedSeq[Double], + + val inputMetrics: Option[InputMetricDistributions], + val outputMetrics: Option[OutputMetricDistributions], + val shuffleReadMetrics: Option[ShuffleReadMetricDistributions], + val shuffleWriteMetrics: Option[ShuffleWriteMetricDistributions]) + +class InputMetricDistributions private[spark]( + val bytesRead: IndexedSeq[Double], + val recordsRead: IndexedSeq[Double]) + +class OutputMetricDistributions private[spark]( + val bytesWritten: IndexedSeq[Double], + 
val recordsWritten: IndexedSeq[Double]) + +class ShuffleReadMetricDistributions private[spark]( + val readBytes: IndexedSeq[Double], + val readRecords: IndexedSeq[Double], + val remoteBlocksFetched: IndexedSeq[Double], + val localBlocksFetched: IndexedSeq[Double], + val fetchWaitTime: IndexedSeq[Double], + val remoteBytesRead: IndexedSeq[Double], + val totalBlocksFetched: IndexedSeq[Double]) + +class ShuffleWriteMetricDistributions private[spark]( + val writeBytes: IndexedSeq[Double], + val writeRecords: IndexedSeq[Double], + val writeTime: IndexedSeq[Double]) + +class AccumulableInfo private[spark]( + val id: Long, + val name: String, + val update: Option[String], + val value: String) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index c186fd360fef6..524f6970992a5 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -54,7 +54,7 @@ case class RDDBlockId(rddId: Int, splitIndex: Int) extends BlockId { } // Format of the shuffle block ids (including data and index) should be kept in sync with -// org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getBlockData(). +// org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getBlockData(). @DeveloperApi case class ShuffleBlockId(shuffleId: Int, mapId: Int, reduceId: Int) extends BlockId { override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index a46fecd2274ef..16d67cbfca80b 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -21,8 +21,7 @@ import java.io.{BufferedOutputStream, ByteArrayOutputStream, File, InputStream, import java.nio.{ByteBuffer, MappedByteBuffer} import scala.collection.mutable.{ArrayBuffer, HashMap} -import scala.concurrent.{Await, Future} -import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.{ExecutionContext, Await, Future} import scala.concurrent.duration._ import scala.util.Random @@ -77,6 +76,9 @@ private[spark] class BlockManager( private val blockInfo = new TimeStampedHashMap[BlockId, BlockInfo] + private val futureExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("block-manager-future", 128)) + // Actual storage of where blocks are kept private var externalBlockStoreInitialized = false private[spark] val memoryStore = new MemoryStore(this, maxMemory) @@ -266,11 +268,13 @@ private[spark] class BlockManager( asyncReregisterLock.synchronized { if (asyncReregisterTask == null) { asyncReregisterTask = Future[Unit] { + // This is a blocking action and should run in futureExecutionContext which is a cached + // thread pool reregister() asyncReregisterLock.synchronized { asyncReregisterTask = null } - } + }(futureExecutionContext) } } } @@ -431,10 +435,11 @@ private[spark] class BlockManager( // As an optimization for map output fetches, if the block is for a shuffle, return it // without acquiring a lock; the disk store never deletes (recent) items so this should work if (blockId.isShuffle) { - val shuffleBlockManager = shuffleManager.shuffleBlockResolver + val shuffleBlockResolver = shuffleManager.shuffleBlockResolver // TODO: This should gracefully handle case where local block is not 
available. Currently // downstream code will throw an exception. - Option(shuffleBlockManager.getBlockData(blockId.asInstanceOf[ShuffleBlockId]).nioByteBuffer()) + Option( + shuffleBlockResolver.getBlockData(blockId.asInstanceOf[ShuffleBlockId]).nioByteBuffer()) } else { doGetLocal(blockId, asBlockResult = false).asInstanceOf[Option[ByteBuffer]] } @@ -743,7 +748,11 @@ private[spark] class BlockManager( case b: ByteBufferValues if putLevel.replication > 1 => // Duplicate doesn't copy the bytes, but just creates a wrapper val bufferView = b.buffer.duplicate() - Future { replicate(blockId, bufferView, putLevel) } + Future { + // This is a blocking action and should run in futureExecutionContext which is a cached + // thread pool + replicate(blockId, bufferView, putLevel) + }(futureExecutionContext) case _ => null } @@ -1217,6 +1226,7 @@ private[spark] class BlockManager( } metadataCleaner.cancel() broadcastCleaner.cancel() + futureExecutionContext.shutdownNow() logInfo("BlockManager stopped") } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index a85e1c7632973..abcad9438bf28 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -17,13 +17,14 @@ package org.apache.spark.storage +import scala.collection.Iterable +import scala.collection.generic.CanBuildFrom import scala.concurrent.{Await, Future} -import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.storage.BlockManagerMessages._ -import org.apache.spark.util.RpcUtils +import org.apache.spark.util.{ThreadUtils, RpcUtils} private[spark] class BlockManagerMaster( @@ -102,8 +103,8 @@ class BlockManagerMaster( val future = driverEndpoint.askWithRetry[Future[Seq[Int]]](RemoveRdd(rddId)) future.onFailure { case e: Exception => - logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}}") - } + logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}}", e) + }(ThreadUtils.sameThread) if (blocking) { Await.result(future, timeout) } @@ -114,8 +115,8 @@ class BlockManagerMaster( val future = driverEndpoint.askWithRetry[Future[Seq[Boolean]]](RemoveShuffle(shuffleId)) future.onFailure { case e: Exception => - logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}}") - } + logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}}", e) + }(ThreadUtils.sameThread) if (blocking) { Await.result(future, timeout) } @@ -128,8 +129,8 @@ class BlockManagerMaster( future.onFailure { case e: Exception => logWarning(s"Failed to remove broadcast $broadcastId" + - s" with removeFromMaster = $removeFromMaster - ${e.getMessage}}") - } + s" with removeFromMaster = $removeFromMaster - ${e.getMessage}}", e) + }(ThreadUtils.sameThread) if (blocking) { Await.result(future, timeout) } @@ -169,11 +170,17 @@ class BlockManagerMaster( val response = driverEndpoint. 
askWithRetry[Map[BlockManagerId, Future[Option[BlockStatus]]]](msg) val (blockManagerIds, futures) = response.unzip - val result = Await.result(Future.sequence(futures), timeout) - if (result == null) { + implicit val sameThread = ThreadUtils.sameThread + val cbf = + implicitly[ + CanBuildFrom[Iterable[Future[Option[BlockStatus]]], + Option[BlockStatus], + Iterable[Option[BlockStatus]]]] + val blockStatus = Await.result( + Future.sequence[Option[BlockStatus], Iterable](futures)(cbf, ThreadUtils.sameThread), timeout) + if (blockStatus == null) { throw new SparkException("BlockManager returned null for BlockStatus query: " + blockId) } - val blockStatus = result.asInstanceOf[Iterable[Option[BlockStatus]]] blockManagerIds.zip(blockStatus).flatMap { case (blockManagerId, status) => status.map { s => (blockManagerId, s) } }.toMap diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 3afb4c3c02e2d..2cd8c5297b741 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -292,16 +292,16 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(id.executorId) match { case Some(oldId) => // A block manager of the same executor already exists, so remove it (assumed dead) - logError("Got two different block manager registrations on same executor - " + logError("Got two different block manager registrations on same executor - " + s" will replace old one $oldId with new one $id") - removeExecutor(id.executorId) + removeExecutor(id.executorId) case None => } logInfo("Registering block manager %s with %s RAM, %s".format( id.hostPort, Utils.bytesToString(maxMemSize), id)) - + blockManagerIdByExecutor(id.executorId) = id - + blockManagerInfo(id) = new BlockManagerInfo( id, System.currentTimeMillis(), maxMemSize, slaveEndpoint) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala index 543df4e1350dd..7478ab0fc2f7a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveEndpoint.scala @@ -40,7 +40,7 @@ class BlockManagerSlaveEndpoint( private implicit val asyncExecutionContext = ExecutionContext.fromExecutorService(asyncThreadPool) // Operations that involve removing blocks may be slow and should be done asynchronously - override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case RemoveBlock(blockId) => doAsync[Boolean]("removing block " + blockId, context) { blockManager.removeBlock(blockId) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala index 8569c6f3cbbc3..c5ba9af3e2658 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala @@ -17,9 +17,8 @@ package org.apache.spark.storage -import com.codahale.metrics.{Gauge,MetricRegistry} +import com.codahale.metrics.{Gauge, MetricRegistry} -import org.apache.spark.SparkContext import org.apache.spark.metrics.source.Source private[spark] class BlockManagerSource(val blockManager: 
BlockManager) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index 499dd97c0656a..a33f22ef52687 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -59,7 +59,7 @@ private[spark] abstract class BlockObjectWriter(val blockId: BlockId) extends Ou def write(key: Any, value: Any) /** - * Notify the writer that a record worth of bytes has been written with writeBytes. + * Notify the writer that a record worth of bytes has been written with OutputStream#write. */ def recordWritten() @@ -86,16 +86,6 @@ private[spark] class DiskBlockObjectWriter( extends BlockObjectWriter(blockId) with Logging { - /** Intercepts write calls and tracks total time spent writing. Not thread safe. */ - private class TimeTrackingOutputStream(out: OutputStream) extends OutputStream { - override def write(i: Int): Unit = callWithTiming(out.write(i)) - override def write(b: Array[Byte]): Unit = callWithTiming(out.write(b)) - override def write(b: Array[Byte], off: Int, len: Int): Unit = { - callWithTiming(out.write(b, off, len)) - } - override def close(): Unit = out.close() - override def flush(): Unit = out.flush() - } /** The file channel, used for repositioning / truncating the file. */ private var channel: FileChannel = null @@ -136,7 +126,7 @@ private[spark] class DiskBlockObjectWriter( throw new IllegalStateException("Writer already closed. Cannot be reopened.") } fos = new FileOutputStream(file, true) - ts = new TimeTrackingOutputStream(fos) + ts = new TimeTrackingOutputStream(writeMetrics, fos) channel = fos.getChannel() bs = compressStream(new BufferedOutputStream(ts, bufferSize)) objOut = serializerInstance.serializeStream(bs) @@ -150,9 +140,9 @@ private[spark] class DiskBlockObjectWriter( if (syncWrites) { // Force outstanding writes to disk and track how long it takes objOut.flush() - callWithTiming { - fos.getFD.sync() - } + val start = System.nanoTime() + fos.getFD.sync() + writeMetrics.incShuffleWriteTime(System.nanoTime() - start) } } { objOut.close() @@ -215,12 +205,7 @@ private[spark] class DiskBlockObjectWriter( objOut.writeKey(key) objOut.writeValue(value) - numRecordsWritten += 1 - writeMetrics.incShuffleRecordsWritten(1) - - if (numRecordsWritten % 32 == 0) { - updateBytesWritten() - } + recordWritten() } override def write(b: Int): Unit = throw new UnsupportedOperationException() @@ -256,12 +241,6 @@ private[spark] class DiskBlockObjectWriter( reportedPosition = pos } - private def callWithTiming(f: => Unit) = { - val start = System.nanoTime() - f - writeMetrics.incShuffleWriteTime(System.nanoTime() - start) - } - // For testing private[spark] override def flush() { objOut.flush() diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index 5764c16902c66..91ef86389a0c3 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -55,7 +55,7 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon /** Looks up a file by hashing it into one of our local subdirectories. */ // This method should be kept in sync with - // org.apache.spark.network.shuffle.StandaloneShuffleBlockManager#getFile(). + // org.apache.spark.network.shuffle.ExternalShuffleBlockResolver#getFile(). 
def getFile(filename: String): File = { // Figure out which local directory it hashes to, and which subdirectory in that val hash = Utils.nonNegativeHash(filename) @@ -139,8 +139,8 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon } private def addShutdownHook(): AnyRef = { - Utils.addShutdownHook { () => - logDebug("Shutdown hook called") + Utils.addShutdownHook(Utils.TEMP_DIR_SHUTDOWN_PRIORITY + 1) { () => + logInfo("Shutdown hook called") DiskBlockManager.this.doStop() } } @@ -151,7 +151,7 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon try { Utils.removeShutdownHook(shutdownHook) } catch { - case e: Exception => + case e: Exception => logError(s"Exception while removing shutdown hook.", e) } doStop() diff --git a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala index ad53a3edc7cc1..96062626b5045 100644 --- a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala +++ b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala @@ -18,7 +18,7 @@ package org.apache.spark.storage import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{RDDOperationScope, RDD} import org.apache.spark.util.Utils @DeveloperApi @@ -26,7 +26,9 @@ class RDDInfo( val id: Int, val name: String, val numPartitions: Int, - var storageLevel: StorageLevel) + var storageLevel: StorageLevel, + val parentIds: Seq[Int], + val scope: Option[RDDOperationScope] = None) extends Ordered[RDDInfo] { var numCachedPartitions = 0 @@ -52,7 +54,8 @@ class RDDInfo( private[spark] object RDDInfo { def fromRdd(rdd: RDD[_]): RDDInfo = { - val rddName = Option(rdd.name).getOrElse(rdd.id.toString) - new RDDInfo(rdd.id, rddName, rdd.partitions.length, rdd.getStorageLevel) + val rddName = Option(rdd.name).getOrElse(Utils.getFormattedClassName(rdd)) + val parentIds = rdd.dependencies.map(_.rdd.id) + new RDDInfo(rdd.id, rddName, rdd.partitions.length, rdd.getStorageLevel, parentIds, rdd.scope) } } diff --git a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala index 7d75929b96f75..ec711480ebf30 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageStatusListener.scala @@ -25,13 +25,17 @@ import org.apache.spark.scheduler._ /** * :: DeveloperApi :: * A SparkListener that maintains executor storage status. + * + * This class is thread-safe (unlike JobProgressListener) */ @DeveloperApi class StorageStatusListener extends SparkListener { // This maintains only blocks that are cached (i.e. 
storage level is not StorageLevel.NONE) private[storage] val executorIdToStorageStatus = mutable.Map[String, StorageStatus]() - def storageStatusList: Seq[StorageStatus] = executorIdToStorageStatus.values.toSeq + def storageStatusList: Seq[StorageStatus] = synchronized { + executorIdToStorageStatus.values.toSeq + } /** Update storage status list to reflect updated block statuses */ private def updateStorageStatus(execId: String, updatedBlocks: Seq[(BlockId, BlockStatus)]) { diff --git a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala index bdc6276e41915..d3b8c80699f4b 100644 --- a/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/TachyonBlockManager.scala @@ -38,7 +38,7 @@ import org.apache.spark.util.Utils */ private[spark] class TachyonBlockManager() extends ExternalBlockManager with Logging { - var blockManager: BlockManager =_ + var blockManager: BlockManager = _ var rootDirs: String = _ var master: String = _ var client: tachyon.client.TachyonFS = _ diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index dfd6fdb5e9993..06e616220c706 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -78,6 +78,9 @@ private[spark] object JettyUtils extends Logging { } catch { case e: IllegalArgumentException => response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage) + case e: Exception => + logWarning(s"GET ${request.getRequestURI} failed: $e", e) + throw e } } // SPARK-5983 ensure TRACE is not supported @@ -217,6 +220,9 @@ private[spark] object JettyUtils extends Logging { val pool = new QueuedThreadPool pool.setDaemon(true) server.setThreadPool(pool) + val errorHandler = new ErrorHandler() + errorHandler.setShowStacks(true) + server.addBean(errorHandler) server.setHandler(collection) try { server.start() diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 06fce86bd38d2..3788916cf39bb 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -17,6 +17,10 @@ package org.apache.spark.ui +import java.util.Date + +import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationAttemptInfo, ApplicationInfo, + UIRoot} import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext} import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener @@ -25,6 +29,7 @@ import org.apache.spark.ui.env.{EnvironmentListener, EnvironmentTab} import org.apache.spark.ui.exec.{ExecutorsListener, ExecutorsTab} import org.apache.spark.ui.jobs.{JobsTab, JobProgressListener, StagesTab} import org.apache.spark.ui.storage.{StorageListener, StorageTab} +import org.apache.spark.ui.scope.RDDOperationGraphListener /** * Top level user interface for a Spark application. 
@@ -32,29 +37,35 @@ import org.apache.spark.ui.storage.{StorageListener, StorageTab} private[spark] class SparkUI private ( val sc: Option[SparkContext], val conf: SparkConf, - val securityManager: SecurityManager, + securityManager: SecurityManager, val environmentListener: EnvironmentListener, val storageStatusListener: StorageStatusListener, val executorsListener: ExecutorsListener, val jobProgressListener: JobProgressListener, val storageListener: StorageListener, + val operationGraphListener: RDDOperationGraphListener, var appName: String, - val basePath: String) + val basePath: String, + val startTime: Long) extends WebUI(securityManager, SparkUI.getUIPort(conf), conf, basePath, "SparkUI") - with Logging { + with Logging + with UIRoot { val killEnabled = sc.map(_.conf.getBoolean("spark.ui.killEnabled", true)).getOrElse(false) + + val stagesTab = new StagesTab(this) + /** Initialize all components of the server. */ def initialize() { attachTab(new JobsTab(this)) - val stagesTab = new StagesTab(this) attachTab(stagesTab) attachTab(new StorageTab(this)) attachTab(new EnvironmentTab(this)) attachTab(new ExecutorsTab(this)) attachHandler(createStaticHandler(SparkUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler("/", "/jobs", basePath = basePath)) + attachHandler(ApiRootResource.getServletHandler(this)) // This should be POST only, but, the YARN AM proxy won't proxy POSTs attachHandler(createRedirectHandler( "/stages/stage/kill", "/stages", stagesTab.handleKillRequest, @@ -81,6 +92,24 @@ private[spark] class SparkUI private ( private[spark] def appUIHostPort = publicHostName + ":" + boundPort private[spark] def appUIAddress = s"http://$appUIHostPort" + + def getSparkUI(appId: String): Option[SparkUI] = { + if (appId == appName) Some(this) else None + } + + def getApplicationInfoList: Iterator[ApplicationInfo] = { + Iterator(new ApplicationInfo( + id = appName, + name = appName, + attempts = Seq(new ApplicationAttemptInfo( + attemptId = None, + startTime = new Date(startTime), + endTime = new Date(-1), + sparkUser = "", + completed = false + )) + )) + } } private[spark] abstract class SparkUITab(parent: SparkUI, prefix: String) @@ -93,6 +122,9 @@ private[spark] abstract class SparkUITab(parent: SparkUI, prefix: String) private[spark] object SparkUI { val DEFAULT_PORT = 4040 val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" + val DEFAULT_POOL_NAME = "default" + val DEFAULT_RETAINED_STAGES = 1000 + val DEFAULT_RETAINED_JOBS = 1000 def getUIPort(conf: SparkConf): Int = { conf.getInt("spark.ui.port", SparkUI.DEFAULT_PORT) @@ -104,9 +136,10 @@ private[spark] object SparkUI { listenerBus: SparkListenerBus, jobProgressListener: JobProgressListener, securityManager: SecurityManager, - appName: String): SparkUI = { + appName: String, + startTime: Long): SparkUI = { create(Some(sc), conf, listenerBus, securityManager, appName, - jobProgressListener = Some(jobProgressListener)) + jobProgressListener = Some(jobProgressListener), startTime = startTime) } def createHistoryUI( @@ -114,8 +147,9 @@ private[spark] object SparkUI { listenerBus: SparkListenerBus, securityManager: SecurityManager, appName: String, - basePath: String): SparkUI = { - create(None, conf, listenerBus, securityManager, appName, basePath) + basePath: String, + startTime: Long): SparkUI = { + create(None, conf, listenerBus, securityManager, appName, basePath, startTime = startTime) } /** @@ -132,7 +166,8 @@ private[spark] object SparkUI { securityManager: SecurityManager, appName: String, basePath: String = 
"", - jobProgressListener: Option[JobProgressListener] = None): SparkUI = { + jobProgressListener: Option[JobProgressListener] = None, + startTime: Long): SparkUI = { val _jobProgressListener: JobProgressListener = jobProgressListener.getOrElse { val listener = new JobProgressListener(conf) @@ -144,13 +179,16 @@ private[spark] object SparkUI { val storageStatusListener = new StorageStatusListener val executorsListener = new ExecutorsListener(storageStatusListener) val storageListener = new StorageListener(storageStatusListener) + val operationGraphListener = new RDDOperationGraphListener(conf) listenerBus.addListener(environmentListener) listenerBus.addListener(storageStatusListener) listenerBus.addListener(executorsListener) listenerBus.addListener(storageListener) + listenerBus.addListener(operationGraphListener) new SparkUI(sc, conf, securityManager, environmentListener, storageStatusListener, - executorsListener, _jobProgressListener, storageListener, appName, basePath) + executorsListener, _jobProgressListener, storageListener, operationGraphListener, + appName, basePath, startTime) } } diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala index 24f3236456248..063e2a1f8b18e 100644 --- a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala +++ b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala @@ -57,4 +57,23 @@ private[spark] object ToolTips { val GC_TIME = """Time that the executor spent paused for Java garbage collection while the task was running.""" + + val JOB_TIMELINE = + """Shows when jobs started and ended and when executors joined or left. Drag to scroll. + Click Enable Zooming and use mouse wheel to zoom in/out.""" + + val STAGE_TIMELINE = + """Shows when stages started and ended and when executors joined or left. Drag to scroll. + Click Enable Zooming and use mouse wheel to zoom in/out.""" + + val JOB_DAG = + """Shows a graph of stages executed for this job, each of which can contain + multiple RDD operations (e.g. map() and filter()), and of RDDs inside each operation + (shown as dots).""" + + val STAGE_DAG = + """Shows a graph of RDD operations in this stage, and RDDs inside each one. A stage can run + multiple operations (e.g. two map() functions) if they can be pipelined. Some operations + also create multiple RDDs internally. Cached RDDs are shown in green. + """ } diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 395af2ea30b9d..65162f4fdcd62 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -20,9 +20,10 @@ package org.apache.spark.ui import java.text.SimpleDateFormat import java.util.{Locale, Date} -import scala.xml.{Node, Text} +import scala.xml.{Node, Text, Unparsed} import org.apache.spark.Logging +import org.apache.spark.ui.scope.RDDOperationGraph /** Utility functions for generating XML pages with spark content. 
*/ private[spark] object UIUtils extends Logging { @@ -155,13 +156,10 @@ private[spark] object UIUtils extends Logging { def commonHeaderNodes: Seq[Node] = { - - - - + + + + @@ -172,13 +170,22 @@ private[spark] object UIUtils extends Logging { } + def vizHeaderNodes: Seq[Node] = { + + + + + + } + /** Returns a spark page with correctly formatted headers */ def headerSparkPage( title: String, content: => Seq[Node], activeTab: SparkUITab, refreshInterval: Option[Int] = None, - helpText: Option[String] = None): Seq[Node] = { + helpText: Option[String] = None, + showVisualization: Boolean = false): Seq[Node] = { val appName = activeTab.appName val shortAppName = if (appName.length < 36) appName else appName.take(32) + "..." @@ -187,15 +194,12 @@ private[spark] object UIUtils extends Logging { {tab.name} } - val helpButton: Seq[Node] = helpText.map { helpText => - - (?) - - }.getOrElse(Seq.empty) + val helpButton: Seq[Node] = helpText.map(tooltip(_, "bottom")).getOrElse(Seq.empty) {commonHeaderNodes} + {if (showVisualization) vizHeaderNodes else Seq.empty} {appName} - {title} @@ -242,7 +246,7 @@ private[spark] object UIUtils extends Logging {

- {org.apache.spark.SPARK_VERSION} {title} @@ -305,7 +309,7 @@ private[spark] object UIUtils extends Logging { started: Int, completed: Int, failed: Int, - skipped:Int, + skipped: Int, total: Int): Seq[Node] = { val completeWidth = "width: %s%%".format((completed.toDouble/total)*100) val startWidth = "width: %s%%".format((started.toDouble/total)*100) @@ -320,4 +324,66 @@ private[spark] object UIUtils extends Logging {
} + + /** Return a "DAG visualization" DOM element that expands into a visualization for a stage. */ + def showDagVizForStage(stageId: Int, graph: Option[RDDOperationGraph]): Seq[Node] = { + showDagViz(graph.toSeq, forJob = false) + } + + /** Return a "DAG visualization" DOM element that expands into a visualization for a job. */ + def showDagVizForJob(jobId: Int, graphs: Seq[RDDOperationGraph]): Seq[Node] = { + showDagViz(graphs, forJob = true) + } + + /** + * Return a "DAG visualization" DOM element that expands into a visualization on the UI. + * + * This populates metadata necessary for generating the visualization on the front-end in + * a format that is expected by spark-dag-viz.js. Any changes in the format here must be + * reflected there. + */ + private def showDagViz(graphs: Seq[RDDOperationGraph], forJob: Boolean): Seq[Node] = { +
+ + + + DAG Visualization + + +
+ +
+ } + + def tooltip(text: String, position: String): Seq[Node] = { + + (?) + + } + + /** Return a script element that automatically expands the DAG visualization on page load. */ + def expandDagVizOnLoad(forJob: Boolean): Seq[Node] = { + + } + } diff --git a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala index 5fbcd6bb8ad94..ba03acdb38cc5 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala @@ -54,7 +54,7 @@ private[spark] object UIWorkloadGenerator { val sc = new SparkContext(conf) def setProperties(s: String): Unit = { - if(schedulingMode == SchedulingMode.FAIR) { + if (schedulingMode == SchedulingMode.FAIR) { sc.setLocalProperty("spark.scheduler.pool", s) } sc.setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, s) diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index f9860d1a5ce76..2c84e4485996e 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -37,7 +37,7 @@ import org.apache.spark.{Logging, SecurityManager, SparkConf} * pages. The use of tabs is optional, however; a WebUI may choose to include pages directly. */ private[spark] abstract class WebUI( - securityManager: SecurityManager, + val securityManager: SecurityManager, port: Int, conf: SparkConf, basePath: String = "", @@ -62,12 +62,12 @@ private[spark] abstract class WebUI( tab.pages.foreach(attachPage) tabs += tab } - + def detachTab(tab: WebUITab) { tab.pages.foreach(detachPage) tabs -= tab } - + def detachPage(page: WebUIPage) { pageToHandlers.remove(page).foreach(_.foreach(detachHandler)) } @@ -83,9 +83,6 @@ private[spark] abstract class WebUI( attachHandler(renderJsonHandler) pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) .append(renderHandler) - pageToHandlers.getOrElseUpdate(page, ArrayBuffer[ServletContextHandler]()) - .append(renderJsonHandler) - } /** Attach a handler to this UI. */ @@ -100,7 +97,7 @@ private[spark] abstract class WebUI( } /** Detach a handler from this UI. */ - protected def detachHandler(handler: ServletContextHandler) { + def detachHandler(handler: ServletContextHandler) { handlers -= handler serverInfo.foreach { info => info.rootHandler.removeHandler(handler) diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala index 956608d7c0cbe..b247e4cdc3bd4 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala @@ -22,11 +22,11 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node +import org.apache.spark.status.api.v1.ExecutorSummary import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.util.Utils -/** Summary information about an executor to display in the UI. */ -// Needs to be private[ui] because of a false positive MiMa failure. 
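A hedged sketch (not part of the patch) of how a page might opt into the new DAG-visualization support introduced above: it embeds the expandable element from UIUtils.showDagVizForStage and requests the extra headers via showVisualization = true. The DagDemoPage class, the "dag-demo" prefix, and the way the graph listener is passed in are all assumptions made for this example.

package org.apache.spark.ui

import javax.servlet.http.HttpServletRequest

import scala.xml.Node

import org.apache.spark.ui.scope.RDDOperationGraphListener

// Hypothetical page, not part of the patch: demonstrates the new
// showVisualization flag and the showDagVizForStage helper added above.
private[ui] class DagDemoPage(
    parent: SparkUITab,
    graphListener: RDDOperationGraphListener)
  extends WebUIPage("dag-demo") {

  def render(request: HttpServletRequest): Seq[Node] = {
    // Assume ?id=<stageId> in the query string, defaulting to stage 0.
    val stageId = Option(request.getParameter("id")).getOrElse("0").toInt
    val dagViz = UIUtils.showDagVizForStage(
      stageId, graphListener.getOperationGraphForStage(stageId))
    // showVisualization = true makes headerSparkPage emit vizHeaderNodes,
    // i.e. the extra scripts and styles the DAG visualization needs.
    UIUtils.headerSparkPage("DAG demo", dagViz, parent, showVisualization = true)
  }
}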
+// This isn't even used anymore -- but we need to keep it b/c of a MiMa false positive private[ui] case class ExecutorSummaryInfo( id: String, hostPort: String, @@ -44,6 +44,7 @@ private[ui] case class ExecutorSummaryInfo( maxMemory: Long, executorLogs: Map[String, String]) + private[ui] class ExecutorsPage( parent: ExecutorsTab, threadDumpEnabled: Boolean) @@ -55,7 +56,8 @@ private[ui] class ExecutorsPage( val maxMem = storageStatusList.map(_.maxMem).sum val memUsed = storageStatusList.map(_.memUsed).sum val diskUsed = storageStatusList.map(_.diskUsed).sum - val execInfo = for (statusId <- 0 until storageStatusList.size) yield getExecInfo(statusId) + val execInfo = for (statusId <- 0 until storageStatusList.size) yield + ExecutorsPage.getExecInfo(listener, statusId) val execInfoSorted = execInfo.sortBy(_.id) val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty @@ -111,7 +113,7 @@ private[ui] class ExecutorsPage( } /** Render an HTML row representing an executor */ - private def execRow(info: ExecutorSummaryInfo, logsExist: Boolean): Seq[Node] = { + private def execRow(info: ExecutorSummary, logsExist: Boolean): Seq[Node] = { val maximumMemory = info.maxMemory val memoryUsed = info.memoryUsed val diskUsed = info.diskUsed @@ -170,8 +172,11 @@ private[ui] class ExecutorsPage( } +} + +private[spark] object ExecutorsPage { /** Represent an executor's info as a map given a storage status index */ - private def getExecInfo(statusId: Int): ExecutorSummaryInfo = { + def getExecInfo(listener: ExecutorsListener, statusId: Int): ExecutorSummary = { val status = listener.storageStatusList(statusId) val execId = status.blockManagerId.executorId val hostPort = status.blockManagerId.hostPort @@ -189,7 +194,7 @@ private[ui] class ExecutorsPage( val totalShuffleWrite = listener.executorToShuffleWrite.getOrElse(execId, 0L) val executorLogs = listener.executorToLogUrls.getOrElse(execId, Map.empty) - new ExecutorSummaryInfo( + new ExecutorSummary( execId, hostPort, rddBlocks, diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index a7ea12b1655fe..2ce670ad02e97 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -18,12 +18,12 @@ package org.apache.spark.ui.jobs import scala.collection.mutable.{HashMap, ListBuffer} -import scala.xml.{Node, NodeSeq, Unparsed} +import scala.xml.{Node, NodeSeq, Unparsed, Utility} import java.util.Date import javax.servlet.http.HttpServletRequest -import org.apache.spark.ui.{UIUtils, WebUIPage} +import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.ui.jobs.UIData.{ExecutorUIData, JobUIData} import org.apache.spark.JobExecutionStatus @@ -81,6 +81,9 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { case JobExecutionStatus.RUNNING => "running" } + // The timeline library treats contents as HTML, so we have to escape them; for the + // data-title attribute string we have to escape them twice since that's in a string. + val escapedDesc = Utility.escape(displayJobDescription) val jobEventJsonAsStr = s""" |{ @@ -90,16 +93,17 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { | 'end': new Date(${completionTime}), | 'content': '
' + + | 'Status: ${status}
' + + | 'Submitted: ${UIUtils.formatDate(new Date(submissionTime))}' + | '${ if (status != JobExecutionStatus.RUNNING) { - s"""
Completion Time: ${UIUtils.formatDate(new Date(completionTime))}""" + s"""
Completed: ${UIUtils.formatDate(new Date(completionTime))}""" } else { "" } }">' + - | '${displayJobDescription} (Job ${jobId})
' + | '${escapedDesc} (Job ${jobId})' |} """.stripMargin jobEventJsonAsStr @@ -179,13 +183,15 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { - Event Timeline + + Event Timeline + ++ ++ @@ -225,7 +231,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { {lastStageDescription} - {lastStageName} + {lastStageName} {formattedSubmissionTime} @@ -271,13 +277,19 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { val shouldShowCompletedJobs = completedJobs.nonEmpty val shouldShowFailedJobs = failedJobs.nonEmpty + val completedJobNumStr = if (completedJobs.size == listener.numCompletedJobs) { + s"${completedJobs.size}" + } else { + s"${listener.numCompletedJobs}, only showing ${completedJobs.size}" + } + val summary: NodeSeq =
    {if (parent.sc.isDefined) { // Total duration is not meaningful unless the UI is live
  • - Total Duration: + Total Uptime: {UIUtils.formatDuration(System.currentTimeMillis() - startTime)}
  • }} @@ -295,9 +307,9 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { } { if (shouldShowCompletedJobs) { -
  • +
  • Completed Jobs: - {completedJobs.size} + {completedJobNumStr}
  • } } @@ -305,7 +317,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { if (shouldShowFailedJobs) {
  • Failed Jobs: - {failedJobs.size} + {listener.numFailedJobs}
  • } } @@ -322,7 +334,7 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { activeJobsTable } if (shouldShowCompletedJobs) { - content ++=

    Completed Jobs ({completedJobs.size})

    ++ + content ++=

    Completed Jobs ({completedJobNumStr})

    ++ completedJobsTable } if (shouldShowFailedJobs) { @@ -330,9 +342,8 @@ private[ui] class AllJobsPage(parent: JobsTab) extends WebUIPage("") { failedJobsTable } - val helpText = """A job is triggered by an action, like "count()" or "saveAsTextFile()".""" + - " Click on a job's title to see information about the stages of tasks associated with" + - " the job." + val helpText = """A job is triggered by an action, like count() or saveAsTextFile().""" + + " Click on a job to see information about the stages of tasks inside it." UIUtils.headerSparkPage("Spark Jobs", content, parent, helpText = Some(helpText)) } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala index 527f960af2dfc..5e52942b64f3f 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllStagesPage.scala @@ -27,7 +27,7 @@ import org.apache.spark.ui.{WebUIPage, UIUtils} /** Page showing list of all ongoing and recently finished stages and pools */ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { private val sc = parent.sc - private val listener = parent.listener + private val listener = parent.progressListener private def isFairScheduler = parent.isFairScheduler def render(request: HttpServletRequest): Seq[Node] = { @@ -42,18 +42,18 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { val activeStagesTable = new StageTableBase(activeStages.sortBy(_.submissionTime).reverse, - parent.basePath, parent.listener, isFairScheduler = parent.isFairScheduler, + parent.basePath, parent.progressListener, isFairScheduler = parent.isFairScheduler, killEnabled = parent.killEnabled) val pendingStagesTable = new StageTableBase(pendingStages.sortBy(_.submissionTime).reverse, - parent.basePath, parent.listener, isFairScheduler = parent.isFairScheduler, + parent.basePath, parent.progressListener, isFairScheduler = parent.isFairScheduler, killEnabled = false) val completedStagesTable = new StageTableBase(completedStages.sortBy(_.submissionTime).reverse, parent.basePath, - parent.listener, isFairScheduler = parent.isFairScheduler, killEnabled = false) + parent.progressListener, isFairScheduler = parent.isFairScheduler, killEnabled = false) val failedStagesTable = new FailedStageTable(failedStages.sortBy(_.submissionTime).reverse, parent.basePath, - parent.listener, isFairScheduler = parent.isFairScheduler) + parent.progressListener, isFairScheduler = parent.isFairScheduler) // For now, pool information is only accessible in live UIs val pools = sc.map(_.getAllPools).getOrElse(Seq.empty[Schedulable]) @@ -64,22 +64,15 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { val shouldShowCompletedStages = completedStages.nonEmpty val shouldShowFailedStages = failedStages.nonEmpty + val completedStageNumStr = if (numCompletedStages == completedStages.size) { + s"$numCompletedStages" + } else { + s"$numCompletedStages, only showing ${completedStages.size}" + } + val summary: NodeSeq =
      - { - if (sc.isDefined) { - // Total duration is not meaningful unless the UI is live -
    • - Total Duration: - {UIUtils.formatDuration(now - sc.get.startTime)} -
    • - } - } -
    • - Scheduling Mode: - {listener.schedulingMode.map(_.toString).getOrElse("Unknown")} -
    • { if (shouldShowActiveStages) {
    • @@ -98,9 +91,9 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { } { if (shouldShowCompletedStages) { -
    • +
    • Completed Stages: - {numCompletedStages} + {completedStageNumStr}
    • } } @@ -132,14 +125,14 @@ private[ui] class AllStagesPage(parent: StagesTab) extends WebUIPage("") { pendingStagesTable.toNodeSeq } if (shouldShowCompletedStages) { - content ++=

      Completed Stages ({numCompletedStages})

      ++ + content ++=

      Completed Stages ({completedStageNumStr})

      ++ completedStagesTable.toNodeSeq } if (shouldShowFailedStages) { content ++=

      Failed Stages ({numFailedStages})

      ++ failedStagesTable.toNodeSeq } - UIUtils.headerSparkPage("Spark Stages (for all jobs)", content, parent) + UIUtils.headerSparkPage("Stages for All Jobs", content, parent) } } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala index 1f8536d1b7195..d5cdbfac104f8 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/ExecutorTable.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils /** Stage summary grouped by executors. */ private[ui] class ExecutorTable(stageId: Int, stageAttemptId: Int, parent: StagesTab) { - private val listener = parent.listener + private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index dd968e124738e..2cad0a796913e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -20,17 +20,18 @@ package org.apache.spark.ui.jobs import java.util.Date import scala.collection.mutable.{Buffer, HashMap, ListBuffer} -import scala.xml.{NodeSeq, Node, Unparsed} +import scala.xml.{NodeSeq, Node, Unparsed, Utility} import javax.servlet.http.HttpServletRequest import org.apache.spark.JobExecutionStatus import org.apache.spark.scheduler.StageInfo -import org.apache.spark.ui.{UIUtils, WebUIPage} +import org.apache.spark.ui.{ToolTips, UIUtils, WebUIPage} import org.apache.spark.ui.jobs.UIData.ExecutorUIData /** Page showing statistics and stage list for a given job */ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { + private val STAGES_LEGEND =
      ' + | 'Status: ${status.toUpperCase}
      ' + - | 'Submission Time: ${UIUtils.formatDate(new Date(submissionTime))}' + + | 'Submitted: ${UIUtils.formatDate(new Date(submissionTime))}' + | '${ if (status != "running") { - s"""
      Completion Time: ${UIUtils.formatDate(new Date(completionTime))}""" + s"""
      Completed: ${UIUtils.formatDate(new Date(completionTime))}""" } else { "" } }">' + - | '${name} (Stage ${stageId}.${attemptId})
      ', + | '${escapedName} (Stage ${stageId}.${attemptId})
    ', |} """.stripMargin } @@ -133,7 +137,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { events.toSeq } - private def makeTimeline( + private def makeTimeline( stages: Seq[StageInfo], executors: HashMap[String, ExecutorUIData], appStartTime: Long): Seq[Node] = { @@ -160,13 +164,15 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { - Event Timeline + + Event Timeline + ++ ++ @@ -186,7 +192,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { val jobDataOption = listener.jobIdToData.get(jobId) if (jobDataOption.isEmpty) { val content = -
    +

    No information to display for job {jobId}

    return UIUtils.headerSparkPage( @@ -198,7 +204,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { // This could be empty if the JobProgressListener hasn't received information about the // stage or if the stage information has been garbage collected listener.stageIdToInfo.getOrElse(stageId, - new StageInfo(stageId, 0, "Unknown", 0, Seq.empty, "Unknown")) + new StageInfo(stageId, 0, "Unknown", 0, Seq.empty, Seq.empty, "Unknown")) } val activeStages = Buffer[StageInfo]() @@ -303,9 +309,14 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { var content = summary val appStartTime = listener.startTime val executorListener = parent.executorListener + val operationGraphListener = parent.operationGraphListener + content ++= makeTimeline(activeStages ++ completedStages ++ failedStages, executorListener.executorIdToData, appStartTime) + content ++= UIUtils.showDagVizForJob( + jobId, operationGraphListener.getOperationGraphForJob(jobId)) + if (shouldShowActiveStages) { content ++=

    Active Stages ({activeStages.size})

    ++ activeStagesTable.toNodeSeq @@ -326,7 +337,7 @@ private[ui] class JobPage(parent: JobsTab) extends WebUIPage("job") { content ++=

    Failed Stages ({failedStages.size})

    ++ failedStagesTable.toNodeSeq } - UIUtils.headerSparkPage(s"Details for Job $jobId", content, parent) + UIUtils.headerSparkPage(s"Details for Job $jobId", content, parent, showVisualization = true) } } } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala index d6d716de25920..f39e961772c46 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala @@ -25,6 +25,7 @@ import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler._ import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.storage.BlockManagerId +import org.apache.spark.ui.SparkUI import org.apache.spark.ui.jobs.UIData._ /** @@ -38,8 +39,6 @@ import org.apache.spark.ui.jobs.UIData._ @DeveloperApi class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { - import JobProgressListener._ - // Define a handful of type aliases so that data structures' types can serve as documentation. // These type aliases are public because they're used in the types of public fields: @@ -75,6 +74,8 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { // JobProgressListener's retention limits. var numCompletedStages = 0 var numFailedStages = 0 + var numCompletedJobs = 0 + var numFailedJobs = 0 // Misc: val executorIdToBlockManagerId = HashMap[ExecutorId, BlockManagerId]() @@ -86,8 +87,8 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { // To limit the total memory usage of JobProgressListener, we only track information for a fixed // number of non-active jobs and stages (there is no limit for active jobs and stages): - val retainedStages = conf.getInt("spark.ui.retainedStages", DEFAULT_RETAINED_STAGES) - val retainedJobs = conf.getInt("spark.ui.retainedJobs", DEFAULT_RETAINED_JOBS) + val retainedStages = conf.getInt("spark.ui.retainedStages", SparkUI.DEFAULT_RETAINED_STAGES) + val retainedJobs = conf.getInt("spark.ui.retainedJobs", SparkUI.DEFAULT_RETAINED_JOBS) // We can test for memory leaks by ensuring that collections that track non-active jobs and // stages do not grow without bound and that collections for active jobs/stages eventually become @@ -118,7 +119,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { "failedStages" -> failedStages.size ) } - + // These collections may grow arbitrarily, but once Spark becomes idle they should shrink back to // some bound based on the `spark.ui.retainedStages` and `spark.ui.retainedJobs` settings: private[spark] def getSizesOfSoftSizeLimitedCollections: Map[String, Int] = { @@ -218,10 +219,12 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { completedJobs += jobData trimJobsIfNecessary(completedJobs) jobData.status = JobExecutionStatus.SUCCEEDED + numCompletedJobs += 1 case JobFailed(exception) => failedJobs += jobData trimJobsIfNecessary(failedJobs) jobData.status = JobExecutionStatus.FAILED + numFailedJobs += 1 } for (stageId <- jobData.stageIds) { stageIdToActiveJobIds.get(stageId).foreach { jobsUsingStage => @@ -288,8 +291,8 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { activeStages(stage.stageId) = stage pendingStages.remove(stage.stageId) val poolName = Option(stageSubmitted.properties).map { - p => p.getProperty("spark.scheduler.pool", DEFAULT_POOL_NAME) - 
}.getOrElse(DEFAULT_POOL_NAME) + p => p.getProperty("spark.scheduler.pool", SparkUI.DEFAULT_POOL_NAME) + }.getOrElse(SparkUI.DEFAULT_POOL_NAME) stageIdToInfo(stage.stageId) = stage val stageData = stageIdToData.getOrElseUpdate((stage.stageId, stage.attemptId), new StageUIData) @@ -524,9 +527,3 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { startTime = appStarted.time } } - -private object JobProgressListener { - val DEFAULT_POOL_NAME = "default" - val DEFAULT_RETAINED_STAGES = 1000 - val DEFAULT_RETAINED_JOBS = 1000 -} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala index 342787f1a3c2d..77ca60b000a9b 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobsTab.scala @@ -24,10 +24,12 @@ import org.apache.spark.ui.{SparkUI, SparkUITab} private[ui] class JobsTab(parent: SparkUI) extends SparkUITab(parent, "jobs") { val sc = parent.sc val killEnabled = parent.killEnabled - def isFairScheduler: Boolean = - jobProgresslistener.schedulingMode.exists(_ == SchedulingMode.FAIR) val jobProgresslistener = parent.jobProgressListener val executorListener = parent.executorsListener + val operationGraphListener = parent.operationGraphListener + + def isFairScheduler: Boolean = + jobProgresslistener.schedulingMode.exists(_ == SchedulingMode.FAIR) attachPage(new AllJobsPage(this)) attachPage(new JobPage(this)) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala index f47cdc935e539..f3e0b38523f32 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala @@ -21,13 +21,13 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node -import org.apache.spark.scheduler.{Schedulable, StageInfo} +import org.apache.spark.scheduler.StageInfo import org.apache.spark.ui.{WebUIPage, UIUtils} /** Page showing specific pool details */ private[ui] class PoolPage(parent: StagesTab) extends WebUIPage("pool") { private val sc = parent.sc - private val listener = parent.listener + private val listener = parent.progressListener def render(request: HttpServletRequest): Seq[Node] = { listener.synchronized { @@ -40,7 +40,7 @@ private[ui] class PoolPage(parent: StagesTab) extends WebUIPage("pool") { case None => Seq[StageInfo]() } val activeStagesTable = new StageTableBase(activeStages.sortBy(_.submissionTime).reverse, - parent.basePath, parent.listener, isFairScheduler = parent.isFairScheduler, + parent.basePath, parent.progressListener, isFairScheduler = parent.isFairScheduler, killEnabled = parent.killEnabled) // For now, pool information is only accessible in live UIs diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala index df1899e7a9b84..9ba2af54dacf4 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala @@ -25,7 +25,7 @@ import org.apache.spark.ui.UIUtils /** Table showing list of pools */ private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { - private val listener = parent.listener + private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala 
b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 797c9404bc449..b83a49f79c8a8 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -20,54 +20,98 @@ package org.apache.spark.ui.jobs import java.util.Date import javax.servlet.http.HttpServletRequest +import scala.collection.mutable.HashSet import scala.xml.{Elem, Node, Unparsed} import org.apache.commons.lang3.StringEscapeUtils import org.apache.spark.executor.TaskMetrics +import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.ui.{ToolTips, WebUIPage, UIUtils} import org.apache.spark.ui.jobs.UIData._ +import org.apache.spark.ui.scope.RDDOperationGraph import org.apache.spark.util.{Utils, Distribution} -import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} /** Page showing statistics and task list for a given stage */ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { - private val listener = parent.listener + private val progressListener = parent.progressListener + private val operationGraphListener = parent.operationGraphListener + + private val TIMELINE_LEGEND = { +
+ + { + val legendPairs = List(("scheduler-delay-proportion", "Scheduler Delay"), + ("deserialization-time-proportion", "Task Deserialization Time"), + ("shuffle-read-time-proportion", "Shuffle Read Time"), + ("executor-runtime-proportion", "Executor Computing Time"), + ("shuffle-write-time-proportion", "Shuffle Write Time"), + ("serialization-time-proportion", "Result Serialization Time"), + ("getting-result-time-proportion", "Getting Result Time")) + + legendPairs.zipWithIndex.map { + case ((classAttr, name), index) => + + {name} + } + } + +
+ } + + // TODO: We should consider increasing the value of this parameter over time + // if we find that it's okay. + private val MAX_TIMELINE_TASKS = parent.conf.getInt("spark.ui.timeline.tasks.maximum", 1000) + def render(request: HttpServletRequest): Seq[Node] = { - listener.synchronized { + progressListener.synchronized { val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") val parameterAttempt = request.getParameter("attempt") require(parameterAttempt != null && parameterAttempt.nonEmpty, "Missing attempt parameter") + // If this is set, expand the dag visualization by default + val expandDagVizParam = request.getParameter("expandDagViz") + val expandDagViz = expandDagVizParam != null && expandDagVizParam.toBoolean + val stageId = parameterId.toInt val stageAttemptId = parameterAttempt.toInt - val stageDataOption = listener.stageIdToData.get((stageId, stageAttemptId)) + val stageDataOption = progressListener.stageIdToData.get((stageId, stageAttemptId)) + + val stageHeader = s"Details for Stage $stageId (Attempt $stageAttemptId)" + if (stageDataOption.isEmpty) { + val content = +
    +

    No information to display for Stage {stageId} (Attempt {stageAttemptId})

    +
    + return UIUtils.headerSparkPage(stageHeader, content, parent) - if (stageDataOption.isEmpty || stageDataOption.get.taskData.isEmpty) { + } + if (stageDataOption.get.taskData.isEmpty) { val content =

    Summary Metrics

    No tasks have started yet

    Tasks

    No tasks have started yet
    - return UIUtils.headerSparkPage( - s"Details for Stage $stageId (Attempt $stageAttemptId)", content, parent) + return UIUtils.headerSparkPage(stageHeader, content, parent) } val stageData = stageDataOption.get val tasks = stageData.taskData.values.toSeq.sortBy(_.taskInfo.launchTime) val numCompleted = tasks.count(_.taskInfo.finished) - val accumulables = listener.stageIdToData((stageId, stageAttemptId)).accumulables + val accumulables = progressListener.stageIdToData((stageId, stageAttemptId)).accumulables val hasAccumulators = accumulables.size > 0 val summary =
    • - Total task time across all tasks: + Total Time Across All Tasks: {UIUtils.formatDuration(stageData.executorRunTime)}
    • {if (stageData.hasInput) { @@ -84,25 +128,25 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { }} {if (stageData.hasShuffleRead) {
    • - Shuffle read: + Shuffle Read: {s"${Utils.bytesToString(stageData.shuffleReadTotalBytes)} / " + s"${stageData.shuffleReadRecords}"}
    • }} {if (stageData.hasShuffleWrite) {
    • - Shuffle write: + Shuffle Write: {s"${Utils.bytesToString(stageData.shuffleWriteBytes)} / " + s"${stageData.shuffleWriteRecords}"}
    • }} {if (stageData.hasBytesSpilled) {
    • - Shuffle spill (memory): + Shuffle Spill (Memory): {Utils.bytesToString(stageData.memoryBytesSpilled)}
    • - Shuffle spill (disk): + Shuffle Spill (Disk): {Utils.bytesToString(stageData.diskBytesSpilled)}
    • }} @@ -113,10 +157,10 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") {
      - Show additional metrics + Show Additional Metrics
      + val dagViz = UIUtils.showDagVizForStage( + stageId, operationGraphListener.getOperationGraphForStage(stageId)) + + val maybeExpandDagViz: Seq[Node] = + if (expandDagViz) { + UIUtils.expandDagVizOnLoad(forJob = false) + } else { + Seq.empty + } + val accumulableHeaders: Seq[String] = Seq("Accumulable", "Value") def accumulableRow(acc: AccumulableInfo): Elem = {acc.name}{acc.value} - val accumulableTable = UIUtils.listingTable(accumulableHeaders, accumulableRow, + val accumulableTable = UIUtils.listingTable( + accumulableHeaders, + accumulableRow, accumulables.values.toSeq) val taskHeadersAndCssClasses: Seq[(String, String)] = @@ -208,10 +264,17 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val unzipped = taskHeadersAndCssClasses.unzip + val currentTime = System.currentTimeMillis() val taskTable = UIUtils.listingTable( unzipped._1, - taskRow(hasAccumulators, stageData.hasInput, stageData.hasOutput, - stageData.hasShuffleRead, stageData.hasShuffleWrite, stageData.hasBytesSpilled), + taskRow( + hasAccumulators, + stageData.hasInput, + stageData.hasOutput, + stageData.hasShuffleRead, + stageData.hasShuffleWrite, + stageData.hasBytesSpilled, + currentTime), tasks, headerClasses = unzipped._2) // Excludes tasks which failed and have incomplete metrics @@ -433,26 +496,184 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val content = summary ++ + dagViz ++ + maybeExpandDagViz ++ showAdditionalMetrics ++ + makeTimeline(stageData.taskData.values.toSeq, currentTime) ++

      Summary Metrics for {numCompleted} Completed Tasks

      ++
      {summaryTable.getOrElse("No tasks have reported metrics yet.")}
      ++

      Aggregated Metrics by Executor

      ++ executorTable.toNodeSeq ++ maybeAccumulableTable ++

      Tasks

      ++ taskTable - - UIUtils.headerSparkPage("Details for Stage %d".format(stageId), content, parent) + UIUtils.headerSparkPage(stageHeader, content, parent, showVisualization = true) } } + def makeTimeline(tasks: Seq[TaskUIData], currentTime: Long): Seq[Node] = { + val executorsSet = new HashSet[(String, String)] + var minLaunchTime = Long.MaxValue + var maxFinishTime = Long.MinValue + + val executorsArrayStr = + tasks.sortBy(-_.taskInfo.launchTime).take(MAX_TIMELINE_TASKS).map { taskUIData => + val taskInfo = taskUIData.taskInfo + val executorId = taskInfo.executorId + val host = taskInfo.host + executorsSet += ((executorId, host)) + + val launchTime = taskInfo.launchTime + val finishTime = if (!taskInfo.running) taskInfo.finishTime else currentTime + val totalExecutionTime = finishTime - launchTime + minLaunchTime = launchTime.min(minLaunchTime) + maxFinishTime = finishTime.max(maxFinishTime) + + def toProportion(time: Long) = time.toDouble / totalExecutionTime * 100 + + val metricsOpt = taskUIData.taskMetrics + val shuffleReadTime = + metricsOpt.flatMap(_.shuffleReadMetrics.map(_.fetchWaitTime)).getOrElse(0L) + val shuffleReadTimeProportion = toProportion(shuffleReadTime) + val shuffleWriteTime = + (metricsOpt.flatMap(_.shuffleWriteMetrics + .map(_.shuffleWriteTime)).getOrElse(0L) / 1e6).toLong + val shuffleWriteTimeProportion = toProportion(shuffleWriteTime) + val executorComputingTime = metricsOpt.map(_.executorRunTime).getOrElse(0L) - + shuffleReadTime - shuffleWriteTime + val executorComputingTimeProportion = toProportion(executorComputingTime) + val serializationTime = metricsOpt.map(_.resultSerializationTime).getOrElse(0L) + val serializationTimeProportion = toProportion(serializationTime) + val deserializationTime = metricsOpt.map(_.executorDeserializeTime).getOrElse(0L) + val deserializationTimeProportion = toProportion(deserializationTime) + val gettingResultTime = getGettingResultTime(taskUIData.taskInfo) + val gettingResultTimeProportion = toProportion(gettingResultTime) + val schedulerDelay = totalExecutionTime - + (executorComputingTime + shuffleReadTime + shuffleWriteTime + + serializationTime + deserializationTime + gettingResultTime) + val schedulerDelayProportion = + (100 - executorComputingTimeProportion - shuffleReadTimeProportion - + shuffleWriteTimeProportion - serializationTimeProportion - + deserializationTimeProportion - gettingResultTimeProportion) + + val schedulerDelayProportionPos = 0 + val deserializationTimeProportionPos = + schedulerDelayProportionPos + schedulerDelayProportion + val shuffleReadTimeProportionPos = + deserializationTimeProportionPos + deserializationTimeProportion + val executorRuntimeProportionPos = + shuffleReadTimeProportionPos + shuffleReadTimeProportion + val shuffleWriteTimeProportionPos = + executorRuntimeProportionPos + executorComputingTimeProportion + val serializationTimeProportionPos = + shuffleWriteTimeProportionPos + shuffleWriteTimeProportion + val gettingResultTimeProportionPos = + serializationTimeProportionPos + serializationTimeProportion + + val index = taskInfo.index + val attempt = taskInfo.attempt + val timelineObject = + s""" + { + 'className': 'task task-assignment-timeline-object', + 'group': '$executorId', + 'content': '
      ' + + 'Status: ${taskInfo.status}
      ' + + 'Launch Time: ${UIUtils.formatDate(new Date(launchTime))}' + + '${ + if (!taskInfo.running) { + s"""
      Finish Time: ${UIUtils.formatDate(new Date(finishTime))}""" + } else { + "" + } + }' + + '
      Scheduler Delay: $schedulerDelay ms' + + '
      Task Deserialization Time: ${UIUtils.formatDuration(deserializationTime)}' + + '
      Shuffle Read Time: ${UIUtils.formatDuration(shuffleReadTime)}' + + '
      Executor Computing Time: ${UIUtils.formatDuration(executorComputingTime)}' + + '
      Shuffle Write Time: ${UIUtils.formatDuration(shuffleWriteTime)}' + + '
      Result Serialization Time: ${UIUtils.formatDuration(serializationTime)}' + + '
      Getting Result Time: ${UIUtils.formatDuration(gettingResultTime)}">' + + '' + + '' + + '' + + '' + + '' + + '' + + '' + + '', + 'start': new Date($launchTime), + 'end': new Date($finishTime) + } + """ + timelineObject + }.mkString("[", ",", "]") + + val groupArrayStr = executorsSet.map { + case (executorId, host) => + s""" + { + 'id': '$executorId', + 'content': '$executorId / $host', + } + """ + }.mkString("[", ",", "]") + + + + Event Timeline + ++ + ++ + + } + def taskRow( hasAccumulators: Boolean, hasInput: Boolean, hasOutput: Boolean, hasShuffleRead: Boolean, hasShuffleWrite: Boolean, - hasBytesSpilled: Boolean)(taskData: TaskUIData): Seq[Node] = { + hasBytesSpilled: Boolean, + currentTime: Long)(taskData: TaskUIData): Seq[Node] = { taskData match { case TaskUIData(info, metrics, errorMessage) => - val duration = if (info.status == "RUNNING") info.timeRunning(System.currentTimeMillis()) + val duration = if (info.status == "RUNNING") info.timeRunning(currentTime) else metrics.map(_.executorRunTime).getOrElse(1L) val formatDuration = if (info.status == "RUNNING") UIUtils.formatDuration(duration) else metrics.map(m => UIUtils.formatDuration(m.executorRunTime)).getOrElse("") diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala index a33243d4252bf..99812db4912a3 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -93,7 +93,7 @@ private[ui] class StageTableBase( } val nameLinkUri = s"$basePathUri/stages/stage?id=${s.stageId}&attempt=${s.attemptId}" - val nameLink = {s.name} + val nameLink = {s.name} val cachedRddInfos = s.rddInfos.filter(_.numCachedPartitions > 0) val details = if (s.details.nonEmpty) { @@ -121,10 +121,23 @@ private[ui] class StageTableBase(
      {stageDesc.getOrElse("")} {killLink} {nameLink} {details}
      } + protected def missingStageRow(stageId: Int): Seq[Node] = { + {stageId} ++ + {if (isFairScheduler) {-} else Seq.empty} ++ + No data available for this stage ++ // Description + ++ // Submitted + ++ // Duration + ++ // Tasks: Succeeded/Total + ++ // Input + ++ // Output + ++ // Shuffle Read + // Shuffle Write + } + protected def stageRow(s: StageInfo): Seq[Node] = { val stageDataOption = listener.stageIdToData.get((s.stageId, s.attemptId)) if (stageDataOption.isEmpty) { - return {s.stageId}No data available for this stage + return missingStageRow(s.stageId) } val stageData = stageDataOption.get diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala index 1bd2d87e00796..5989f0035b270 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagesTab.scala @@ -25,20 +25,22 @@ import org.apache.spark.ui.{SparkUI, SparkUITab} /** Web UI showing progress status of all stages in the given SparkContext. */ private[ui] class StagesTab(parent: SparkUI) extends SparkUITab(parent, "stages") { val sc = parent.sc + val conf = parent.conf val killEnabled = parent.killEnabled - val listener = parent.jobProgressListener + val progressListener = parent.jobProgressListener + val operationGraphListener = parent.operationGraphListener attachPage(new AllStagesPage(this)) attachPage(new StagePage(this)) attachPage(new PoolPage(this)) - def isFairScheduler: Boolean = listener.schedulingMode.exists(_ == SchedulingMode.FAIR) + def isFairScheduler: Boolean = progressListener.schedulingMode.exists(_ == SchedulingMode.FAIR) def handleKillRequest(request: HttpServletRequest): Unit = { if (killEnabled && parent.securityManager.checkModifyPermissions(request.getRemoteUser)) { val killFlag = Option(request.getParameter("terminate")).getOrElse("false").toBoolean val stageId = Option(request.getParameter("id")).getOrElse("-1").toInt - if (stageId >= 0 && killFlag && listener.activeStages.contains(stageId)) { + if (stageId >= 0 && killFlag && progressListener.activeStages.contains(stageId)) { sc.get.cancelStage(stageId) } // Do a quick pause here to give Spark time to kill the stage so it shows up as diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala new file mode 100644 index 0000000000000..d6a5085db1efb --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ui.scope + +import scala.collection.mutable +import scala.collection.mutable.ListBuffer + +import org.apache.spark.Logging +import org.apache.spark.scheduler.StageInfo +import org.apache.spark.storage.StorageLevel + +/** + * A representation of a generic cluster graph used for storing information on RDD operations. + * + * Each graph is defined with a set of edges and a root cluster, which may contain children + * nodes and children clusters. Additionally, a graph may also have edges that enter or exit + * the graph from nodes that belong to adjacent graphs. + */ +private[ui] case class RDDOperationGraph( + edges: Seq[RDDOperationEdge], + outgoingEdges: Seq[RDDOperationEdge], + incomingEdges: Seq[RDDOperationEdge], + rootCluster: RDDOperationCluster) + +/** A node in an RDDOperationGraph. This represents an RDD. */ +private[ui] case class RDDOperationNode(id: Int, name: String, cached: Boolean) + +/** + * A directed edge connecting two nodes in an RDDOperationGraph. + * This represents an RDD dependency. + */ +private[ui] case class RDDOperationEdge(fromId: Int, toId: Int) + +/** + * A cluster that groups nodes together in an RDDOperationGraph. + * + * This represents any grouping of RDDs, including operation scopes (e.g. textFile, flatMap), + * stages, jobs, or any higher level construct. A cluster may be nested inside of other clusters. + */ +private[ui] class RDDOperationCluster(val id: String, private var _name: String) { + private val _childNodes = new ListBuffer[RDDOperationNode] + private val _childClusters = new ListBuffer[RDDOperationCluster] + + def name: String = _name + def setName(n: String): Unit = { _name = n } + + def childNodes: Seq[RDDOperationNode] = _childNodes.iterator.toSeq + def childClusters: Seq[RDDOperationCluster] = _childClusters.iterator.toSeq + def attachChildNode(childNode: RDDOperationNode): Unit = { _childNodes += childNode } + def attachChildCluster(childCluster: RDDOperationCluster): Unit = { + _childClusters += childCluster + } + + /** Return all the nodes contained in this cluster, including ones nested in other clusters. */ + def getAllNodes: Seq[RDDOperationNode] = { + _childNodes ++ _childClusters.flatMap(_.childNodes) + } +} + +private[ui] object RDDOperationGraph extends Logging { + + val STAGE_CLUSTER_PREFIX = "stage_" + + /** + * Construct a RDDOperationGraph for a given stage. + * + * The root cluster represents the stage, and all children clusters represent RDD operations. + * Each node represents an RDD, and each edge represents a dependency between two RDDs pointing + * from the parent to the child. + * + * This does not currently merge common operation scopes across stages. This may be worth + supporting in the future if we decide to group certain stages within the same job under + a common scope (e.g. part of a SQL query).
+ */ + def makeOperationGraph(stage: StageInfo): RDDOperationGraph = { + val edges = new ListBuffer[RDDOperationEdge] + val nodes = new mutable.HashMap[Int, RDDOperationNode] + val clusters = new mutable.HashMap[String, RDDOperationCluster] // indexed by cluster ID + + // Root cluster is the stage cluster + // Use a special prefix here to differentiate this cluster from other operation clusters + val stageClusterId = STAGE_CLUSTER_PREFIX + stage.stageId + val stageClusterName = s"Stage ${stage.stageId}" + + { if (stage.attemptId == 0) "" else s" (attempt ${stage.attemptId})" } + val rootCluster = new RDDOperationCluster(stageClusterId, stageClusterName) + + // Find nodes, edges, and operation scopes that belong to this stage + stage.rddInfos.foreach { rdd => + edges ++= rdd.parentIds.map { parentId => RDDOperationEdge(parentId, rdd.id) } + + // TODO: differentiate between the intention to cache an RDD and whether it's actually cached + val node = nodes.getOrElseUpdate( + rdd.id, RDDOperationNode(rdd.id, rdd.name, rdd.storageLevel != StorageLevel.NONE)) + + if (rdd.scope.isEmpty) { + // This RDD has no encompassing scope, so we put it directly in the root cluster + // This should happen only if an RDD is instantiated outside of a public RDD API + rootCluster.attachChildNode(node) + } else { + // Otherwise, this RDD belongs to an inner cluster, + // which may be nested inside of other clusters + val rddScopes = rdd.scope.map { scope => scope.getAllScopes }.getOrElse(Seq.empty) + val rddClusters = rddScopes.map { scope => + val clusterId = scope.id + val clusterName = scope.name.replaceAll("\\n", "\\\\n") + clusters.getOrElseUpdate(clusterId, new RDDOperationCluster(clusterId, clusterName)) + } + // Build the cluster hierarchy for this RDD + rddClusters.sliding(2).foreach { pc => + if (pc.size == 2) { + val parentCluster = pc(0) + val childCluster = pc(1) + parentCluster.attachChildCluster(childCluster) + } + } + // Attach the outermost cluster to the root cluster, and the RDD to the innermost cluster + rddClusters.headOption.foreach { cluster => rootCluster.attachChildCluster(cluster) } + rddClusters.lastOption.foreach { cluster => cluster.attachChildNode(node) } + } + } + + // Classify each edge as internal, outgoing or incoming + // This information is needed to reason about how stages relate to each other + val internalEdges = new ListBuffer[RDDOperationEdge] + val outgoingEdges = new ListBuffer[RDDOperationEdge] + val incomingEdges = new ListBuffer[RDDOperationEdge] + edges.foreach { case e: RDDOperationEdge => + val fromThisGraph = nodes.contains(e.fromId) + val toThisGraph = nodes.contains(e.toId) + (fromThisGraph, toThisGraph) match { + case (true, true) => internalEdges += e + case (true, false) => outgoingEdges += e + case (false, true) => incomingEdges += e + // should never happen + case _ => logWarning(s"Found an orphan edge in stage ${stage.stageId}: $e") + } + } + + RDDOperationGraph(internalEdges, outgoingEdges, incomingEdges, rootCluster) + } + + /** + * Generate the content of a dot file that describes the specified graph. + * + * Note that this only uses a minimal subset of features available to the DOT specification. + * Part of the styling must be done here because the rendering library must take certain + * attributes into account when arranging the graph elements. More style is added in the + * visualization later through post-processing in JavaScript. + * + * For the complete DOT specification, see http://www.graphviz.org/Documentation/dotguide.pdf. 
+ */ + def makeDotFile(graph: RDDOperationGraph): String = { + val dotFile = new StringBuilder + dotFile.append("digraph G {\n") + dotFile.append(makeDotSubgraph(graph.rootCluster, indent = " ")) + graph.edges.foreach { edge => dotFile.append(s""" ${edge.fromId}->${edge.toId};\n""") } + dotFile.append("}") + val result = dotFile.toString() + logDebug(result) + result + } + + /** Return the dot representation of a node in an RDDOperationGraph. */ + private def makeDotNode(node: RDDOperationNode): String = { + s"""${node.id} [label="${node.name} [${node.id}]"]""" + } + + /** Return the dot representation of a subgraph in an RDDOperationGraph. */ + private def makeDotSubgraph(cluster: RDDOperationCluster, indent: String): String = { + val subgraph = new StringBuilder + subgraph.append(indent + s"subgraph cluster${cluster.id} {\n") + subgraph.append(indent + s""" label="${cluster.name}";\n""") + cluster.childNodes.foreach { node => + subgraph.append(indent + s" ${makeDotNode(node)};\n") + } + cluster.childClusters.foreach { cscope => + subgraph.append(makeDotSubgraph(cscope, indent + " ")) + } + subgraph.append(indent + "}\n") + subgraph.toString() + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala new file mode 100644 index 0000000000000..89119cd3579ef --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraphListener.scala @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.scope + +import scala.collection.mutable + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler._ +import org.apache.spark.ui.SparkUI + +/** + * A SparkListener that constructs a DAG of RDD operations. + */ +private[ui] class RDDOperationGraphListener(conf: SparkConf) extends SparkListener { + + // Note: the fate of jobs and stages are tied. This means when we clean up a job, + // we always clean up all of its stages. Similarly, when we clean up a stage, we + // always clean up its job (and, transitively, other stages in the same job). 
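Returning to RDDOperationGraph above: a hedged sketch (not part of the patch) of what makeDotFile emits for a small hand-built graph. The RDD names, ids, and cluster ids are invented, the graph is constructed directly rather than via makeOperationGraph, and the indentation of the expected output is approximate.

package org.apache.spark.ui.scope

// Hypothetical sketch, not part of the patch: build a toy two-RDD graph by hand
// and print its DOT representation using the makeDotFile helper defined above.
object DotFileSketch {
  def main(args: Array[String]): Unit = {
    val root = new RDDOperationCluster("stage_0", "Stage 0")
    val scope = new RDDOperationCluster("1", "textFile")
    scope.attachChildNode(RDDOperationNode(0, "HadoopRDD", cached = false))
    scope.attachChildNode(RDDOperationNode(1, "MapPartitionsRDD", cached = false))
    root.attachChildCluster(scope)

    val graph = RDDOperationGraph(
      edges = Seq(RDDOperationEdge(0, 1)),
      outgoingEdges = Seq.empty,
      incomingEdges = Seq.empty,
      rootCluster = root)

    // Prints roughly:
    //   digraph G {
    //     subgraph clusterstage_0 {
    //       label="Stage 0";
    //       subgraph cluster1 {
    //         label="textFile";
    //         0 [label="HadoopRDD [0]"];
    //         1 [label="MapPartitionsRDD [1]"];
    //       }
    //     }
    //     0->1;
    //   }
    println(RDDOperationGraph.makeDotFile(graph))
  }
}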
+ private[ui] val jobIdToStageIds = new mutable.HashMap[Int, Seq[Int]] + private[ui] val jobIdToSkippedStageIds = new mutable.HashMap[Int, Seq[Int]] + private[ui] val stageIdToJobId = new mutable.HashMap[Int, Int] + private[ui] val stageIdToGraph = new mutable.HashMap[Int, RDDOperationGraph] + private[ui] val completedStageIds = new mutable.HashSet[Int] + + // Keep track of the order in which these are inserted so we can remove old ones + private[ui] val jobIds = new mutable.ArrayBuffer[Int] + private[ui] val stageIds = new mutable.ArrayBuffer[Int] + + // How many jobs or stages to retain graph metadata for + private val retainedJobs = + conf.getInt("spark.ui.retainedJobs", SparkUI.DEFAULT_RETAINED_JOBS) + private val retainedStages = + conf.getInt("spark.ui.retainedStages", SparkUI.DEFAULT_RETAINED_STAGES) + + /** + * Return the graph metadata for all stages in the given job. + * An empty list is returned if one or more of its stages has been cleaned up. + */ + def getOperationGraphForJob(jobId: Int): Seq[RDDOperationGraph] = synchronized { + val skippedStageIds = jobIdToSkippedStageIds.get(jobId).getOrElse(Seq.empty) + val graphs = jobIdToStageIds.get(jobId) + .getOrElse(Seq.empty) + .flatMap { sid => stageIdToGraph.get(sid) } + // Mark any skipped stages as such + graphs.foreach { g => + val stageId = g.rootCluster.id.replaceAll(RDDOperationGraph.STAGE_CLUSTER_PREFIX, "").toInt + if (skippedStageIds.contains(stageId) && !g.rootCluster.name.contains("skipped")) { + g.rootCluster.setName(g.rootCluster.name + " (skipped)") + } + } + graphs + } + + /** Return the graph metadata for the given stage, or None if no such information exists. */ + def getOperationGraphForStage(stageId: Int): Option[RDDOperationGraph] = synchronized { + stageIdToGraph.get(stageId) + } + + /** On job start, construct a RDDOperationGraph for each stage in the job for display later. */ + override def onJobStart(jobStart: SparkListenerJobStart): Unit = synchronized { + val jobId = jobStart.jobId + val stageInfos = jobStart.stageInfos + + jobIds += jobId + jobIdToStageIds(jobId) = jobStart.stageInfos.map(_.stageId).sorted + + stageInfos.foreach { stageInfo => + val stageId = stageInfo.stageId + stageIds += stageId + stageIdToJobId(stageId) = jobId + stageIdToGraph(stageId) = RDDOperationGraph.makeOperationGraph(stageInfo) + trimStagesIfNecessary() + } + + trimJobsIfNecessary() + } + + /** Keep track of stages that have completed. */ + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = synchronized { + val stageId = stageCompleted.stageInfo.stageId + if (stageIdToJobId.contains(stageId)) { + // Note: Only do this if the stage has not already been cleaned up + // Otherwise, we may never clean this stage from `completedStageIds` + completedStageIds += stageCompleted.stageInfo.stageId + } + } + + /** On job end, find all stages in this job that are skipped and mark them as such. */ + override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = synchronized { + val jobId = jobEnd.jobId + jobIdToStageIds.get(jobId).foreach { stageIds => + val skippedStageIds = stageIds.filter { sid => !completedStageIds.contains(sid) } + // Note: Only do this if the job has not already been cleaned up + // Otherwise, we may never clean this job from `jobIdToSkippedStageIds` + jobIdToSkippedStageIds(jobId) = skippedStageIds + } + } + + /** Clean metadata for old stages if we have exceeded the number to retain. 
*/ + private def trimStagesIfNecessary(): Unit = { + if (stageIds.size >= retainedStages) { + val toRemove = math.max(retainedStages / 10, 1) + stageIds.take(toRemove).foreach { id => cleanStage(id) } + stageIds.trimStart(toRemove) + } + } + + /** Clean metadata for old jobs if we have exceeded the number to retain. */ + private def trimJobsIfNecessary(): Unit = { + if (jobIds.size >= retainedJobs) { + val toRemove = math.max(retainedJobs / 10, 1) + jobIds.take(toRemove).foreach { id => cleanJob(id) } + jobIds.trimStart(toRemove) + } + } + + /** Clean metadata for the given stage, its job, and all other stages that belong to the job. */ + private[ui] def cleanStage(stageId: Int): Unit = { + completedStageIds.remove(stageId) + stageIdToGraph.remove(stageId) + stageIdToJobId.remove(stageId).foreach { jobId => cleanJob(jobId) } + } + + /** Clean metadata for the given job and all stages that belong to it. */ + private[ui] def cleanJob(jobId: Int): Unit = { + jobIdToSkippedStageIds.remove(jobId) + jobIdToStageIds.remove(jobId).foreach { stageIds => + stageIds.foreach { stageId => cleanStage(stageId) } + } + } + +} diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala index 199f731b92bcc..36943978ff594 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala @@ -21,8 +21,8 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node -import org.apache.spark.storage.{BlockId, BlockStatus, StorageStatus, StorageUtils} -import org.apache.spark.ui.{WebUIPage, UIUtils} +import org.apache.spark.status.api.v1.{AllRDDResource, RDDDataDistribution, RDDPartitionInfo} +import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils /** Page showing storage details for a given RDD */ @@ -32,28 +32,19 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { def render(request: HttpServletRequest): Seq[Node] = { val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") - val rddId = parameterId.toInt - val storageStatusList = listener.storageStatusList - val rddInfo = listener.rddInfoList.find(_.id == rddId).getOrElse { - // Rather than crashing, render an "RDD Not Found" page - return UIUtils.headerSparkPage("RDD Not Found", Seq[Node](), parent) - } + val rddStorageInfo = AllRDDResource.getRDDStorageInfo(rddId, listener, includeDetails = true) + .getOrElse { + // Rather than crashing, render an "RDD Not Found" page + return UIUtils.headerSparkPage("RDD Not Found", Seq[Node](), parent) + } // Worker table - val workers = storageStatusList.map((rddId, _)) - val workerTable = UIUtils.listingTable(workerHeader, workerRow, workers, - id = Some("rdd-storage-by-worker-table")) + val workerTable = UIUtils.listingTable(workerHeader, workerRow, + rddStorageInfo.dataDistribution.get, id = Some("rdd-storage-by-worker-table")) // Block table - val blockLocations = StorageUtils.getRddBlockLocations(rddId, storageStatusList) - val blocks = storageStatusList - .flatMap(_.rddBlocksById(rddId)) - .sortWith(_._1.name < _._1.name) - .map { case (blockId, status) => - (blockId, status, blockLocations.get(blockId).getOrElse(Seq[String]("Unknown"))) - } - val blockTable = UIUtils.listingTable(blockHeader, blockRow, blocks, + val blockTable = UIUtils.listingTable(blockHeader, blockRow, rddStorageInfo.partitions.get, id = 
Some("rdd-storage-by-block-table")) val content = @@ -62,23 +53,23 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
      • Storage Level: - {rddInfo.storageLevel.description} + {rddStorageInfo.storageLevel}
      • Cached Partitions: - {rddInfo.numCachedPartitions} + {rddStorageInfo.numCachedPartitions}
      • Total Partitions: - {rddInfo.numPartitions} + {rddStorageInfo.numPartitions}
      • Memory Size: - {Utils.bytesToString(rddInfo.memSize)} + {Utils.bytesToString(rddStorageInfo.memoryUsed)}
      • Disk Size: - {Utils.bytesToString(rddInfo.diskSize)} + {Utils.bytesToString(rddStorageInfo.diskUsed)}
      @@ -86,19 +77,22 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") {
      -

      Data Distribution on {workers.size} Executors

      +

      + Data Distribution on {rddStorageInfo.dataDistribution.map(_.size).getOrElse(0)} + Executors +

      {workerTable}
      -

      {blocks.size} Partitions

      +

      {rddStorageInfo.partitions.map(_.size).getOrElse(0)} Partitions

      {blockTable}
      ; - UIUtils.headerSparkPage("RDD Storage Info for " + rddInfo.name, content, parent) + UIUtils.headerSparkPage("RDD Storage Info for " + rddStorageInfo.name, content, parent) } /** Header fields for the worker table */ @@ -116,34 +110,32 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { "Executors") /** Render an HTML row representing a worker */ - private def workerRow(worker: (Int, StorageStatus)): Seq[Node] = { - val (rddId, status) = worker + private def workerRow(worker: RDDDataDistribution): Seq[Node] = { - {status.blockManagerId.host + ":" + status.blockManagerId.port} + {worker.address} - {Utils.bytesToString(status.memUsedByRdd(rddId))} - ({Utils.bytesToString(status.memRemaining)} Remaining) + {Utils.bytesToString(worker.memoryUsed)} + ({Utils.bytesToString(worker.memoryRemaining)} Remaining) - {Utils.bytesToString(status.diskUsedByRdd(rddId))} + {Utils.bytesToString(worker.diskUsed)} } /** Render an HTML row representing a block */ - private def blockRow(row: (BlockId, BlockStatus, Seq[String])): Seq[Node] = { - val (id, block, locations) = row + private def blockRow(row: RDDPartitionInfo): Seq[Node] = { - {id} + {row.blockName} - {block.storageLevel.description} + {row.storageLevel} - - {Utils.bytesToString(block.memSize)} + + {Utils.bytesToString(row.memoryUsed)} - - {Utils.bytesToString(block.diskSize)} + + {Utils.bytesToString(row.diskUsed)} - {locations.map(l => {l}
      )} + {row.executors.map(l => {l}
      )} } diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala index 59dc6b547c1d8..07db783c572cf 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StoragePage.scala @@ -22,7 +22,7 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.apache.spark.storage.RDDInfo -import org.apache.spark.ui.{WebUIPage, UIUtils} +import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils /** Page showing list of RDD's currently stored in the cluster */ diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala index 045bd784990d1..0351749700962 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala @@ -35,6 +35,8 @@ private[ui] class StorageTab(parent: SparkUI) extends SparkUITab(parent, "storag /** * :: DeveloperApi :: * A SparkListener that prepares information to be displayed on the BlockManagerUI. + * + * This class is thread-safe (unlike JobProgressListener) */ @DeveloperApi class StorageListener(storageStatusListener: StorageStatusListener) extends SparkListener { @@ -43,7 +45,9 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Spar def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList /** Filter RDD info to include only those with cached partitions */ - def rddInfoList: Seq[RDDInfo] = _rddInfoMap.values.filter(_.numCachedPartitions > 0).toSeq + def rddInfoList: Seq[RDDInfo] = synchronized { + _rddInfoMap.values.filter(_.numCachedPartitions > 0).toSeq + } /** Update the storage info of the RDDs whose blocks are among the given updated blocks */ private def updateRDDInfo(updatedBlocks: Seq[(BlockId, BlockStatus)]): Unit = { diff --git a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala index b725df3b44596..5415097e434bb 100644 --- a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala @@ -63,7 +63,7 @@ private[spark] object AkkaUtils extends Logging { conf: SparkConf, securityManager: SecurityManager): (ActorSystem, Int) = { - val akkaThreads = conf.getInt("spark.akka.threads", 4) + val akkaThreads = conf.getInt("spark.akka.threads", 4) val akkaBatchSize = conf.getInt("spark.akka.batchSize", 15) val akkaTimeoutS = conf.getTimeAsSeconds("spark.akka.timeout", conf.get("spark.network.timeout", "120s")) @@ -183,7 +183,9 @@ private[spark] object AkkaUtils extends Logging { lastException = e logWarning(s"Error sending message [message = $message] in $attempts attempts", e) } - Thread.sleep(retryInterval) + if (attempts < maxAttempts) { + Thread.sleep(retryInterval) + } } throw new SparkException( diff --git a/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala b/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala index ce7887b76ff96..1861d38640102 100644 --- a/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/AsynchronousListenerBus.scala @@ -40,7 +40,7 @@ private[spark] abstract class AsynchronousListenerBus[L <: AnyRef, E](name: Stri self => private var sparkContext: SparkContext = null - + /* Cap the 
capacity of the event queue so we get an explicit error (rather than * an OOM exception) if it's perpetually being added to more quickly than it's being drained. */ private val EVENT_QUEUE_CAPACITY = 10000 diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 4ac0382d80815..6f2966bd4fd31 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -179,6 +179,11 @@ private[spark] object ClosureCleaner extends Logging { cleanTransitively: Boolean, accessedFields: Map[Class[_], Set[String]]): Unit = { + if (!isClosure(func.getClass)) { + logWarning("Expected a closure; got " + func.getClass.getName) + return + } + // TODO: clean all inner closures first. This requires us to find the inner objects. // TODO: cache outerClasses / innerClasses / accessedFields @@ -234,15 +239,6 @@ private[spark] object ClosureCleaner extends Logging { logDebug(s" + fields accessed by starting closure: " + accessedFields.size) accessedFields.foreach { f => logDebug(" " + f) } - val inInterpreter = { - try { - val interpClass = Class.forName("spark.repl.Main") - interpClass.getMethod("interp").invoke(null) != null - } catch { - case _: ClassNotFoundException => true - } - } - // List of outer (class, object) pairs, ordered from outermost to innermost // Note that all outer objects but the outermost one (first one in this list) must be closures var outerPairs: List[(Class[_], AnyRef)] = (outerClasses zip outerObjects).reverse @@ -269,7 +265,7 @@ private[spark] object ClosureCleaner extends Logging { // required fields from the original object. We need the parent here because the Java // language specification requires the first constructor parameter of any closure to be // its enclosing object. 
- val clone = instantiateClass(cls, parent, inInterpreter) + val clone = instantiateClass(cls, parent) for (fieldName <- accessedFields(cls)) { val field = cls.getDeclaredField(fieldName) field.setAccessible(true) @@ -312,7 +308,9 @@ private[spark] object ClosureCleaner extends Logging { private def ensureSerializable(func: AnyRef) { try { - SparkEnv.get.closureSerializer.newInstance().serialize(func) + if (SparkEnv.get != null) { + SparkEnv.get.closureSerializer.newInstance().serialize(func) + } } catch { case ex: Exception => throw new SparkException("Task not serializable", ex) } @@ -320,9 +318,8 @@ private[spark] object ClosureCleaner extends Logging { private def instantiateClass( cls: Class[_], - enclosingObject: AnyRef, - inInterpreter: Boolean): AnyRef = { - if (!inInterpreter) { + enclosingObject: AnyRef): AnyRef = { + if (!Utils.isInInterpreter) { // This is a bona fide closure class, whose constructor has no effects // other than to set its fields, so use its constructor val cons = cls.getConstructors()(0) @@ -347,6 +344,9 @@ private[spark] object ClosureCleaner extends Logging { } } +private[spark] class ReturnStatementInClosureException + extends SparkException("Return statements aren't allowed in Spark closures") + private class ReturnStatementFinder extends ClassVisitor(ASM4) { override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { @@ -354,7 +354,7 @@ private class ReturnStatementFinder extends ClassVisitor(ASM4) { new MethodVisitor(ASM4) { override def visitTypeInsn(op: Int, tp: String) { if (op == NEW && tp.contains("scala/runtime/NonLocalReturnControl")) { - throw new SparkException("Return statements aren't allowed in Spark closures") + throw new ReturnStatementInClosureException } } } diff --git a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala index 9044aaeef2d48..31d230d0fec8e 100644 --- a/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala @@ -42,7 +42,7 @@ abstract class CompletionIterator[ +A, +I <: Iterator[A]](sub: I) extends Iterat private[spark] object CompletionIterator { def apply[A, I <: Iterator[A]](sub: I, completionFunction: => Unit) : CompletionIterator[A, I] = { - new CompletionIterator[A,I](sub) { + new CompletionIterator[A, I](sub) { def completion(): Unit = completionFunction } } diff --git a/core/src/main/scala/org/apache/spark/util/Distribution.scala b/core/src/main/scala/org/apache/spark/util/Distribution.scala index 9aea8efa38c7a..1bab707235b89 100644 --- a/core/src/main/scala/org/apache/spark/util/Distribution.scala +++ b/core/src/main/scala/org/apache/spark/util/Distribution.scala @@ -35,7 +35,7 @@ private[spark] class Distribution(val data: Array[Double], val startIdx: Int, va java.util.Arrays.sort(data, startIdx, endIdx) val length = endIdx - startIdx - val defaultProbabilities = Array(0,0.25,0.5,0.75,1.0) + val defaultProbabilities = Array(0, 0.25, 0.5, 0.75, 1.0) /** * Get the value of the distribution at the given probabilities. 
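As a hedged illustration of the ReturnStatementInClosureException introduced above: a return inside a closure compiles to scala.runtime.NonLocalReturnControl, which ReturnStatementFinder flags when SparkContext.clean runs the cleaner. Hypothetical user code that would trigger it:

    def firstMultipleOfTen(rdd: org.apache.spark.rdd.RDD[Int]): Int = {
      rdd.foreach { x =>
        if (x % 10 == 0) return x   // non-local return captured by the RDD closure -> rejected
      }
      -1
    }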
Probabilities should be @@ -44,7 +44,7 @@ private[spark] class Distribution(val data: Array[Double], val startIdx: Int, va */ def getQuantiles(probabilities: Traversable[Double] = defaultProbabilities) : IndexedSeq[Double] = { - probabilities.toIndexedSeq.map{p:Double => data(closestIndex(p))} + probabilities.toIndexedSeq.map { p: Double => data(closestIndex(p)) } } private def closestIndex(p: Double) = { diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index ee02fbd9ce069..3f162d1f6c3eb 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -28,10 +28,11 @@ import org.json4s.DefaultFormats import org.json4s.JsonDSL._ import org.json4s.JsonAST._ +import org.apache.spark._ import org.apache.spark.executor._ +import org.apache.spark.rdd.RDDOperationScope import org.apache.spark.scheduler._ import org.apache.spark.storage._ -import org.apache.spark._ /** * Serializes SparkListener events to/from JSON. This protocol provides strong backwards- @@ -228,6 +229,7 @@ private[spark] object JsonProtocol { def stageInfoToJson(stageInfo: StageInfo): JValue = { val rddInfo = JArray(stageInfo.rddInfos.map(rddInfoToJson).toList) + val parentIds = JArray(stageInfo.parentIds.map(JInt(_)).toList) val submissionTime = stageInfo.submissionTime.map(JInt(_)).getOrElse(JNothing) val completionTime = stageInfo.completionTime.map(JInt(_)).getOrElse(JNothing) val failureReason = stageInfo.failureReason.map(JString(_)).getOrElse(JNothing) @@ -236,6 +238,7 @@ private[spark] object JsonProtocol { ("Stage Name" -> stageInfo.name) ~ ("Number of Tasks" -> stageInfo.numTasks) ~ ("RDD Info" -> rddInfo) ~ + ("Parent IDs" -> parentIds) ~ ("Details" -> stageInfo.details) ~ ("Submission Time" -> submissionTime) ~ ("Completion Time" -> completionTime) ~ @@ -368,8 +371,11 @@ private[spark] object JsonProtocol { def rddInfoToJson(rddInfo: RDDInfo): JValue = { val storageLevel = storageLevelToJson(rddInfo.storageLevel) + val parentIds = JArray(rddInfo.parentIds.map(JInt(_)).toList) ("RDD ID" -> rddInfo.id) ~ ("Name" -> rddInfo.name) ~ + ("Scope" -> rddInfo.scope.map(_.toJson)) ~ + ("Parent IDs" -> parentIds) ~ ("Storage Level" -> storageLevel) ~ ("Number of Partitions" -> rddInfo.numPartitions) ~ ("Number of Cached Partitions" -> rddInfo.numCachedPartitions) ~ @@ -519,7 +525,7 @@ private[spark] object JsonProtocol { // The "Stage Infos" field was added in Spark 1.2.0 val stageInfos = Utils.jsonOption(json \ "Stage Infos") .map(_.extract[Seq[JValue]].map(stageInfoFromJson)).getOrElse { - stageIds.map(id => new StageInfo(id, 0, "unknown", 0, Seq.empty, "unknown")) + stageIds.map(id => new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown")) } SparkListenerJobStart(jobId, submissionTime, stageInfos, properties) } @@ -599,7 +605,10 @@ private[spark] object JsonProtocol { val attemptId = (json \ "Stage Attempt ID").extractOpt[Int].getOrElse(0) val stageName = (json \ "Stage Name").extract[String] val numTasks = (json \ "Number of Tasks").extract[Int] - val rddInfos = (json \ "RDD Info").extract[List[JValue]].map(rddInfoFromJson(_)) + val rddInfos = (json \ "RDD Info").extract[List[JValue]].map(rddInfoFromJson) + val parentIds = Utils.jsonOption(json \ "Parent IDs") + .map { l => l.extract[List[JValue]].map(_.extract[Int]) } + .getOrElse(Seq.empty) val details = (json \ "Details").extractOpt[String].getOrElse("") val submissionTime = 
Utils.jsonOption(json \ "Submission Time").map(_.extract[Long]) val completionTime = Utils.jsonOption(json \ "Completion Time").map(_.extract[Long]) @@ -609,7 +618,8 @@ private[spark] object JsonProtocol { case None => Seq[AccumulableInfo]() } - val stageInfo = new StageInfo(stageId, attemptId, stageName, numTasks, rddInfos, details) + val stageInfo = new StageInfo( + stageId, attemptId, stageName, numTasks, rddInfos, parentIds, details) stageInfo.submissionTime = submissionTime stageInfo.completionTime = completionTime stageInfo.failureReason = failureReason @@ -785,16 +795,22 @@ private[spark] object JsonProtocol { def rddInfoFromJson(json: JValue): RDDInfo = { val rddId = (json \ "RDD ID").extract[Int] val name = (json \ "Name").extract[String] + val scope = Utils.jsonOption(json \ "Scope") + .map(_.extract[String]) + .map(RDDOperationScope.fromJson) + val parentIds = Utils.jsonOption(json \ "Parent IDs") + .map { l => l.extract[List[JValue]].map(_.extract[Int]) } + .getOrElse(Seq.empty) val storageLevel = storageLevelFromJson(json \ "Storage Level") val numPartitions = (json \ "Number of Partitions").extract[Int] val numCachedPartitions = (json \ "Number of Cached Partitions").extract[Int] val memSize = (json \ "Memory Size").extract[Long] - // fallback to tachyon for backward compatability + // fallback to tachyon for backward compatibility val externalBlockStoreSize = (json \ "ExternalBlockStore Size").toSome .getOrElse(json \ "Tachyon Size").extract[Long] val diskSize = (json \ "Disk Size").extract[Long] - val rddInfo = new RDDInfo(rddId, name, numPartitions, storageLevel) + val rddInfo = new RDDInfo(rddId, name, numPartitions, storageLevel, parentIds, scope) rddInfo.numCachedPartitions = numCachedPartitions rddInfo.memSize = memSize rddInfo.externalBlockStoreSize = externalBlockStoreSize diff --git a/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala index 2bbfc988a99a8..a8bbad086849e 100644 --- a/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala @@ -89,7 +89,7 @@ private[spark] object MetadataCleaner { conf: SparkConf, cleanerType: MetadataCleanerType.MetadataCleanerType, delay: Int) { - conf.set(MetadataCleanerType.systemProperty(cleanerType), delay.toString) + conf.set(MetadataCleanerType.systemProperty(cleanerType), delay.toString) } /** diff --git a/core/src/main/scala/org/apache/spark/util/MutablePair.scala b/core/src/main/scala/org/apache/spark/util/MutablePair.scala index dad888548ed10..3d95b7869f494 100644 --- a/core/src/main/scala/org/apache/spark/util/MutablePair.scala +++ b/core/src/main/scala/org/apache/spark/util/MutablePair.scala @@ -45,5 +45,5 @@ case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/* , AnyRef override def toString: String = "(" + _1 + "," + _2 + ")" - override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_,_]] + override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_, _]] } diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index d91c3294ddb8b..3e838dcbe249d 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -21,31 +21,47 @@ import java.lang.management.ManagementFactory import java.lang.reflect.{Field, Modifier} import java.util.{IdentityHashMap, Random} 
import java.util.concurrent.ConcurrentHashMap + import scala.collection.mutable.ArrayBuffer import scala.runtime.ScalaRunTime import org.apache.spark.Logging +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.collection.OpenHashSet /** + * :: DeveloperApi :: * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in * memory-aware caches. * * Based on the following JavaWorld article: * http://www.javaworld.com/javaworld/javaqa/2003-12/02-qa-1226-sizeof.html */ -private[spark] object SizeEstimator extends Logging { +@DeveloperApi +object SizeEstimator extends Logging { + + /** + * Estimate the number of bytes that the given object takes up on the JVM heap. The estimate + * includes space taken up by objects referenced by the given object, their references, and so on + * and so forth. + * + * This is useful for determining the amount of heap space a broadcast variable will occupy on + * each executor or the amount of space each object will take when caching objects in + * deserialized form. This is not the same as the serialized size of the object, which will + * typically be much smaller. + */ + def estimate(obj: AnyRef): Long = estimate(obj, new IdentityHashMap[AnyRef, AnyRef]) // Sizes of primitive types - private val BYTE_SIZE = 1 + private val BYTE_SIZE = 1 private val BOOLEAN_SIZE = 1 - private val CHAR_SIZE = 2 - private val SHORT_SIZE = 2 - private val INT_SIZE = 4 - private val LONG_SIZE = 8 - private val FLOAT_SIZE = 4 - private val DOUBLE_SIZE = 8 + private val CHAR_SIZE = 2 + private val SHORT_SIZE = 2 + private val INT_SIZE = 4 + private val LONG_SIZE = 8 + private val FLOAT_SIZE = 4 + private val DOUBLE_SIZE = 8 // Fields can be primitive types, sizes are: 1, 2, 4, 8. Or fields can be pointers. The size of // a pointer is 4 or 8 depending on the JVM (32-bit or 64-bit) and UseCompressedOops flag. 
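Because estimate is now a public @DeveloperApi, callers outside Spark can size objects directly; a minimal sketch (the returned value varies with the JVM, pointer compression, and object layout):

    import org.apache.spark.util.SizeEstimator

    val approxBytes: Long = SizeEstimator.estimate(Array.fill(1000)("spark"))
    println(s"roughly $approxBytes bytes on the heap, including referenced objects")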
@@ -79,7 +95,7 @@ private[spark] object SizeEstimator extends Logging { isCompressedOops = getIsCompressedOops objectSize = if (!is64bit) 8 else { - if(!isCompressedOops) { + if (!isCompressedOops) { 16 } else { 12 @@ -155,8 +171,6 @@ private[spark] object SizeEstimator extends Logging { val shellSize: Long, val pointerFields: List[Field]) {} - def estimate(obj: AnyRef): Long = estimate(obj, new IdentityHashMap[AnyRef, AnyRef]) - private def estimate(obj: AnyRef, visited: IdentityHashMap[AnyRef, AnyRef]): Long = { val state = new SearchState(visited) state.enqueue(obj) @@ -216,7 +230,7 @@ private[spark] object SizeEstimator extends Logging { val s1 = sampleArray(array, state, rand, drawn, length) val s2 = sampleArray(array, state, rand, drawn, length) val size = math.min(s1, s2) - state.size += math.max(s1, s2) + + state.size += math.max(s1, s2) + (size * ((length - ARRAY_SAMPLE_SIZE) / (ARRAY_SAMPLE_SIZE))).toLong } } @@ -224,7 +238,7 @@ private[spark] object SizeEstimator extends Logging { private def sampleArray( array: AnyRef, - state: SearchState, + state: SearchState, rand: Random, drawn: OpenHashSet[Int], length: Int): Long = { diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 098a4b79496b2..ca5624a3d8b3d 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -20,10 +20,22 @@ package org.apache.spark.util import java.util.concurrent._ -import com.google.common.util.concurrent.ThreadFactoryBuilder +import scala.concurrent.{ExecutionContext, ExecutionContextExecutor} + +import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} private[spark] object ThreadUtils { + private val sameThreadExecutionContext = + ExecutionContext.fromExecutorService(MoreExecutors.sameThreadExecutor()) + + /** + * An `ExecutionContextExecutor` that runs each task in the thread that invokes `execute/submit`. + * The caller should make sure the tasks running in this `ExecutionContextExecutor` are short and + * never block. + */ + def sameThread: ExecutionContextExecutor = sameThreadExecutionContext + /** * Create a thread factory that names threads with a prefix and also sets the threads to daemon. */ @@ -40,6 +52,16 @@ private[spark] object ThreadUtils { Executors.newCachedThreadPool(threadFactory).asInstanceOf[ThreadPoolExecutor] } + /** + * Create a cached thread pool whose max number of threads is `maxThreadNumber`. Thread names + * are formatted as prefix-ID, where ID is a unique, sequentially assigned integer. + */ + def newDaemonCachedThreadPool(prefix: String, maxThreadNumber: Int): ThreadPoolExecutor = { + val threadFactory = namedThreadFactory(prefix) + new ThreadPoolExecutor( + 0, maxThreadNumber, 60L, TimeUnit.SECONDS, new SynchronousQueue[Runnable], threadFactory) + } + /** * Wrapper over newFixedThreadPool. Thread names are formatted as prefix-ID, where ID is a * unique, sequentially assigned integer. diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index be4db02ab86d0..693e1a0a3d5f0 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -73,6 +73,13 @@ private[spark] object Utils extends Logging { */ val SPARK_CONTEXT_SHUTDOWN_PRIORITY = 50 + /** + * The shutdown priority of temp directory must be lower than the SparkContext shutdown + * priority. 
Otherwise cleaning the temp directories while Spark jobs are running can + * throw undesirable errors at the time of shutdown. + */ + val TEMP_DIR_SHUTDOWN_PRIORITY = 25 + private val MAX_DIR_CREATION_ATTEMPTS: Int = 10 @volatile private var localRootDirs: Array[String] = null @@ -189,10 +196,11 @@ private[spark] object Utils extends Logging { private val shutdownDeleteTachyonPaths = new scala.collection.mutable.HashSet[String]() // Add a shutdown hook to delete the temp dirs when the JVM exits - addShutdownHook { () => - logDebug("Shutdown hook called") + addShutdownHook(TEMP_DIR_SHUTDOWN_PRIORITY) { () => + logInfo("Shutdown hook called") shutdownDeletePaths.foreach { dirPath => try { + logInfo("Deleting directory " + dirPath) Utils.deleteRecursively(new File(dirPath)) } catch { case e: Exception => logError(s"Exception while deleting Spark temp dir: $dirPath", e) @@ -882,7 +890,7 @@ private[spark] object Utils extends Logging { // If not, we should change it to LRUCache or something. private val hostPortParseResults = new ConcurrentHashMap[String, (String, Int)]() - def parseHostPort(hostPort: String): (String, Int) = { + def parseHostPort(hostPort: String): (String, Int) = { // Check cache first. val cached = hostPortParseResults.get(hostPort) if (cached != null) { @@ -1704,11 +1712,6 @@ private[spark] object Utils extends Logging { */ val windowsDrive = "([a-zA-Z])".r - /** - * Format a Windows path such that it can be safely passed to a URI. - */ - def formatWindowsPath(path: String): String = path.replace("\\", "/") - /** * Indicates whether Spark is currently running unit tests. */ @@ -1800,43 +1803,44 @@ private[spark] object Utils extends Logging { } } + lazy val isInInterpreter: Boolean = { + try { + val interpClass = classForName("spark.repl.Main") + interpClass.getMethod("interp").invoke(null) != null + } catch { + // Returning true seems to be a mistake. + // Currently changing it to false causes tests failures in Streaming. + // For a more detailed discussion, please, refer to + // https://github.com/apache/spark/pull/5835#issuecomment-101042271 and subsequent comments. + // Addressing this changed is tracked as https://issues.apache.org/jira/browse/SPARK-7527 + case _: ClassNotFoundException => true + } + } + /** * Return a well-formed URI for the file described by a user input string. * * If the supplied path does not contain a scheme, or is a relative path, it will be * converted into an absolute path with a file:// scheme. 
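Sketch of how the rewritten resolveURI below behaves (the paths are made-up examples):

    Utils.resolveURI("hdfs://host:9000/data.txt")  // scheme present -> URI returned unchanged
    Utils.resolveURI("/opt/jars/app.jar")          // no scheme -> file:/opt/jars/app.jar
    Utils.resolveURI("pi.py")                      // relative -> absolute file: URI under the working directory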
*/ - def resolveURI(path: String, testWindows: Boolean = false): URI = { - - // In Windows, the file separator is a backslash, but this is inconsistent with the URI format - val windows = isWindows || testWindows - val formattedPath = if (windows) formatWindowsPath(path) else path - - val uri = new URI(formattedPath) - if (uri.getPath == null) { - throw new IllegalArgumentException(s"Given path is malformed: $uri") - } - - Option(uri.getScheme) match { - case Some(windowsDrive(d)) if windows => - new URI("file:/" + uri.toString.stripPrefix("/")) - case None => - // Preserve fragments for HDFS file name substitution (denoted by "#") - // For instance, in "abc.py#xyz.py", "xyz.py" is the name observed by the application - val fragment = uri.getFragment - val part = new File(uri.getPath).toURI - new URI(part.getScheme, part.getPath, fragment) - case Some(other) => - uri + def resolveURI(path: String): URI = { + try { + val uri = new URI(path) + if (uri.getScheme() != null) { + return uri + } + } catch { + case e: URISyntaxException => } + new File(path).getAbsoluteFile().toURI() } /** Resolve a comma-separated list of paths. */ - def resolveURIs(paths: String, testWindows: Boolean = false): String = { + def resolveURIs(paths: String): String = { if (paths == null || paths.trim.isEmpty) { "" } else { - paths.split(",").map { p => Utils.resolveURI(p, testWindows) }.mkString(",") + paths.split(",").map { p => Utils.resolveURI(p) }.mkString(",") } } @@ -1847,8 +1851,7 @@ private[spark] object Utils extends Logging { Array.empty } else { paths.split(",").filter { p => - val formattedPath = if (windows) formatWindowsPath(p) else p - val uri = new URI(formattedPath) + val uri = resolveURI(p) Option(uri.getScheme).getOrElse("file") match { case windowsDrive(d) if windows => false case "local" | "file" => false @@ -2206,6 +2209,24 @@ private[spark] object Utils extends Logging { shutdownHooks.remove(ref) } + /** + * To avoid calling `Utils.getCallSite` for every single RDD we create in the body, + * set a dummy call site that RDDs use instead. This is for performance optimization. 
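A hypothetical internal caller of the helper defined next, to show the intent (skip call-site capture when building many RDDs in a tight loop):

    Utils.withDummyCallSite(sparkContext) {
      // call sites are not captured for any RDD created inside this block
      (1 to 100).map(i => sparkContext.parallelize(Seq(i)))
    }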
+ */ + def withDummyCallSite[T](sc: SparkContext)(body: => T): T = { + val oldShortCallSite = sc.getLocalProperty(CallSite.SHORT_FORM) + val oldLongCallSite = sc.getLocalProperty(CallSite.LONG_FORM) + try { + sc.setLocalProperty(CallSite.SHORT_FORM, "") + sc.setLocalProperty(CallSite.LONG_FORM, "") + body + } finally { + // Restore the old ones here + sc.setLocalProperty(CallSite.SHORT_FORM, oldShortCallSite) + sc.setLocalProperty(CallSite.LONG_FORM, oldLongCallSite) + } + } + } private [util] class SparkShutdownHookManager { diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala index 41cb8cfe2afa3..9c15b1188d91c 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala @@ -161,7 +161,7 @@ class BitSet(numBits: Int) extends Serializable { override def hasNext: Boolean = ind >= 0 override def next(): Int = { val tmp = ind - ind = nextSetBit(ind + 1) + ind = nextSetBit(ind + 1) tmp } } diff --git a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala index a60bffe611f14..516aaa44d03fc 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ChainedBuffer.scala @@ -28,11 +28,13 @@ import scala.collection.mutable.ArrayBuffer * occupy a contiguous segment of memory. */ private[spark] class ChainedBuffer(chunkSize: Int) { - private val chunkSizeLog2 = (math.log(chunkSize) / math.log(2)).toInt - assert(math.pow(2, chunkSizeLog2).toInt == chunkSize, + + private val chunkSizeLog2: Int = java.lang.Long.numberOfTrailingZeros( + java.lang.Long.highestOneBit(chunkSize)) + assert((1 << chunkSizeLog2) == chunkSize, s"ChainedBuffer chunk size $chunkSize must be a power of two") private val chunks: ArrayBuffer[Array[Byte]] = new ArrayBuffer[Array[Byte]]() - private var _size: Int = _ + private var _size: Long = 0 /** * Feed bytes from this buffer into a BlockObjectWriter. @@ -41,16 +43,16 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param os OutputStream to read into. * @param len Number of bytes to read. */ - def read(pos: Int, os: OutputStream, len: Int): Unit = { + def read(pos: Long, os: OutputStream, len: Int): Unit = { if (pos + len > _size) { throw new IndexOutOfBoundsException( s"Read of $len bytes at position $pos would go past size ${_size} of buffer") } - var chunkIndex = pos >> chunkSizeLog2 - var posInChunk = pos - (chunkIndex << chunkSizeLog2) - var written = 0 + var chunkIndex: Int = (pos >> chunkSizeLog2).toInt + var posInChunk: Int = (pos - (chunkIndex.toLong << chunkSizeLog2)).toInt + var written: Int = 0 while (written < len) { - val toRead = math.min(len - written, chunkSize - posInChunk) + val toRead: Int = math.min(len - written, chunkSize - posInChunk) os.write(chunks(chunkIndex), posInChunk, toRead) written += toRead chunkIndex += 1 @@ -66,16 +68,16 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param offs Offset in the byte array to read to. * @param len Number of bytes to read. 
*/ - def read(pos: Int, bytes: Array[Byte], offs: Int, len: Int): Unit = { + def read(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { if (pos + len > _size) { throw new IndexOutOfBoundsException( s"Read of $len bytes at position $pos would go past size of buffer") } - var chunkIndex = pos >> chunkSizeLog2 - var posInChunk = pos - (chunkIndex << chunkSizeLog2) - var written = 0 + var chunkIndex: Int = (pos >> chunkSizeLog2).toInt + var posInChunk: Int = (pos - (chunkIndex.toLong << chunkSizeLog2)).toInt + var written: Int = 0 while (written < len) { - val toRead = math.min(len - written, chunkSize - posInChunk) + val toRead: Int = math.min(len - written, chunkSize - posInChunk) System.arraycopy(chunks(chunkIndex), posInChunk, bytes, offs + written, toRead) written += toRead chunkIndex += 1 @@ -91,22 +93,22 @@ private[spark] class ChainedBuffer(chunkSize: Int) { * @param offs Offset in the byte array to write from. * @param len Number of bytes to write. */ - def write(pos: Int, bytes: Array[Byte], offs: Int, len: Int): Unit = { + def write(pos: Long, bytes: Array[Byte], offs: Int, len: Int): Unit = { if (pos > _size) { throw new IndexOutOfBoundsException( s"Write at position $pos starts after end of buffer ${_size}") } // Grow if needed - val endChunkIndex = (pos + len - 1) >> chunkSizeLog2 + val endChunkIndex: Int = ((pos + len - 1) >> chunkSizeLog2).toInt while (endChunkIndex >= chunks.length) { chunks += new Array[Byte](chunkSize) } - var chunkIndex = pos >> chunkSizeLog2 - var posInChunk = pos - (chunkIndex << chunkSizeLog2) - var written = 0 + var chunkIndex: Int = (pos >> chunkSizeLog2).toInt + var posInChunk: Int = (pos - (chunkIndex.toLong << chunkSizeLog2)).toInt + var written: Int = 0 while (written < len) { - val toWrite = math.min(len - written, chunkSize - posInChunk) + val toWrite: Int = math.min(len - written, chunkSize - posInChunk) System.arraycopy(bytes, offs + written, chunks(chunkIndex), posInChunk, toWrite) written += toWrite chunkIndex += 1 @@ -119,19 +121,19 @@ private[spark] class ChainedBuffer(chunkSize: Int) { /** * Total size of buffer that can be written to without allocating additional memory. */ - def capacity: Int = chunks.size * chunkSize + def capacity: Long = chunks.size.toLong * chunkSize /** * Size of the logical buffer. */ - def size: Int = _size + def size: Long = _size } /** * Output stream that writes to a ChainedBuffer. 
*/ private[spark] class ChainedBufferOutputStream(chainedBuffer: ChainedBuffer) extends OutputStream { - private var pos = 0 + private var pos: Long = 0 override def write(b: Int): Unit = { throw new UnsupportedOperationException() diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index b850973145077..1e4531ef395ae 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -89,9 +89,9 @@ class ExternalAppendOnlyMap[K, V, C]( // Number of bytes spilled in total private var _diskBytesSpilled = 0L - - // Use getSizeAsKb (not bytes) to maintain backwards compatibility of on units are provided - private val fileBufferSize = + + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided + private val fileBufferSize = sparkConf.getSizeAsKb("spark.shuffle.file.buffer", "32k").toInt * 1024 // Write metrics for current spill diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala index b7306cd551918..de9200454f0d4 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala @@ -109,8 +109,8 @@ private[spark] class ExternalSorter[K, V, C]( private val conf = SparkEnv.get.conf private val spillingEnabled = conf.getBoolean("spark.shuffle.spill", true) - - // Use getSizeAsKb (not bytes) to maintain backwards compatibility of on units are provided + + // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided private val fileBufferSize = conf.getSizeAsKb("spark.shuffle.file.buffer", "32k").toInt * 1024 private val transferToEnabled = conf.getBoolean("spark.file.transferTo", true) @@ -131,8 +131,7 @@ private[spark] class ExternalSorter[K, V, C]( private val kvChunkSize = conf.getInt("spark.shuffle.sort.kvChunkSize", 1 << 22) // 4 MB private val useSerializedPairBuffer = !ordering.isDefined && conf.getBoolean("spark.shuffle.sort.serializeMapOutputs", true) && - ser.isInstanceOf[KryoSerializer] && - serInstance.asInstanceOf[KryoSerializerInstance].getAutoReset + ser.supportsRelocationOfSerializedObjects // Data structures to store in-memory objects before we spill. Depending on whether we have an // Aggregator set, we either put objects into an AppendOnlyMap where we combine them, or we diff --git a/core/src/main/scala/org/apache/spark/util/collection/PartitionedSerializedPairBuffer.scala b/core/src/main/scala/org/apache/spark/util/collection/PartitionedSerializedPairBuffer.scala index b5ca0c62a04f2..554d88206e221 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/PartitionedSerializedPairBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/PartitionedSerializedPairBuffer.scala @@ -41,6 +41,13 @@ import org.apache.spark.util.collection.PartitionedSerializedPairBuffer._ * * Currently, only sorting by partition is supported. * + * Each record is laid out inside the the metaBuffer as follows. 
keyStart, a long, is split across + * two integers: + * + * +-------------+------------+------------+-------------+ + * | keyStart | keyValLen | partitionId | + * +-------------+------------+------------+-------------+ + * * @param metaInitialRecords The initial number of entries in the metadata buffer. * @param kvBlockSize The size of each byte buffer in the ChainedBuffer used to store the records. * @param serializerInstance the serializer used for serializing inserted records. @@ -68,19 +75,15 @@ private[spark] class PartitionedSerializedPairBuffer[K, V]( } val keyStart = kvBuffer.size - if (keyStart < 0) { - throw new Exception(s"Can't grow buffer beyond ${1 << 31} bytes") - } - kvSerializationStream.writeObject[Any](key) - kvSerializationStream.flush() - val valueStart = kvBuffer.size - kvSerializationStream.writeObject[Any](value) + kvSerializationStream.writeKey[Any](key) + kvSerializationStream.writeValue[Any](value) kvSerializationStream.flush() - val valueEnd = kvBuffer.size + val keyValLen = (kvBuffer.size - keyStart).toInt - metaBuffer.put(keyStart) - metaBuffer.put(valueStart) - metaBuffer.put(valueEnd) + // keyStart, a long, gets split across two ints + metaBuffer.put(keyStart.toInt) + metaBuffer.put((keyStart >> 32).toInt) + metaBuffer.put(keyValLen) metaBuffer.put(partition) } @@ -114,7 +117,7 @@ private[spark] class PartitionedSerializedPairBuffer[K, V]( } } - override def estimateSize: Long = metaBuffer.capacity * 4 + kvBuffer.capacity + override def estimateSize: Long = metaBuffer.capacity * 4L + kvBuffer.capacity override def destructiveSortedWritablePartitionedIterator(keyComparator: Option[Comparator[K]]) : WritablePartitionedIterator = { @@ -128,10 +131,10 @@ private[spark] class PartitionedSerializedPairBuffer[K, V]( var pos = 0 def writeNext(writer: BlockObjectWriter): Unit = { - val keyStart = metaBuffer.get(pos + KEY_START) - val valueEnd = metaBuffer.get(pos + VAL_END) + val keyStart = getKeyStartPos(metaBuffer, pos) + val keyValLen = metaBuffer.get(pos + KEY_VAL_LEN) pos += RECORD_SIZE - kvBuffer.read(keyStart, writer, valueEnd - keyStart) + kvBuffer.read(keyStart, writer, keyValLen) writer.recordWritten() } def nextPartition(): Int = metaBuffer.get(pos + PARTITION) @@ -163,9 +166,11 @@ private[spark] class PartitionedSerializedPairBuffer[K, V]( private[spark] class OrderedInputStream(metaBuffer: IntBuffer, kvBuffer: ChainedBuffer) extends InputStream { + import PartitionedSerializedPairBuffer._ + private var metaBufferPos = 0 private var kvBufferPos = - if (metaBuffer.position > 0) metaBuffer.get(metaBufferPos + KEY_START) else 0 + if (metaBuffer.position > 0) getKeyStartPos(metaBuffer, metaBufferPos) else 0 override def read(bytes: Array[Byte]): Int = read(bytes, 0, bytes.length) @@ -173,13 +178,14 @@ private[spark] class OrderedInputStream(metaBuffer: IntBuffer, kvBuffer: Chained if (metaBufferPos >= metaBuffer.position) { return -1 } - val bytesRemainingInRecord = metaBuffer.get(metaBufferPos + VAL_END) - kvBufferPos + val bytesRemainingInRecord = (metaBuffer.get(metaBufferPos + KEY_VAL_LEN) - + (kvBufferPos - getKeyStartPos(metaBuffer, metaBufferPos))).toInt val toRead = math.min(bytesRemainingInRecord, len) kvBuffer.read(kvBufferPos, bytes, offs, toRead) if (toRead == bytesRemainingInRecord) { metaBufferPos += RECORD_SIZE if (metaBufferPos < metaBuffer.position) { - kvBufferPos = metaBuffer.get(metaBufferPos + KEY_START) + kvBufferPos = getKeyStartPos(metaBuffer, metaBufferPos) } } else { kvBufferPos += toRead @@ -246,9 +252,14 @@ private[spark] class 
SerializedSortDataFormat extends SortDataFormat[Int, IntBuf } private[spark] object PartitionedSerializedPairBuffer { - val KEY_START = 0 - val VAL_START = 1 - val VAL_END = 2 + val KEY_START = 0 // keyStart, a long, gets split across two ints + val KEY_VAL_LEN = 2 val PARTITION = 3 - val RECORD_SIZE = Seq(KEY_START, VAL_START, VAL_END, PARTITION).size // num ints of metadata + val RECORD_SIZE = PARTITION + 1 // num ints of metadata + + def getKeyStartPos(metaBuffer: IntBuffer, metaBufferPos: Int): Long = { + val lower32 = metaBuffer.get(metaBufferPos + KEY_START) + val upper32 = metaBuffer.get(metaBufferPos + KEY_START + 1) + (upper32.toLong << 32) | (lower32 & 0xFFFFFFFFL) + } } diff --git a/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala b/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala index 4f0bf8384afc9..9a7a5a4e74868 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/SortDataFormat.scala @@ -90,9 +90,9 @@ class KVArraySortDataFormat[K, T <: AnyRef : ClassTag] extends SortDataFormat[K, override def swap(data: Array[T], pos0: Int, pos1: Int) { val tmpKey = data(2 * pos0) val tmpVal = data(2 * pos0 + 1) - data(2 * pos0) = data(2 * pos1) + data(2 * pos0) = data(2 * pos1) data(2 * pos0 + 1) = data(2 * pos1 + 1) - data(2 * pos1) = tmpKey + data(2 * pos1) = tmpKey data(2 * pos1 + 1) = tmpVal } diff --git a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala index 9e29bf9d61f17..effe6fa2adcfa 100644 --- a/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/random/StratifiedSamplingUtils.scala @@ -196,7 +196,7 @@ private[spark] object StratifiedSamplingUtils extends Logging { * * The sampling function has a unique seed per partition. */ - def getBernoulliSamplingFunction[K, V](rdd: RDD[(K, V)], + def getBernoulliSamplingFunction[K, V](rdd: RDD[(K, V)], fractions: Map[K, Double], exact: Boolean, seed: Long): (Int, Iterator[(K, V)]) => Iterator[(K, V)] = { diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java new file mode 100644 index 0000000000000..db9e82759090a --- /dev/null +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/PackedRecordPointerSuite.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
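For clarity, the keyStart encoding used by PartitionedSerializedPairBuffer above round-trips as follows (standalone Scala sketch; the position value is hypothetical):

    val keyStart: Long = (3L << 32) | 123L                            // a kvBuffer position past 2 GB
    val lower: Int = keyStart.toInt                                   // stored at KEY_START
    val upper: Int = (keyStart >> 32).toInt                           // stored at KEY_START + 1
    val restored: Long = (upper.toLong << 32) | (lower & 0xFFFFFFFFL) // what getKeyStartPos computes
    assert(restored == keyStart)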
+ */ + +package org.apache.spark.shuffle.unsafe; + +import org.junit.Test; +import static org.junit.Assert.*; + +import org.apache.spark.unsafe.memory.ExecutorMemoryManager; +import org.apache.spark.unsafe.memory.MemoryAllocator; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.TaskMemoryManager; +import static org.apache.spark.shuffle.unsafe.PackedRecordPointer.*; + +public class PackedRecordPointerSuite { + + @Test + public void heap() { + final TaskMemoryManager memoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final MemoryBlock page0 = memoryManager.allocatePage(100); + final MemoryBlock page1 = memoryManager.allocatePage(100); + final long addressInPage1 = memoryManager.encodePageNumberAndOffset(page1, + page1.getBaseOffset() + 42); + PackedRecordPointer packedPointer = new PackedRecordPointer(); + packedPointer.set(PackedRecordPointer.packPointer(addressInPage1, 360)); + assertEquals(360, packedPointer.getPartitionId()); + final long recordPointer = packedPointer.getRecordPointer(); + assertEquals(1, TaskMemoryManager.decodePageNumber(recordPointer)); + assertEquals(page1.getBaseOffset() + 42, memoryManager.getOffsetInPage(recordPointer)); + assertEquals(addressInPage1, recordPointer); + memoryManager.cleanUpAllAllocatedMemory(); + } + + @Test + public void offHeap() { + final TaskMemoryManager memoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.UNSAFE)); + final MemoryBlock page0 = memoryManager.allocatePage(100); + final MemoryBlock page1 = memoryManager.allocatePage(100); + final long addressInPage1 = memoryManager.encodePageNumberAndOffset(page1, + page1.getBaseOffset() + 42); + PackedRecordPointer packedPointer = new PackedRecordPointer(); + packedPointer.set(PackedRecordPointer.packPointer(addressInPage1, 360)); + assertEquals(360, packedPointer.getPartitionId()); + final long recordPointer = packedPointer.getRecordPointer(); + assertEquals(1, TaskMemoryManager.decodePageNumber(recordPointer)); + assertEquals(page1.getBaseOffset() + 42, memoryManager.getOffsetInPage(recordPointer)); + assertEquals(addressInPage1, recordPointer); + memoryManager.cleanUpAllAllocatedMemory(); + } + + @Test + public void maximumPartitionIdCanBeEncoded() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + packedPointer.set(PackedRecordPointer.packPointer(0, MAXIMUM_PARTITION_ID)); + assertEquals(MAXIMUM_PARTITION_ID, packedPointer.getPartitionId()); + } + + @Test + public void partitionIdsGreaterThanMaximumPartitionIdWillOverflowOrTriggerError() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + try { + // Pointers greater than the maximum partition ID will overflow or trigger an assertion error + packedPointer.set(PackedRecordPointer.packPointer(0, MAXIMUM_PARTITION_ID + 1)); + assertFalse(MAXIMUM_PARTITION_ID + 1 == packedPointer.getPartitionId()); + } catch (AssertionError e ) { + // pass + } + } + + @Test + public void maximumOffsetInPageCanBeEncoded() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + long address = TaskMemoryManager.encodePageNumberAndOffset(0, MAXIMUM_PAGE_SIZE_BYTES - 1); + packedPointer.set(PackedRecordPointer.packPointer(address, 0)); + assertEquals(address, packedPointer.getRecordPointer()); + } + + @Test + public void offsetsPastMaxOffsetInPageWillOverflow() { + PackedRecordPointer packedPointer = new PackedRecordPointer(); + long address = TaskMemoryManager.encodePageNumberAndOffset(0, 
MAXIMUM_PAGE_SIZE_BYTES); + packedPointer.set(PackedRecordPointer.packPointer(address, 0)); + assertEquals(0, packedPointer.getRecordPointer()); + } +} diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java new file mode 100644 index 0000000000000..8fa72597db24d --- /dev/null +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleInMemorySorterSuite.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe; + +import java.util.Arrays; +import java.util.Random; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.spark.HashPartitioner; +import org.apache.spark.unsafe.PlatformDependent; +import org.apache.spark.unsafe.memory.ExecutorMemoryManager; +import org.apache.spark.unsafe.memory.MemoryAllocator; +import org.apache.spark.unsafe.memory.MemoryBlock; +import org.apache.spark.unsafe.memory.TaskMemoryManager; + +public class UnsafeShuffleInMemorySorterSuite { + + private static String getStringFromDataPage(Object baseObject, long baseOffset, int strLength) { + final byte[] strBytes = new byte[strLength]; + PlatformDependent.copyMemory( + baseObject, + baseOffset, + strBytes, + PlatformDependent.BYTE_ARRAY_OFFSET, strLength); + return new String(strBytes); + } + + @Test + public void testSortingEmptyInput() { + final UnsafeShuffleInMemorySorter sorter = new UnsafeShuffleInMemorySorter(100); + final UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator iter = sorter.getSortedIterator(); + assert(!iter.hasNext()); + } + + @Test + public void testBasicSorting() throws Exception { + final String[] dataToSort = new String[] { + "Boba", + "Pearls", + "Tapioca", + "Taho", + "Condensed Milk", + "Jasmine", + "Milk Tea", + "Lychee", + "Mango" + }; + final TaskMemoryManager memoryManager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final MemoryBlock dataPage = memoryManager.allocatePage(2048); + final Object baseObject = dataPage.getBaseObject(); + final UnsafeShuffleInMemorySorter sorter = new UnsafeShuffleInMemorySorter(4); + final HashPartitioner hashPartitioner = new HashPartitioner(4); + + // Write the records into the data page and store pointers into the sorter + long position = dataPage.getBaseOffset(); + for (String str : dataToSort) { + final long recordAddress = memoryManager.encodePageNumberAndOffset(dataPage, position); + final byte[] strBytes = str.getBytes("utf-8"); + PlatformDependent.UNSAFE.putInt(baseObject, position, strBytes.length); + position += 4; + PlatformDependent.copyMemory( + strBytes, + PlatformDependent.BYTE_ARRAY_OFFSET, + baseObject, + position, + 
strBytes.length); + position += strBytes.length; + sorter.insertRecord(recordAddress, hashPartitioner.getPartition(str)); + } + + // Sort the records + final UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator iter = sorter.getSortedIterator(); + int prevPartitionId = -1; + Arrays.sort(dataToSort); + for (int i = 0; i < dataToSort.length; i++) { + Assert.assertTrue(iter.hasNext()); + iter.loadNext(); + final int partitionId = iter.packedRecordPointer.getPartitionId(); + Assert.assertTrue(partitionId >= 0 && partitionId <= 3); + Assert.assertTrue("Partition id " + partitionId + " should be >= prev id " + prevPartitionId, + partitionId >= prevPartitionId); + final long recordAddress = iter.packedRecordPointer.getRecordPointer(); + final int recordLength = PlatformDependent.UNSAFE.getInt( + memoryManager.getPage(recordAddress), memoryManager.getOffsetInPage(recordAddress)); + final String str = getStringFromDataPage( + memoryManager.getPage(recordAddress), + memoryManager.getOffsetInPage(recordAddress) + 4, // skip over record length + recordLength); + Assert.assertTrue(Arrays.binarySearch(dataToSort, str) != -1); + } + Assert.assertFalse(iter.hasNext()); + } + + @Test + public void testSortingManyNumbers() throws Exception { + UnsafeShuffleInMemorySorter sorter = new UnsafeShuffleInMemorySorter(4); + int[] numbersToSort = new int[128000]; + Random random = new Random(16); + for (int i = 0; i < numbersToSort.length; i++) { + numbersToSort[i] = random.nextInt(PackedRecordPointer.MAXIMUM_PARTITION_ID + 1); + sorter.insertRecord(0, numbersToSort[i]); + } + Arrays.sort(numbersToSort); + int[] sorterResult = new int[numbersToSort.length]; + UnsafeShuffleInMemorySorter.UnsafeShuffleSorterIterator iter = sorter.getSortedIterator(); + int j = 0; + while (iter.hasNext()) { + iter.loadNext(); + sorterResult[j] = iter.packedRecordPointer.getPartitionId(); + j += 1; + } + Assert.assertArrayEquals(numbersToSort, sorterResult); + } +} diff --git a/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java new file mode 100644 index 0000000000000..83d109115aa5c --- /dev/null +++ b/core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java @@ -0,0 +1,528 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
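The PackedRecordPointer tests above exercise packing a partition id into the upper bits of a 64-bit record pointer; a generic sketch of that idea (the bit widths here are assumptions; the real constants live in PackedRecordPointer):

    def pack(pointer: Long, partitionId: Int, partitionBits: Int = 24): Long = {
      val pointerMask = (1L << (64 - partitionBits)) - 1
      (partitionId.toLong << (64 - partitionBits)) | (pointer & pointerMask)
    }

    def partitionOf(packed: Long, partitionBits: Int = 24): Int =
      (packed >>> (64 - partitionBits)).toInt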
+ */
+
+package org.apache.spark.shuffle.unsafe;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.util.*;
+
+import scala.*;
+import scala.collection.Iterator;
+import scala.reflect.ClassTag;
+import scala.runtime.AbstractFunction1;
+
+import com.google.common.collect.Iterators;
+import com.google.common.collect.HashMultiset;
+import com.google.common.io.ByteStreams;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.mockito.invocation.InvocationOnMock;
+import org.mockito.stubbing.Answer;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.lessThan;
+import static org.junit.Assert.*;
+import static org.mockito.AdditionalAnswers.returnsFirstArg;
+import static org.mockito.Answers.RETURNS_SMART_NULLS;
+import static org.mockito.Mockito.*;
+
+import org.apache.spark.*;
+import org.apache.spark.io.CompressionCodec$;
+import org.apache.spark.io.LZ4CompressionCodec;
+import org.apache.spark.io.LZFCompressionCodec;
+import org.apache.spark.io.SnappyCompressionCodec;
+import org.apache.spark.executor.ShuffleWriteMetrics;
+import org.apache.spark.executor.TaskMetrics;
+import org.apache.spark.network.util.LimitedInputStream;
+import org.apache.spark.serializer.*;
+import org.apache.spark.scheduler.MapStatus;
+import org.apache.spark.shuffle.IndexShuffleBlockResolver;
+import org.apache.spark.shuffle.ShuffleMemoryManager;
+import org.apache.spark.storage.*;
+import org.apache.spark.unsafe.memory.ExecutorMemoryManager;
+import org.apache.spark.unsafe.memory.MemoryAllocator;
+import org.apache.spark.unsafe.memory.TaskMemoryManager;
+import org.apache.spark.util.Utils;
+
+public class UnsafeShuffleWriterSuite {
+
+  static final int NUM_PARTITITONS = 4;
+  final TaskMemoryManager taskMemoryManager =
+    new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP));
+  final HashPartitioner hashPartitioner = new HashPartitioner(NUM_PARTITITONS);
+  File mergedOutputFile;
+  File tempDir;
+  long[] partitionSizesInMergedFile;
+  final LinkedList<File> spillFilesCreated = new LinkedList<File>();
+  SparkConf conf;
+  final Serializer serializer = new KryoSerializer(new SparkConf());
+  TaskMetrics taskMetrics;
+
+  @Mock(answer = RETURNS_SMART_NULLS) ShuffleMemoryManager shuffleMemoryManager;
+  @Mock(answer = RETURNS_SMART_NULLS) BlockManager blockManager;
+  @Mock(answer = RETURNS_SMART_NULLS) IndexShuffleBlockResolver shuffleBlockResolver;
+  @Mock(answer = RETURNS_SMART_NULLS) DiskBlockManager diskBlockManager;
+  @Mock(answer = RETURNS_SMART_NULLS) TaskContext taskContext;
+  @Mock(answer = RETURNS_SMART_NULLS) ShuffleDependency<Object, Object, Object> shuffleDep;
+
+  private final class CompressStream extends AbstractFunction1<OutputStream, OutputStream> {
+    @Override
+    public OutputStream apply(OutputStream stream) {
+      if (conf.getBoolean("spark.shuffle.compress", true)) {
+        return CompressionCodec$.MODULE$.createCodec(conf).compressedOutputStream(stream);
+      } else {
+        return stream;
+      }
+    }
+  }
+
+  @After
+  public void tearDown() {
+    Utils.deleteRecursively(tempDir);
+    final long leakedMemory = taskMemoryManager.cleanUpAllAllocatedMemory();
+    if (leakedMemory != 0) {
+      fail("Test leaked " + leakedMemory + " bytes of managed memory");
+    }
+  }
+
+  @Before
+  @SuppressWarnings("unchecked")
+  public void setUp() throws IOException {
+    MockitoAnnotations.initMocks(this);
+    tempDir = Utils.createTempDir("test", "test");
+    mergedOutputFile =
File.createTempFile("mergedoutput", "", tempDir); + partitionSizesInMergedFile = null; + spillFilesCreated.clear(); + conf = new SparkConf(); + taskMetrics = new TaskMetrics(); + + when(shuffleMemoryManager.tryToAcquire(anyLong())).then(returnsFirstArg()); + + when(blockManager.diskBlockManager()).thenReturn(diskBlockManager); + when(blockManager.getDiskWriter( + any(BlockId.class), + any(File.class), + any(SerializerInstance.class), + anyInt(), + any(ShuffleWriteMetrics.class))).thenAnswer(new Answer() { + @Override + public DiskBlockObjectWriter answer(InvocationOnMock invocationOnMock) throws Throwable { + Object[] args = invocationOnMock.getArguments(); + + return new DiskBlockObjectWriter( + (BlockId) args[0], + (File) args[1], + (SerializerInstance) args[2], + (Integer) args[3], + new CompressStream(), + false, + (ShuffleWriteMetrics) args[4] + ); + } + }); + when(blockManager.wrapForCompression(any(BlockId.class), any(InputStream.class))).thenAnswer( + new Answer() { + @Override + public InputStream answer(InvocationOnMock invocation) throws Throwable { + assert (invocation.getArguments()[0] instanceof TempShuffleBlockId); + InputStream is = (InputStream) invocation.getArguments()[1]; + if (conf.getBoolean("spark.shuffle.compress", true)) { + return CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(is); + } else { + return is; + } + } + } + ); + + when(blockManager.wrapForCompression(any(BlockId.class), any(OutputStream.class))).thenAnswer( + new Answer() { + @Override + public OutputStream answer(InvocationOnMock invocation) throws Throwable { + assert (invocation.getArguments()[0] instanceof TempShuffleBlockId); + OutputStream os = (OutputStream) invocation.getArguments()[1]; + if (conf.getBoolean("spark.shuffle.compress", true)) { + return CompressionCodec$.MODULE$.createCodec(conf).compressedOutputStream(os); + } else { + return os; + } + } + } + ); + + when(shuffleBlockResolver.getDataFile(anyInt(), anyInt())).thenReturn(mergedOutputFile); + doAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocationOnMock) throws Throwable { + partitionSizesInMergedFile = (long[]) invocationOnMock.getArguments()[2]; + return null; + } + }).when(shuffleBlockResolver).writeIndexFile(anyInt(), anyInt(), any(long[].class)); + + when(diskBlockManager.createTempShuffleBlock()).thenAnswer( + new Answer>() { + @Override + public Tuple2 answer( + InvocationOnMock invocationOnMock) throws Throwable { + TempShuffleBlockId blockId = new TempShuffleBlockId(UUID.randomUUID()); + File file = File.createTempFile("spillFile", ".spill", tempDir); + spillFilesCreated.add(file); + return Tuple2$.MODULE$.apply(blockId, file); + } + }); + + when(taskContext.taskMetrics()).thenReturn(taskMetrics); + + when(shuffleDep.serializer()).thenReturn(Option.apply(serializer)); + when(shuffleDep.partitioner()).thenReturn(hashPartitioner); + } + + private UnsafeShuffleWriter createWriter( + boolean transferToEnabled) throws IOException { + conf.set("spark.file.transferTo", String.valueOf(transferToEnabled)); + return new UnsafeShuffleWriter( + blockManager, + shuffleBlockResolver, + taskMemoryManager, + shuffleMemoryManager, + new UnsafeShuffleHandle(0, 1, shuffleDep), + 0, // map id + taskContext, + conf + ); + } + + private void assertSpillFilesWereCleanedUp() { + for (File spillFile : spillFilesCreated) { + assertFalse("Spill file " + spillFile.getPath() + " was not cleaned up", + spillFile.exists()); + } + } + + private List> readRecordsFromFile() throws IOException { + final 
+      ArrayList<Tuple2<Object, Object>> recordsList = new ArrayList<Tuple2<Object, Object>>();
+    long startOffset = 0;
+    for (int i = 0; i < NUM_PARTITITONS; i++) {
+      final long partitionSize = partitionSizesInMergedFile[i];
+      if (partitionSize > 0) {
+        InputStream in = new FileInputStream(mergedOutputFile);
+        ByteStreams.skipFully(in, startOffset);
+        in = new LimitedInputStream(in, partitionSize);
+        if (conf.getBoolean("spark.shuffle.compress", true)) {
+          in = CompressionCodec$.MODULE$.createCodec(conf).compressedInputStream(in);
+        }
+        DeserializationStream recordsStream = serializer.newInstance().deserializeStream(in);
+        Iterator<Tuple2<Object, Object>> records = recordsStream.asKeyValueIterator();
+        while (records.hasNext()) {
+          Tuple2<Object, Object> record = records.next();
+          assertEquals(i, hashPartitioner.getPartition(record._1()));
+          recordsList.add(record);
+        }
+        recordsStream.close();
+        startOffset += partitionSize;
+      }
+    }
+    return recordsList;
+  }
+
+  @Test(expected=IllegalStateException.class)
+  public void mustCallWriteBeforeSuccessfulStop() throws IOException {
+    createWriter(false).stop(true);
+  }
+
+  @Test
+  public void doNotNeedToCallWriteBeforeUnsuccessfulStop() throws IOException {
+    createWriter(false).stop(false);
+  }
+
+  @Test
+  public void writeEmptyIterator() throws Exception {
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(true);
+    writer.write(Iterators.<Product2<Object, Object>>emptyIterator());
+    final Option<MapStatus> mapStatus = writer.stop(true);
+    assertTrue(mapStatus.isDefined());
+    assertTrue(mergedOutputFile.exists());
+    assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile);
+    assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleRecordsWritten());
+    assertEquals(0, taskMetrics.shuffleWriteMetrics().get().shuffleBytesWritten());
+    assertEquals(0, taskMetrics.diskBytesSpilled());
+    assertEquals(0, taskMetrics.memoryBytesSpilled());
+  }
+
+  @Test
+  public void writeWithoutSpilling() throws Exception {
+    // In this example, each partition should have exactly one record:
+    final ArrayList<Product2<Object, Object>> dataToWrite =
+      new ArrayList<Product2<Object, Object>>();
+    for (int i = 0; i < NUM_PARTITITONS; i++) {
+      dataToWrite.add(new Tuple2<Object, Object>(i, i));
+    }
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(true);
+    writer.write(dataToWrite.iterator());
+    final Option<MapStatus> mapStatus = writer.stop(true);
+    assertTrue(mapStatus.isDefined());
+    assertTrue(mergedOutputFile.exists());
+
+    long sumOfPartitionSizes = 0;
+    for (long size: partitionSizesInMergedFile) {
+      // All partitions should be the same size:
+      assertEquals(partitionSizesInMergedFile[0], size);
+      sumOfPartitionSizes += size;
+    }
+    assertEquals(mergedOutputFile.length(), sumOfPartitionSizes);
+    assertEquals(
+      HashMultiset.create(dataToWrite),
+      HashMultiset.create(readRecordsFromFile()));
+    assertSpillFilesWereCleanedUp();
+    ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get();
+    assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten());
+    assertEquals(0, taskMetrics.diskBytesSpilled());
+    assertEquals(0, taskMetrics.memoryBytesSpilled());
+    assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten());
+  }
+
+  private void testMergingSpills(
+      boolean transferToEnabled,
+      String compressionCodecName) throws IOException {
+    if (compressionCodecName != null) {
+      conf.set("spark.shuffle.compress", "true");
+      conf.set("spark.io.compression.codec", compressionCodecName);
+    } else {
+      conf.set("spark.shuffle.compress", "false");
+    }
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(transferToEnabled);
+    final ArrayList<Product2<Object, Object>> dataToWrite =
+      new ArrayList<Product2<Object, Object>>();
+    for (int i : new int[] {
+        1, 2, 3, 4, 4, 2 }) {
+      dataToWrite.add(new Tuple2<Object, Object>(i, i));
+    }
+    writer.insertRecordIntoSorter(dataToWrite.get(0));
+    writer.insertRecordIntoSorter(dataToWrite.get(1));
+    writer.insertRecordIntoSorter(dataToWrite.get(2));
+    writer.insertRecordIntoSorter(dataToWrite.get(3));
+    writer.forceSorterToSpill();
+    writer.insertRecordIntoSorter(dataToWrite.get(4));
+    writer.insertRecordIntoSorter(dataToWrite.get(5));
+    writer.closeAndWriteOutput();
+    final Option<MapStatus> mapStatus = writer.stop(true);
+    assertTrue(mapStatus.isDefined());
+    assertTrue(mergedOutputFile.exists());
+    assertEquals(2, spillFilesCreated.size());
+
+    long sumOfPartitionSizes = 0;
+    for (long size: partitionSizesInMergedFile) {
+      sumOfPartitionSizes += size;
+    }
+    assertEquals(sumOfPartitionSizes, mergedOutputFile.length());
+
+    assertEquals(
+      HashMultiset.create(dataToWrite),
+      HashMultiset.create(readRecordsFromFile()));
+    assertSpillFilesWereCleanedUp();
+    ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get();
+    assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten());
+    assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L));
+    assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length()));
+    assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L));
+    assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten());
+  }
+
+  @Test
+  public void mergeSpillsWithTransferToAndLZF() throws Exception {
+    testMergingSpills(true, LZFCompressionCodec.class.getName());
+  }
+
+  @Test
+  public void mergeSpillsWithFileStreamAndLZF() throws Exception {
+    testMergingSpills(false, LZFCompressionCodec.class.getName());
+  }
+
+  @Test
+  public void mergeSpillsWithTransferToAndLZ4() throws Exception {
+    testMergingSpills(true, LZ4CompressionCodec.class.getName());
+  }
+
+  @Test
+  public void mergeSpillsWithFileStreamAndLZ4() throws Exception {
+    testMergingSpills(false, LZ4CompressionCodec.class.getName());
+  }
+
+  @Test
+  public void mergeSpillsWithTransferToAndSnappy() throws Exception {
+    testMergingSpills(true, SnappyCompressionCodec.class.getName());
+  }
+
+  @Test
+  public void mergeSpillsWithFileStreamAndSnappy() throws Exception {
+    testMergingSpills(false, SnappyCompressionCodec.class.getName());
+  }
+
+  @Test
+  public void mergeSpillsWithTransferToAndNoCompression() throws Exception {
+    testMergingSpills(true, null);
+  }
+
+  @Test
+  public void mergeSpillsWithFileStreamAndNoCompression() throws Exception {
+    testMergingSpills(false, null);
+  }
+
+  @Test
+  public void writeEnoughDataToTriggerSpill() throws Exception {
+    when(shuffleMemoryManager.tryToAcquire(anyLong()))
+      .then(returnsFirstArg()) // Allocate initial sort buffer
+      .then(returnsFirstArg()) // Allocate initial data page
+      .thenReturn(0L) // Deny request to allocate new data page
+      .then(returnsFirstArg()); // Grant new sort buffer and data page.
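+    // The stubbed ShuffleMemoryManager above is what drives the spill in this test: the first two
+    // acquire calls (initial sort buffer and initial data page) are granted in full, the third is
+    // denied by returning 0 bytes, and later calls are granted again. Denying that single request
+    // presumably forces the writer to spill once and then continue, which the times(5)
+    // verification below appears to count.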
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
+    final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<Product2<Object, Object>>();
+    final byte[] bigByteArray = new byte[PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES / 128];
+    for (int i = 0; i < 128 + 1; i++) {
+      dataToWrite.add(new Tuple2<Object, Object>(i, bigByteArray));
+    }
+    writer.write(dataToWrite.iterator());
+    verify(shuffleMemoryManager, times(5)).tryToAcquire(anyLong());
+    assertEquals(2, spillFilesCreated.size());
+    writer.stop(true);
+    readRecordsFromFile();
+    assertSpillFilesWereCleanedUp();
+    ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get();
+    assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten());
+    assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L));
+    assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length()));
+    assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L));
+    assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten());
+  }
+
+  @Test
+  public void writeEnoughRecordsToTriggerSortBufferExpansionAndSpill() throws Exception {
+    when(shuffleMemoryManager.tryToAcquire(anyLong()))
+      .then(returnsFirstArg()) // Allocate initial sort buffer
+      .then(returnsFirstArg()) // Allocate initial data page
+      .thenReturn(0L) // Deny request to grow sort buffer
+      .then(returnsFirstArg()); // Grant new sort buffer and data page.
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
+    final ArrayList<Product2<Object, Object>> dataToWrite = new ArrayList<Product2<Object, Object>>();
+    for (int i = 0; i < UnsafeShuffleWriter.INITIAL_SORT_BUFFER_SIZE; i++) {
+      dataToWrite.add(new Tuple2<Object, Object>(i, i));
+    }
+    writer.write(dataToWrite.iterator());
+    verify(shuffleMemoryManager, times(5)).tryToAcquire(anyLong());
+    assertEquals(2, spillFilesCreated.size());
+    writer.stop(true);
+    readRecordsFromFile();
+    assertSpillFilesWereCleanedUp();
+    ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics().get();
+    assertEquals(dataToWrite.size(), shuffleWriteMetrics.shuffleRecordsWritten());
+    assertThat(taskMetrics.diskBytesSpilled(), greaterThan(0L));
+    assertThat(taskMetrics.diskBytesSpilled(), lessThan(mergedOutputFile.length()));
+    assertThat(taskMetrics.memoryBytesSpilled(), greaterThan(0L));
+    assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.shuffleBytesWritten());
+  }
+
+  @Test
+  public void writeRecordsThatAreBiggerThanDiskWriteBufferSize() throws Exception {
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
+    final ArrayList<Product2<Object, Object>> dataToWrite =
+      new ArrayList<Product2<Object, Object>>();
+    final byte[] bytes = new byte[(int) (UnsafeShuffleExternalSorter.DISK_WRITE_BUFFER_SIZE * 2.5)];
+    new Random(42).nextBytes(bytes);
+    dataToWrite.add(new Tuple2<Object, Object>(1, ByteBuffer.wrap(bytes)));
+    writer.write(dataToWrite.iterator());
+    writer.stop(true);
+    assertEquals(
+      HashMultiset.create(dataToWrite),
+      HashMultiset.create(readRecordsFromFile()));
+    assertSpillFilesWereCleanedUp();
+  }
+
+  @Test
+  public void writeRecordsThatAreBiggerThanMaxRecordSize() throws Exception {
+    // Use a custom serializer so that we have exact control over the size of serialized data.
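+    // The anonymous Serializer below simply copies each byte[] value to the output stream, so a
+    // record's serialized size is effectively the array length (the keys used here are empty or
+    // one-byte arrays). That is what lets this test place one record exactly at
+    // UnsafeShuffleExternalSorter.MAX_RECORD_SIZE and another one byte past it; the
+    // deserialization methods are left as no-op stubs because nothing is read back.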
+    final Serializer byteArraySerializer = new Serializer() {
+      @Override
+      public SerializerInstance newInstance() {
+        return new SerializerInstance() {
+          @Override
+          public SerializationStream serializeStream(final OutputStream s) {
+            return new SerializationStream() {
+              @Override
+              public void flush() { }
+
+              @Override
+              public <T> SerializationStream writeObject(T t, ClassTag<T> ev1) {
+                byte[] bytes = (byte[]) t;
+                try {
+                  s.write(bytes);
+                } catch (IOException e) {
+                  throw new RuntimeException(e);
+                }
+                return this;
+              }
+
+              @Override
+              public void close() { }
+            };
+          }
+          public <T> ByteBuffer serialize(T t, ClassTag<T> ev1) { return null; }
+          public DeserializationStream deserializeStream(InputStream s) { return null; }
+          public <T> T deserialize(ByteBuffer b, ClassLoader l, ClassTag<T> ev1) { return null; }
+          public <T> T deserialize(ByteBuffer bytes, ClassTag<T> ev1) { return null; }
+        };
+      }
+    };
+    when(shuffleDep.serializer()).thenReturn(Option.apply(byteArraySerializer));
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
+    // Insert a record and force a spill so that there's something to clean up:
+    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(new byte[1], new byte[1]));
+    writer.forceSorterToSpill();
+    // We should be able to write a record that's right _at_ the max record size
+    final byte[] atMaxRecordSize = new byte[UnsafeShuffleExternalSorter.MAX_RECORD_SIZE];
+    new Random(42).nextBytes(atMaxRecordSize);
+    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(new byte[0], atMaxRecordSize));
+    writer.forceSorterToSpill();
+    // Inserting a record that's larger than the max record size should fail:
+    final byte[] exceedsMaxRecordSize = new byte[UnsafeShuffleExternalSorter.MAX_RECORD_SIZE + 1];
+    new Random(42).nextBytes(exceedsMaxRecordSize);
+    Product2<Object, Object> hugeRecord =
+      new Tuple2<Object, Object>(new byte[0], exceedsMaxRecordSize);
+    try {
+      // Here, we write through the public `write()` interface instead of the test-only
+      // `insertRecordIntoSorter` interface:
+      writer.write(Collections.singletonList(hugeRecord).iterator());
+      fail("Expected exception to be thrown");
+    } catch (IOException e) {
+      // Pass
+    }
+    assertSpillFilesWereCleanedUp();
+  }
+
+  @Test
+  public void spillFilesAreDeletedWhenStoppingAfterError() throws IOException {
+    final UnsafeShuffleWriter<Object, Object> writer = createWriter(false);
+    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(1, 1));
+    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(2, 2));
+    writer.forceSorterToSpill();
+    writer.insertRecordIntoSorter(new Tuple2<Object, Object>(2, 2));
+    writer.stop(false);
+    assertSpillFilesWereCleanedUp();
+  }
+}
diff --git a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json
new file mode 100644
index 0000000000000..ce4fe80b66aa5
--- /dev/null
+++ b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json
@@ -0,0 +1,53 @@
+[ {
+  "id" : "local-1430917381534",
+  "name" : "Spark shell",
+  "attempts" : [ {
+    "startTime" : "2015-05-06T13:03:00.893GMT",
+    "endTime" : "2015-05-06T13:03:11.398GMT",
+    "sparkUser" : "irashid",
+    "completed" : true
+  } ]
+}, {
+  "id" : "local-1426533911241",
+  "name" : "Spark shell",
+  "attempts" : [ {
+    "attemptId" : "2",
+    "startTime" : "2015-03-17T23:11:50.242GMT",
+    "endTime" : "2015-03-17T23:12:25.177GMT",
+    "sparkUser" : "irashid",
+    "completed" : true
+  }, {
+    "attemptId" : "1",
+    "startTime" : "2015-03-16T19:25:10.242GMT",
+    "endTime" : "2015-03-16T19:25:45.177GMT",
+    "sparkUser" : "irashid",
+    "completed" : true
+  } ]
+}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json new file mode 100644 index 0000000000000..31ac9beea8788 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json @@ -0,0 +1,67 @@ +[ { + "status" : "COMPLETE", + "stageId" : 3, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 162, + "inputBytes" : 160, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :17", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 4338, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :15", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json new file mode 100644 index 0000000000000..ce4fe80b66aa5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json @@ -0,0 +1,53 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + 
"sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json new file mode 100644 index 0000000000000..cb622e147249e --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -0,0 +1,17 @@ +[ { + "id" : "", + "hostPort" : "localhost:57971", + "rddBlocks" : 8, + "memoryUsed" : 28000128, + "diskUsed" : 0, + "activeTasks" : 0, + "failedTasks" : 1, + "completedTasks" : 31, + "totalTasks" : 32, + "totalDuration" : 8820, + "totalInputBytes" : 28000288, + "totalShuffleRead" : 0, + "totalShuffleWrite" : 13180, + "maxMemory" : 278302556, + "executorLogs" : { } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json new file mode 100644 index 0000000000000..bff6a4f69d077 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json @@ -0,0 +1,23 @@ +[ { + "status" : "FAILED", + "stageId" : 2, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 7, + "numFailedTasks" : 1, + "executorRunTime" : 278, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :20", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json new file mode 100644 index 0000000000000..2e92e1fa0ec23 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_1__expectation.json @@ -0,0 +1,15 @@ +[ { + "jobId" : 0, + "name" : "foreach at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + 
"numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json new file mode 100644 index 0000000000000..2e92e1fa0ec23 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_from_multi_attempt_app_json_2__expectation.json @@ -0,0 +1,15 @@ +[ { + "jobId" : 0, + "name" : "foreach at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json new file mode 100644 index 0000000000000..cab4750270dfa --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/job_list_json_expectation.json @@ -0,0 +1,43 @@ +[ { + "jobId" : 2, + "name" : "count at :17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 1, + "name" : "count at :20", + "stageIds" : [ 1, 2 ], + "status" : "FAILED", + "numTasks" : 16, + "numActiveTasks" : 0, + "numCompletedTasks" : 15, + "numSkippedTasks" : 15, + "numFailedTasks" : 1, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 1 +}, { + "jobId" : 0, + "name" : "count at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json new file mode 100644 index 0000000000000..483632a3956ed --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/maxDate2_app_list_json_expectation.json @@ -0,0 +1,10 @@ +[ { + "id" : "local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json new file mode 100644 index 0000000000000..4b85690fd9199 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/maxDate_app_list_json_expectation.json @@ -0,0 +1,19 @@ +[ { + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : 
"local-1422981759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:38.277GMT", + "endTime" : "2015-02-03T16:42:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json new file mode 100644 index 0000000000000..dca86fe5f7e6a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json @@ -0,0 +1,35 @@ +[ { + "id" : "local-1430917381534", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-05-06T13:03:00.893GMT", + "endTime" : "2015-05-06T13:03:11.398GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +}, { + "id" : "local-1425081759269", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-28T00:02:38.277GMT", + "endTime" : "2015-02-28T00:02:46.912GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json new file mode 100644 index 0000000000000..07489ad96414a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_app_json_expectation.json @@ -0,0 +1,10 @@ +{ + "id" : "local-1422981780767", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2015-02-03T16:42:59.720GMT", + "endTime" : "2015-02-03T16:43:08.731GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json new file mode 100644 index 0000000000000..8f3d7160c723f --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_app_multi_attempt_json_expectation.json @@ -0,0 +1,17 @@ +{ + "id" : "local-1426533911241", + "name" : "Spark shell", + "attempts" : [ { + "attemptId" : "2", + "startTime" : "2015-03-17T23:11:50.242GMT", + "endTime" : "2015-03-17T23:12:25.177GMT", + "sparkUser" : "irashid", + "completed" : true + }, { + "attemptId" : "1", + "startTime" : "2015-03-16T19:25:10.242GMT", + "endTime" : "2015-03-16T19:25:45.177GMT", + "sparkUser" : "irashid", + "completed" : true + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json new file mode 100644 index 0000000000000..4a29072bdb6e4 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_job_json_expectation.json @@ -0,0 +1,15 @@ +{ + "jobId" : 0, + "name" : "count at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + 
"numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json new file mode 100644 index 0000000000000..38b5328ffbb03 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_rdd_storage_json_expectation.json @@ -0,0 +1,64 @@ +{ + "id" : 0, + "name" : "0", + "numPartitions" : 8, + "numCachedPartitions" : 8, + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 28000128, + "diskUsed" : 0, + "dataDistribution" : [ { + "address" : "localhost:57971", + "memoryUsed" : 28000128, + "memoryRemaining" : 250302428, + "diskUsed" : 0 + } ], + "partitions" : [ { + "blockName" : "rdd_0_0", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_1", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_2", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_3", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_4", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_5", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_6", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + }, { + "blockName" : "rdd_0_7", + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 3500016, + "diskUsed" : 0, + "executors" : [ "localhost:57971" ] + } ] +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json new file mode 100644 index 0000000000000..111cb8163eb3d --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -0,0 +1,270 @@ +{ + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ], + "tasks" : { + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.829GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 94000, + "recordsWritten" : 0 + } + } + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1647, + "writeTime" : 83000, + "recordsWritten" : 0 + } + } + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.832GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 88000, + "recordsWritten" : 0 + } + } + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 73000, + "recordsWritten" : 0 + } + } + }, + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + 
"jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 76000, + "recordsWritten" : 0 + } + } + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 436, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 98000, + "recordsWritten" : 0 + } + } + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1645, + "writeTime" : 101000, + "recordsWritten" : 0 + } + } + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.833GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 79000, + "recordsWritten" : 0 + } + } + } + }, + "executorSummary" : { + "" : { + "taskTime" : 3624, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 28000128, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 13180, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json new file mode 100644 index 0000000000000..ef339f89afa45 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -0,0 +1,270 @@ +[ { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : 
"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ], + "tasks" : { + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.829GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 94000, + "recordsWritten" : 0 + } + } + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1647, + "writeTime" : 83000, + "recordsWritten" : 0 + } + } + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.832GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 88000, + "recordsWritten" : 0 + } + } + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 73000, + "recordsWritten" : 0 + } + } + 
}, + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 76000, + "recordsWritten" : 0 + } + } + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.830GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 436, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 98000, + "recordsWritten" : 0 + } + } + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.831GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 434, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1645, + "writeTime" : 101000, + "recordsWritten" : 0 + } + } + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-02-03T16:43:05.833GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 435, + "resultSize" : 1902, + "jvmGcTime" : 19, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 3500016, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1648, + "writeTime" : 79000, + "recordsWritten" : 0 + } + } + } + }, + "executorSummary" : { + "" : { + "taskTime" : 3624, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 28000128, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 13180, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json new file mode 100644 index 0000000000000..f79a31022d214 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/rdd_list_storage_json_expectation.json @@ -0,0 +1,9 @@ +[ { + "id" : 0, + "name" : "0", + "numPartitions" : 8, + "numCachedPartitions" : 8, + "storageLevel" : "Memory Deserialized 1x Replicated", + "memoryUsed" : 28000128, + "diskUsed" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json 
b/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json new file mode 100644 index 0000000000000..8878e547a7984 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/running_app_list_json_expectation.json @@ -0,0 +1 @@ +[ ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json new file mode 100644 index 0000000000000..056fac7088594 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json @@ -0,0 +1,89 @@ +[ { + "status" : "COMPLETE", + "stageId" : 3, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 162, + "inputBytes" : 160, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :17", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 1, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 3476, + "inputBytes" : 28000128, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 13180, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "map at :14", + "details" : "org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : 
"default", + "accumulatorUpdates" : [ ] +}, { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 4338, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :15", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +}, { + "status" : "FAILED", + "stageId" : 2, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 7, + "numFailedTasks" : 1, + "executorRunTime" : 278, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "count at :20", + "details" : "org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json new file mode 100644 index 0000000000000..79ccacd309693 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json @@ -0,0 +1,27 @@ +[ { + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 
120, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "foreach at :15", + "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "value" : "5050" + } ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json new file mode 100644 index 0000000000000..f2cb29b31c85f --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + 
"shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + 
"executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 10, + "index" : 10, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.916GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 8, + "executorRunTime" : 73, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94709, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 11, + "index" : 11, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.918GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 75, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94507, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +}, { + "taskId" 
: 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 15, + "index" : 15, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.928GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 602780, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 
0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json new file mode 100644 index 0000000000000..c3febc5fc9447 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json @@ -0,0 +1,193 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.515GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.521GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + 
"speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.524GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json new file mode 100644 index 0000000000000..56d667d88917c --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json @@ -0,0 +1,193 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.515GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.521GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : 
"2015-03-17T23:12:16.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-17T23:12:16.524GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json new file mode 100644 index 0000000000000..e5ec3bc4c7126 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json @@ -0,0 +1,1401 @@ +[ { + "taskId" : 10, + "index" : 10, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.916GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 8, + "executorRunTime" : 73, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + 
"shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94709, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 11, + "index" : 11, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.918GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 75, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94507, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 15, + "index" : 15, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.928GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 602780, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + 
"executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + 
"taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 23, + "index" : 23, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.031GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 65, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 91844, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 24, + "index" : 24, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.098GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 43, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 157194, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 25, + "index" : 25, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.103GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 49, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94134, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 26, + "index" : 26, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.105GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 38, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108213, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 27, + "index" : 27, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.110GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 32, + "resultSize" : 2065, + "jvmGcTime" : 0, + 
"resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102019, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 28, + "index" : 28, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.113GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 29, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 104299, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 29, + "index" : 29, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.114GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 39, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 114938, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 30, + "index" : 30, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.118GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 34, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 119770, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 31, + "index" : 31, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.127GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 36, + "executorRunTime" : 24, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92619, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 32, + "index" : 32, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.148GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89603, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 33, + "index" : 33, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.149GMT", + 
"executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 43, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 118329, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 34, + "index" : 34, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.156GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 27, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 127746, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 35, + "index" : 35, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.161GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 35, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 160963, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 36, + "index" : 36, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.164GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 29, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 123855, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 37, + "index" : 37, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.165GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 32, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 111869, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 38, + "index" : 38, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.166GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 31, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + 
"bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 131158, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 39, + "index" : 39, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.180GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98748, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 40, + "index" : 40, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.197GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 14, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94792, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 41, + "index" : 41, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.200GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90765, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 42, + "index" : 42, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.203GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103713, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 43, + "index" : 43, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.204GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 171516, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 44, + "index" : 44, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.205GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + 
"accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98293, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 45, + "index" : 45, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.206GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 19, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92985, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 46, + "index" : 46, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.210GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorRunTime" : 31, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 113322, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 47, + "index" : 47, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.212GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 103015, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 48, + "index" : 48, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.220GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 24, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 139844, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 49, + "index" : 49, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.223GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 7, + "executorRunTime" : 23, + "resultSize" : 2065, + "jvmGcTime" : 6, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 
94984, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 50, + "index" : 50, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.240GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 51, + "index" : 51, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.242GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96013, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 52, + "index" : 52, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.243GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 53, + "index" : 53, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92835, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 54, + "index" : 54, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90506, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 55, + "index" : 55, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.246GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 21, + "resultSize" : 
2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108309, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 56, + "index" : 56, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.249GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 20, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90329, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 57, + "index" : 57, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.257GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96849, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 58, + "index" : 58, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.263GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97521, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 59, + "index" : 59, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.265GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100753, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json new file mode 100644 index 0000000000000..5657123a2db15 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + 
"executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 0, + "index" : 0, + 
"attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + 
"memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : 
"localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json new file mode 100644 index 0000000000000..5657123a2db15 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 29, + "executorRunTime" : 351, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 4016617, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.505GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 30, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3675510, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 1, + "index" : 
1, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.502GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 350, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3934399, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 83022, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.506GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 2579051, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.504GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 31, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 1311694, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.494GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 349, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 49294, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 3842811, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.503GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 32, + "executorRunTime" : 348, + "resultSize" : 2010, + "jvmGcTime" : 7, + "resultSerializationTime" : 2, + 
"memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89885, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 22, + "index" : 22, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.018GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 93, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 143427, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 18, + "index" : 18, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.010GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 92, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 17, + "index" : 17, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.005GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 11, + "executorRunTime" : 91, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 99944, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 21, + "index" : 21, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.015GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 88, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100270, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 16, + "index" : 16, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.001GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 108320, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 19, + "index" : 19, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.012GMT", + "executorId" : "driver", + "host" : 
"localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 5, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95788, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 9, + "index" : 9, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.915GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 84, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60489, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101664, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 20, + "index" : 20, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.014GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97716, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 14, + "index" : 14, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.925GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 83, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95646, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 8, + "index" : 8, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.914GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 80, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 60488, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 121551, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 12, + "index" : 12, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.923GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 77, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + 
"shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102476, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 13, + "index" : 13, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:06.924GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 9, + "executorRunTime" : 76, + "resultSize" : 2010, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95004, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json new file mode 100644 index 0000000000000..72fe017e9f85d --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json @@ -0,0 +1,561 @@ +[ { + "taskId" : 40, + "index" : 40, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.197GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 14, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 94792, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 86, + "index" : 86, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.374GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 95848, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 41, + "index" : 41, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.200GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90765, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 68, + "index" : 68, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.306GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + 
"diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 101750, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 58, + "index" : 58, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.263GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 97521, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 43, + "index" : 43, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.204GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 171516, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 57, + "index" : 57, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.257GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 16, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96849, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 59, + "index" : 59, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.265GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 100753, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 32, + "index" : 32, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.148GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 89603, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 87, + "index" : 87, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.374GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : 
"PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102159, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 99, + "index" : 99, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.426GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70565, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 133964, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 63, + "index" : 63, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.276GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 20, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 102779, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 90, + "index" : 90, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.385GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98472, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 39, + "index" : 39, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.180GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98748, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 42, + "index" : 42, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.203GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 10, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : 
{ + "bytesWritten" : 1710, + "writeTime" : 103713, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 51, + "index" : 51, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.242GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 2, + "executorRunTime" : 17, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 96013, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 50, + "index" : 50, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.240GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 90836, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 53, + "index" : 53, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.244GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 92835, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 44, + "index" : 44, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.205GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98293, + "recordsWritten" : 10 + } + } +}, { + "taskId" : 80, + "index" : 80, + "attempt" : 0, + "launchTime" : "2015-05-06T13:03:07.341GMT", + "executorId" : "driver", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 18, + "resultSize" : 2065, + "jvmGcTime" : 5, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "inputMetrics" : { + "bytesRead" : 70564, + "recordsRead" : 10000 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 1710, + "writeTime" : 98069, + "recordsWritten" : 10 + } + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json new file mode 100644 index 
0000000000000..bc3c302813de2 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w__custom_quantiles_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.01, 0.5, 0.99 ], + "executorDeserializeTime" : [ 1.0, 3.0, 36.0 ], + "executorRunTime" : [ 16.0, 28.0, 351.0 ], + "resultSize" : [ 2010.0, 2065.0, 2065.0 ], + "jvmGcTime" : [ 0.0, 0.0, 7.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 2.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0 ], + "inputMetrics" : { + "bytesRead" : [ 60488.0, 70564.0, 70565.0 ], + "recordsRead" : [ 10000.0, 10000.0, 10000.0 ] + }, + "shuffleWriteMetrics" : { + "writeBytes" : [ 1710.0, 1710.0, 1710.0 ], + "writeRecords" : [ 10.0, 10.0, 10.0 ], + "writeTime" : [ 89437.0, 102159.0, 4016617.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json new file mode 100644 index 0000000000000..e084c839f1d5a --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_read_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.05, 0.25, 0.5, 0.75, 0.95 ], + "executorDeserializeTime" : [ 1.0, 2.0, 2.0, 2.0, 3.0 ], + "executorRunTime" : [ 30.0, 74.0, 75.0, 76.0, 79.0 ], + "resultSize" : [ 1034.0, 1034.0, 1034.0, 1034.0, 1034.0 ], + "jvmGcTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "shuffleReadMetrics" : { + "readBytes" : [ 17100.0, 17100.0, 17100.0, 17100.0, 17100.0 ], + "readRecords" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ], + "remoteBlocksFetched" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "localBlocksFetched" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ], + "fetchWaitTime" : [ 0.0, 0.0, 0.0, 1.0, 1.0 ], + "remoteBytesRead" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "totalBlocksFetched" : [ 100.0, 100.0, 100.0, 100.0, 100.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json new file mode 100644 index 0000000000000..6ac7811ce691b --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_summary_w_shuffle_write_expectation.json @@ -0,0 +1,19 @@ +{ + "quantiles" : [ 0.05, 0.25, 0.5, 0.75, 0.95 ], + "executorDeserializeTime" : [ 2.0, 2.0, 3.0, 7.0, 31.0 ], + "executorRunTime" : [ 16.0, 18.0, 28.0, 49.0, 349.0 ], + "resultSize" : [ 2010.0, 2065.0, 2065.0, 2065.0, 2065.0 ], + "jvmGcTime" : [ 0.0, 0.0, 0.0, 5.0, 7.0 ], + "resultSerializationTime" : [ 0.0, 0.0, 0.0, 0.0, 1.0 ], + "memoryBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "diskBytesSpilled" : [ 0.0, 0.0, 0.0, 0.0, 0.0 ], + "inputMetrics" : { + "bytesRead" : [ 60488.0, 70564.0, 70564.0, 70564.0, 70564.0 ], + "recordsRead" : [ 10000.0, 10000.0, 10000.0, 10000.0, 10000.0 ] + }, + "shuffleWriteMetrics" : { + "writeBytes" : [ 1710.0, 1710.0, 1710.0, 1710.0, 1710.0 ], + "writeRecords" : [ 10.0, 10.0, 10.0, 10.0, 10.0 ], + "writeTime" : [ 90329.0, 95848.0, 102159.0, 121551.0, 2579051.0 ] + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json 
b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json new file mode 100644 index 0000000000000..32d5731676ad5 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -0,0 +1,242 @@ +{ + "status" : "COMPLETE", + "stageId" : 0, + "attemptId" : 0, + "numActiveTasks" : 0, + "numCompleteTasks" : 8, + "numFailedTasks" : 0, + "executorRunTime" : 120, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleReadBytes" : 0, + "shuffleReadRecords" : 0, + "shuffleWriteBytes" : 0, + "shuffleWriteRecords" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "name" : "foreach at :15", + "details" : "org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)", + "schedulingPool" : "default", + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "value" : "5050" + } ], + "tasks" : { + "2" : { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "378", + "value" : "378" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "5" : { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "897", + "value" : "3750" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "4" : { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "678", + "value" : "2853" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "7" : { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.524GMT", + "executorId" : 
"", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "1222", + "value" : "4972" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "1" : { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.521GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "247", + "value" : "2175" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "3" : { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.522GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "572", + "value" : "950" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 13, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "6" : { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.523GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "978", + "value" : "1928" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 12, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + }, + "0" : { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "launchTime" : "2015-03-16T19:25:36.515GMT", + "executorId" : "", + "host" : "localhost", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ { + "id" : 1, + "name" : "my counter", + "update" : "78", + "value" : "5050" + } ], + "taskMetrics" : { + "executorDeserializeTime" : 14, + "executorRunTime" : 15, + "resultSize" : 697, + "jvmGcTime" : 0, + "resultSerializationTime" : 2, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } + }, + "executorSummary" : { + "" : { + "taskTime" : 418, + "failedTasks" : 0, + "succeededTasks" : 8, + "inputBytes" : 0, + "outputBytes" : 0, + "shuffleRead" : 0, + "shuffleWrite" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0 + } + } +} \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json new file mode 100644 index 0000000000000..cab4750270dfa --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/succeeded_failed_job_list_json_expectation.json @@ -0,0 +1,43 @@ +[ { + "jobId" : 2, + "name" : "count at :17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + 
"numFailedStages" : 0 +}, { + "jobId" : 1, + "name" : "count at :20", + "stageIds" : [ 1, 2 ], + "status" : "FAILED", + "numTasks" : 16, + "numActiveTasks" : 0, + "numCompletedTasks" : 15, + "numSkippedTasks" : 15, + "numFailedTasks" : 1, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 1 +}, { + "jobId" : 0, + "name" : "count at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json new file mode 100644 index 0000000000000..6fd25befbf7e8 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/succeeded_job_list_json_expectation.json @@ -0,0 +1,29 @@ +[ { + "jobId" : 2, + "name" : "count at :17", + "stageIds" : [ 3 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +}, { + "jobId" : 0, + "name" : "count at :15", + "stageIds" : [ 0 ], + "status" : "SUCCEEDED", + "numTasks" : 8, + "numActiveTasks" : 0, + "numCompletedTasks" : 8, + "numSkippedTasks" : 8, + "numFailedTasks" : 0, + "numActiveStages" : 0, + "numCompletedStages" : 1, + "numSkippedStages" : 0, + "numFailedStages" : 0 +} ] \ No newline at end of file diff --git a/core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1422981759269/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 new file mode 100755 index 0000000000000..4794e56d1107a --- /dev/null +++ b/core/src/test/resources/spark-events/local-1422981759269/EVENT_LOG_1 @@ -0,0 +1,88 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":57967},"Maximum Memory":278302556,"Timestamp":1422981759407} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57965","spark.repl.class.uri":"http://192.168.1.103:57964","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57966","spark.tachyonStore.folderName":"spark-fd6c823a-8a18-4113-8306-1fa7bb623a7f","spark.app.id":"local-1422981759269"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1422981759269","Timestamp":1422981758277,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762632,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762633,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":520,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task 
End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762635,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":9,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981762637,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981762069,"Completion Time":1422981762637,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764001,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization 
Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":138000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":106000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":99000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":123000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":108000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764004,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":97000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":132000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764005,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":81000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981763578,"Completion Time":1422981764005,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage 
ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":28,"Result Size":1013,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981764014,"Completion Time":1422981764045,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3,4]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task 
ID":21,"Index":4,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1422981764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1422981764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764642,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1422981764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1422981764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1422981764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1422981764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981764648,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":247,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory 
Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981764396,"Completion Time":1422981764648,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[5]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD 
ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1422981765029,"Executor 
ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1422981765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":32,"Index":7,"Attempt":0,"Launch Time":1422981765029,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1422981765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":14,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1422981765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981765049,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981765026,"Completion Time":1422981765050,"Accumulables":[]}} 
+{"Event":"SparkListenerJobEnd","Job ID":3,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1422981766912} diff --git a/core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1422981759269/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1422981780767/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 new file mode 100755 index 0000000000000..f14a000bf2c28 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1422981780767/EVENT_LOG_1 @@ -0,0 +1,82 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":57971},"Maximum Memory":278302556,"Timestamp":1422981780906} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57969","spark.repl.class.uri":"http://192.168.1.103:57968","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57970","spark.tachyonStore.folderName":"spark-3f19daee-844c-41d0-a3fc-5e3e508f9731","spark.app.id":"local-1422981780767"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API 
Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1422981780767","Timestamp":1422981779720,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981784234,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task 
Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981784243,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784812,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784814,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1422981784234,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784816,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1422981784243,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1422981784816,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784817,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":541,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1422981784241,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784817,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1422981784242,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784818,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":543,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1422981784240,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981784818,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":542,"Result Size":1268,"JVM GC Time":25,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use 
Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981784228,"Completion Time":1422981784819,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon 
Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981785829,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task 
ID":10,"Index":2,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981785832,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981785833,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1422981785832,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786282,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":88000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1422981785829,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":435,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":94000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1422981785833,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":435,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":79000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786283,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":73000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786284,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":83000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786284,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":436,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":98000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1422981785831,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786285,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":101000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":10,"Index":2,"Attempt":0,"Launch Time":1422981785830,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786286,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":434,"Result Size":1902,"JVM GC Time":19,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":76000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981785829,"Completion Time":1422981786286,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981786296,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":2,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":3,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":4,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":5,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":6,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":7,"Attempt":0,"Launch Time":1422981786299,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":4,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786337,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization 
Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":5,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786339,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":1,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":2,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1422981786296,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":7,"Attempt":0,"Launch Time":1422981786299,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786341,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":35,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes 
Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":6,"Attempt":0,"Launch Time":1422981786298,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786342,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":34,"Result Size":862,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"ExceptionFailure","Class Name":"java.lang.RuntimeException","Description":"got a 3, failing","Stack Trace":[{"Declaring Class":"$line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1","Method Name":"apply","File Name":"","Line Number":18},{"Declaring Class":"$line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1","Method Name":"apply","File Name":"","Line Number":17},{"Declaring Class":"scala.collection.Iterator$$anon$11","Method Name":"next","File Name":"Iterator.scala","Line Number":328},{"Declaring Class":"org.apache.spark.util.Utils$","Method Name":"getIteratorSize","File Name":"Utils.scala","Line Number":1311},{"Declaring Class":"org.apache.spark.rdd.RDD$$anonfun$count$1","Method Name":"apply","File Name":"RDD.scala","Line Number":910},{"Declaring Class":"org.apache.spark.rdd.RDD$$anonfun$count$1","Method Name":"apply","File Name":"RDD.scala","Line Number":910},{"Declaring Class":"org.apache.spark.SparkContext$$anonfun$runJob$4","Method Name":"apply","File Name":"SparkContext.scala","Line Number":1314},{"Declaring Class":"org.apache.spark.SparkContext$$anonfun$runJob$4","Method Name":"apply","File Name":"SparkContext.scala","Line Number":1314},{"Declaring Class":"org.apache.spark.scheduler.ResultTask","Method Name":"runTask","File Name":"ResultTask.scala","Line Number":61},{"Declaring Class":"org.apache.spark.scheduler.Task","Method Name":"run","File Name":"Task.scala","Line Number":56},{"Declaring Class":"org.apache.spark.executor.Executor$TaskRunner","Method Name":"run","File Name":"Executor.scala","Line Number":196},{"Declaring Class":"java.util.concurrent.ThreadPoolExecutor","Method Name":"runWorker","File Name":"ThreadPoolExecutor.java","Line Number":1145},{"Declaring Class":"java.util.concurrent.ThreadPoolExecutor$Worker","Method Name":"run","File Name":"ThreadPoolExecutor.java","Line Number":615},{"Declaring Class":"java.lang.Thread","Method Name":"run","File Name":"Thread.java","Line Number":745}],"Full Stack Trace":"java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat 
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n","Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":36,"Result Size":0,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}},"Task Info":{"Task ID":19,"Index":3,"Attempt":0,"Launch Time":1422981786297,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981786343,"Failed":true,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":36,"Result Size":0,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :20","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:20)\n$line11.$read$$iwC$$iwC$$iwC.(:25)\n$line11.$read$$iwC$$iwC.(:27)\n$line11.$read$$iwC.(:29)\n$line11.$read.(:31)\n$line11.$read$.(:35)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981786296,"Completion Time":1422981786347,"Failure Reason":"Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost task 3.0 in stage 2.0 (TID 19, localhost): java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat 
org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:","Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobFailed","Exception":{"Message":"Job aborted due to stage failure: Task 3 in stage 2.0 failed 1 times, most recent failure: Lost task 3.0 in stage 2.0 (TID 19, localhost): java.lang.RuntimeException: got a 3, failing\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:18)\n\tat $line11.$read$$iwC$$iwC$$iwC$$iwC$$anonfun$1.apply(:17)\n\tat scala.collection.Iterator$$anon$11.next(Iterator.scala:328)\n\tat org.apache.spark.util.Utils$.getIteratorSize(Utils.scala:1311)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.rdd.RDD$$anonfun$count$1.apply(RDD.scala:910)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.SparkContext$$anonfun$runJob$4.apply(SparkContext.scala:1314)\n\tat org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61)\n\tat org.apache.spark.scheduler.Task.run(Task.scala:56)\n\tat org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:196)\n\tat java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)\n\tat java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)\n\tat java.lang.Thread.run(Thread.java:745)\n\nDriver stacktrace:","Stack Trace":[{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages","File Name":"DAGScheduler.scala","Line Number":1214},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":1203},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":1202},{"Declaring Class":"scala.collection.mutable.ResizableArray$class","Method Name":"foreach","File Name":"ResizableArray.scala","Line Number":59},{"Declaring Class":"scala.collection.mutable.ArrayBuffer","Method Name":"foreach","File Name":"ArrayBuffer.scala","Line Number":47},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"abortStage","File Name":"DAGScheduler.scala","Line Number":1202},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1","Method Name":"apply","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring Class":"scala.Option","Method Name":"foreach","File Name":"Option.scala","Line Number":236},{"Declaring Class":"org.apache.spark.scheduler.DAGScheduler","Method Name":"handleTaskSetFailed","File Name":"DAGScheduler.scala","Line Number":696},{"Declaring 
Class":"org.apache.spark.scheduler.DAGSchedulerEventProcessActor$$anonfun$receive$2","Method Name":"applyOrElse","File Name":"DAGScheduler.scala","Line Number":1420},{"Declaring Class":"akka.actor.Actor$class","Method Name":"aroundReceive","File Name":"Actor.scala","Line Number":465},{"Declaring Class":"org.apache.spark.scheduler.DAGSchedulerEventProcessActor","Method Name":"aroundReceive","File Name":"DAGScheduler.scala","Line Number":1375},{"Declaring Class":"akka.actor.ActorCell","Method Name":"receiveMessage","File Name":"ActorCell.scala","Line Number":516},{"Declaring Class":"akka.actor.ActorCell","Method Name":"invoke","File Name":"ActorCell.scala","Line Number":487},{"Declaring Class":"akka.dispatch.Mailbox","Method Name":"processMailbox","File Name":"Mailbox.scala","Line Number":238},{"Declaring Class":"akka.dispatch.Mailbox","Method Name":"run","File Name":"Mailbox.scala","Line Number":220},{"Declaring Class":"akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask","Method Name":"exec","File Name":"AbstractDispatcher.scala","Line Number":393},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinTask","Method Name":"doExec","File Name":"ForkJoinTask.java","Line Number":260},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinPool$WorkQueue","Method Name":"runTask","File Name":"ForkJoinPool.java","Line Number":1339},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinPool","Method Name":"runWorker","File Name":"ForkJoinPool.java","Line Number":1979},{"Declaring Class":"scala.concurrent.forkjoin.ForkJoinWorkerThread","Method Name":"run","File Name":"ForkJoinWorkerThread.java","Line Number":107}]}}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD 
ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Launch Time":1422981787194,"Executor 
ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787222,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Launch Time":1422981787191,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":22,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787223,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result 
Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Launch Time":1422981787192,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":19,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Launch Time":1422981787193,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787225,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":22,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Launch Time":1422981787194,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1422981787225,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":21,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":6,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums_2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line19.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line19.$read$$iwC$$iwC$$iwC.(:22)\n$line19.$read$$iwC$$iwC.(:24)\n$line19.$read$$iwC.(:26)\n$line19.$read.(:28)\n$line19.$read$.(:32)\n$line19.$read$.()\n$line19.$eval$.(:7)\n$line19.$eval$.()\n$line19.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1422981787191,"Completion Time":1422981787226,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1422981788731} diff --git a/core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1422981780767/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1425081759269/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 new file mode 100755 index 0000000000000..9745b36b09e44 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1425081759269/EVENT_LOG_1 @@ -0,0 +1,88 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":57967},"Maximum Memory":278302556,"Timestamp":1425081759407} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","Java Version":"1.7.0_67 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.103","spark.eventLog.enabled":"true","spark.driver.port":"57965","spark.repl.class.uri":"http://192.168.1.103:57964","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.103:57966","spark.tachyonStore.folderName":"spark-fd6c823a-8a18-4113-8306-1fa7bb623a7f","spark.app.id":"local-1425081759269"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.7","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.7","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"24.65-b04","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.7.0_67-b01","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"51.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.7.0_67.jdk/Contents/Home/jre","java.version":"1.7.0_67","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1425081759269","Timestamp":1425081758277,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1425081762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1425081762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1425081762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1425081762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762632,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_6","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762633,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":520,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_2","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task 
End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1425081762082,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_3","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1425081762084,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762634,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_5","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1425081762083,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762635,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_4","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1425081762075,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_0","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1425081762085,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762636,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":9,"Executor Run Time":521,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_7","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1425081762081,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081762637,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":522,"Result Size":1268,"JVM GC Time":20,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Updated Blocks":[{"Block ID":"rdd_0_1","Status":{"Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Memory Size":3500016,"Tachyon Size":0,"Disk Size":0}}]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":8,"Memory Size":28000128,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081762069,"Completion Time":1425081762637,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":1,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[1,2]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764001,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization 
Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":138000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":106000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1645,"Shuffle Write Time":99000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Launch Time":1425081763578,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":123000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764003,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":406,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":108000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Launch Time":1425081763579,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764004,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1647,"Shuffle Write Time":97000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Launch Time":1425081763581,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764004,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":132000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Launch Time":1425081763580,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764005,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":407,"Result Size":1902,"JVM GC Time":18,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1648,"Shuffle Write Time":81000},"Input Metrics":{"Data Read Method":"Memory","Bytes Read":3500016}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081763578,"Completion Time":1425081764005,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage 
ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1425081764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1425081764014,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":0,"Executor Run Time":28,"Result Size":1013,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"first at :17","Number of Tasks":1,"RDD Info":[{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.first(RDD.scala:1093)\n$line11.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line11.$read$$iwC$$iwC$$iwC.(:22)\n$line11.$read$$iwC$$iwC.(:24)\n$line11.$read$$iwC.(:26)\n$line11.$read.(:28)\n$line11.$read$.(:32)\n$line11.$read$.()\n$line11.$eval$.(:7)\n$line11.$eval$.()\n$line11.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081764014,"Completion Time":1425081764045,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":2,"Stage Infos":[{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"map at :14","Number of Tasks":8,"RDD Info":[{"RDD ID":1,"Name":"1","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":true,"Use Tachyon":false,"Deserialized":true,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:271)\n$line10.$read$$iwC$$iwC$$iwC$$iwC.(:14)\n$line10.$read$$iwC$$iwC$$iwC.(:19)\n$line10.$read$$iwC$$iwC.(:21)\n$line10.$read$$iwC.(:23)\n$line10.$read.(:25)\n$line10.$read$.(:29)\n$line10.$read$.()\n$line10.$eval$.(:7)\n$line10.$eval$.()\n$line10.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[3,4]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task 
ID":21,"Index":4,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1425081764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":7,"Attempt":0,"Launch Time":1425081764399,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764642,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":2,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":4,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764643,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":240,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":5,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish 
Time":1425081764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":6,"Attempt":0,"Launch Time":1425081764398,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764645,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":241,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":0,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":1,"Attempt":0,"Launch Time":1425081764396,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764646,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":243,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":3,"Attempt":0,"Launch Time":1425081764397,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081764648,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":247,"Result Size":824,"JVM GC Time":31,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"saveAsTextFile at :19","Number of Tasks":8,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory 
Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1164)\n$line13.$read$$iwC$$iwC$$iwC$$iwC.(:19)\n$line13.$read$$iwC$$iwC$$iwC.(:24)\n$line13.$read$$iwC$$iwC.(:26)\n$line13.$read$$iwC.(:28)\n$line13.$read.(:30)\n$line13.$read$.(:34)\n$line13.$read$.()\n$line13.$eval$.(:7)\n$line13.$eval$.()\n$line13.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081764396,"Completion Time":1425081764648,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerJobStart","Job ID":3,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[5]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD 
ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":7,"Attempt":0,"Launch Time":1425081765029,"Executor 
ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":2,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765045,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":5,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":0,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765046,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":4,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":3,"Attempt":0,"Launch Time":1425081765027,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765047,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":32,"Index":7,"Attempt":0,"Launch Time":1425081765029,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":13,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":6,"Attempt":0,"Launch Time":1425081765028,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765048,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":14,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":16}}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":1,"Attempt":0,"Launch Time":1425081765026,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1425081765049,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":12,"Result Size":1812,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":32}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :17","Number of Tasks":8,"RDD Info":[{"RDD ID":5,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":4,"Name":"/Users/irashid/spark-examples/tmp_data/sums","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.count(RDD.scala:910)\n$line15.$read$$iwC$$iwC$$iwC$$iwC.(:17)\n$line15.$read$$iwC$$iwC$$iwC.(:22)\n$line15.$read$$iwC$$iwC.(:24)\n$line15.$read$$iwC.(:26)\n$line15.$read.(:28)\n$line15.$read$.(:32)\n$line15.$read$.()\n$line15.$eval$.(:7)\n$line15.$eval$.()\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:606)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1425081765026,"Completion Time":1425081765050,"Accumulables":[]}} 
+{"Event":"SparkListenerJobEnd","Job ID":3,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1425081766912} diff --git a/core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1425081759269/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1426533911241/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 new file mode 100755 index 0000000000000..9ef5bd5d92de5 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1426533911241/EVENT_LOG_1 @@ -0,0 +1,24 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":58610},"Maximum Memory":278019440,"Timestamp":1426533911361} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.105","spark.eventLog.enabled":"true","spark.driver.port":"58608","spark.repl.class.uri":"http://192.168.1.105:58607","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.105:58609","spark.tachyonStore.folderName":"spark-5e9b7f26-8e97-4b43-82d6-25c141530da9","spark.app.id":"local-1426533911241"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API 
Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/etc/hadoop":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1426533911241","Timestamp":1426533910242,"User":"irashid","App Attempt ID":"1"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426533936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426533936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task 
Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426533936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936570,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"378","Value":"378"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936572,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"572","Value":"950"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"978","Value":"1928"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426533936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"247","Value":"2175"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":4,"Index":4,"Attempt":0,"Launch Time":1426533936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"678","Value":"2853"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426533936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"897","Value":"3750"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426533936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"1222","Value":"4972"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426533936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426533936576,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"78","Value":"5050"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1426533936103,"Completion Time":1426533936579,"Accumulables":[{"ID":1,"Name":"my counter","Value":"5050"}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1426533945177} diff --git a/core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1426533911241/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE b/core/src/test/resources/spark-events/local-1426633911242/APPLICATION_COMPLETE new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 b/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 new file mode 100755 index 0000000000000..e7043282107d3 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1426633911242/EVENT_LOG_1 @@ -0,0 +1,24 @@ +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"","Host":"localhost","Port":58610},"Maximum Memory":278019440,"Timestamp":1426633911361} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.105","spark.eventLog.enabled":"true","spark.driver.port":"58608","spark.repl.class.uri":"http://192.168.1.105:58607","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.fileserver.uri":"http://192.168.1.105:58609","spark.tachyonStore.folderName":"spark-5e9b7f26-8e97-4b43-82d6-25c141530da9","spark.app.id":"local-1426633911241"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --conf spark.eventLog.enabled=true spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/spark-assembly-1.2.0-hadoop2.4.0.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar":"System Classpath","/etc/hadoop":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/spark-examples/releases/spark-1.2.0-bin-hadoop2.4/conf":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1426533911241","Timestamp":1426633910242,"User":"irashid","App Attempt ID":"2"} +{"Event":"SparkListenerJobStart","Job ID":0,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}],"Stage IDs":[0]} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426633936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426633936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result 
Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426633936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936570,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"378","Value":"378"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936572,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"572","Value":"950"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my 
counter","Update":"978","Value":"1928"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1426633936521,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"247","Value":"2175"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1426633936522,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936574,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"678","Value":"2853"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1426633936523,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"897","Value":"3750"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1426633936524,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936575,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"1222","Value":"4972"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1426633936515,"Executor ID":"","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1426633936576,"Failed":false,"Accumulables":[{"ID":1,"Name":"my counter","Update":"78","Value":"5050"}]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":15,"Result Size":697,"JVM GC Time":0,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage 
Name":"foreach at :15","Number of Tasks":8,"RDD Info":[{"RDD ID":0,"Name":"0","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.foreach(RDD.scala:765)\n$line9.$read$$iwC$$iwC$$iwC$$iwC.(:15)\n$line9.$read$$iwC$$iwC$$iwC.(:20)\n$line9.$read$$iwC$$iwC.(:22)\n$line9.$read$$iwC.(:24)\n$line9.$read.(:26)\n$line9.$read$.(:30)\n$line9.$read$.()\n$line9.$eval$.(:7)\n$line9.$eval$.()\n$line9.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852)\norg.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125)\norg.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705)\norg.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669)","Submission Time":1426633936103,"Completion Time":1426633936579,"Accumulables":[{"ID":1,"Name":"my counter","Value":"5050"}]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1426633945177} diff --git a/core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 b/core/src/test/resources/spark-events/local-1426633911242/SPARK_VERSION_1.2.0 new file mode 100755 index 0000000000000..e69de29bb2d1d diff --git a/core/src/test/resources/spark-events/local-1430917381534 b/core/src/test/resources/spark-events/local-1430917381534 new file mode 100755 index 0000000000000..24ac478073e3d --- /dev/null +++ b/core/src/test/resources/spark-events/local-1430917381534 @@ -0,0 +1,231 @@ +{"Event":"SparkListenerLogStart","Spark Version":"1.4.0-SNAPSHOT"} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"localhost","Port":61103},"Maximum Memory":278019440,"Timestamp":1430917381651} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","Java Version":"1.8.0_25 (Oracle Corporation)","Scala Version":"version 2.10.4"},"Spark Properties":{"spark.driver.host":"192.168.1.102","spark.eventLog.enabled":"true","spark.driver.port":"61101","spark.repl.class.uri":"http://192.168.1.102:61100","spark.jars":"","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.executor.id":"driver","spark.master":"local[*]","spark.eventLog.dir":"/Users/irashid/github/kraps/core/src/test/resources/spark-events","spark.fileserver.uri":"http://192.168.1.102:61102","spark.tachyonStore.folderName":"spark-aaaf41b3-d1dd-447f-8951-acf51490758b","spark.app.id":"local-1430917381534"},"System Properties":{"java.io.tmpdir":"/var/folders/36/m29jw1z95qv4ywb1c4n0rz000000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.8","user.home":"/Users/irashid","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib","user.dir":"/Users/irashid/github/spark","java.library.path":"/Users/irashid/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"x86_64","java.vm.version":"25.25-b02","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_25-b17","java.vm.info":"mixed mode","java.ext.dirs":"/Users/irashid/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"Java(TM) SE Runtime Environment","file.separator":"/","java.class.version":"52.0","scala.usejavacp":"true","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"America/Chicago","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"10.9.5","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"en","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"irashid","java.vm.name":"Java HotSpot(TM) 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --conf spark.eventLog.enabled=true --conf spark.eventLog.dir=/Users/irashid/github/kraps/core/src/test/resources/spark-events --class org.apache.spark.repl.Main spark-shell","java.home":"/Library/Java/JavaVirtualMachines/jdk1.8.0_25.jdk/Contents/Home/jre","java.version":"1.8.0_25","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/etc/hadoop":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-rdbms-3.2.9.jar":"System Classpath","/Users/irashid/github/spark/conf/":"System Classpath","/Users/irashid/github/spark/assembly/target/scala-2.10/spark-assembly-1.4.0-SNAPSHOT-hadoop2.5.0.jar":"System Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-core-3.2.10.jar":"System 
Classpath","/Users/irashid/github/spark/lib_managed/jars/datanucleus-api-jdo-3.2.6.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1430917381534","Timestamp":1430917380893,"User":"irashid"} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1430917386422,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at :26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at :26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]}],"Stage IDs":[0,1],"Properties":{}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at :26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},"Properties":{}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1430917386494,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1430917386502,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1430917386503,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch 
Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1430917386506,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":8,"Attempt":0,"Launch Time":1430917386914,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":9,"Attempt":0,"Launch Time":1430917386915,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":10,"Attempt":0,"Launch Time":1430917386916,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":11,"Attempt":0,"Launch Time":1430917386918,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386919,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":30,"Executor Run Time":350,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3675510,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End 
Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Launch Time":1430917386503,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386922,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":32,"Executor Run Time":348,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89885,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1430917386502,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386923,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":350,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3934399,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386923,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":83022,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":12,"Attempt":0,"Launch Time":1430917386923,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Launch Time":1430917386505,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386924,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":29,"Executor Run Time":351,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":4016617,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":13,"Attempt":0,"Launch Time":1430917386924,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387062,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":14,"Attempt":0,"Launch Time":1430917386925,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387019,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Launch Time":1430917386504,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386927,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":2,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":1311694,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":15,"Attempt":0,"Launch Time":1430917386928,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387011,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1430917386494,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386929,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":32,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":3842811,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":49294,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Launch Time":1430917386506,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917386929,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":31,"Executor Run Time":349,"Result Size":2010,"JVM GC Time":7,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":2579051,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":16,"Attempt":0,"Launch Time":1430917387001,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387099,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage 
ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":8,"Attempt":0,"Launch Time":1430917386914,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387002,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":80,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":121551,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60488,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":17,"Attempt":0,"Launch Time":1430917387005,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387128,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":11,"Attempt":0,"Launch Time":1430917386918,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387007,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":75,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94507,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":18,"Attempt":0,"Launch Time":1430917387010,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":15,"Attempt":0,"Launch Time":1430917386928,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387011,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":76,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":602780,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":19,"Attempt":0,"Launch Time":1430917387012,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387106,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":20,"Attempt":0,"Launch Time":1430917387014,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result 
Time":0,"Finish Time":1430917387104,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":21,"Attempt":0,"Launch Time":1430917387015,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387111,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":10,"Attempt":0,"Launch Time":1430917386916,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387015,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":73,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94709,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":9,"Attempt":0,"Launch Time":1430917386915,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":101664,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":60489,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":12,"Attempt":0,"Launch Time":1430917386923,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387016,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":77,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102476,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":22,"Attempt":0,"Launch Time":1430917387018,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387119,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":14,"Attempt":0,"Launch Time":1430917386925,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387019,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize 
Time":6,"Executor Run Time":83,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95646,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":23,"Attempt":0,"Launch Time":1430917387031,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":13,"Attempt":0,"Launch Time":1430917386924,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387062,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":9,"Executor Run Time":76,"Result Size":2010,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95004,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":24,"Attempt":0,"Launch Time":1430917387098,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":16,"Attempt":0,"Launch Time":1430917387001,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387099,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108320,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":25,"Attempt":0,"Launch Time":1430917387103,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":20,"Attempt":0,"Launch Time":1430917387014,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387104,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":83,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97716,"Shuffle 
Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":26,"Attempt":0,"Launch Time":1430917387105,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":19,"Attempt":0,"Launch Time":1430917387012,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387106,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":84,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95788,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":27,"Attempt":0,"Launch Time":1430917387110,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":21,"Attempt":0,"Launch Time":1430917387015,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387111,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":88,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100270,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":28,"Attempt":0,"Launch Time":1430917387113,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":29,"Attempt":0,"Launch Time":1430917387114,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":23,"Attempt":0,"Launch Time":1430917387031,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":65,"Result Size":2010,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write 
Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":91844,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":18,"Attempt":0,"Launch Time":1430917387010,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387115,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":92,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100836,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":30,"Attempt":0,"Launch Time":1430917387118,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":22,"Attempt":0,"Launch Time":1430917387018,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387119,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":93,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":143427,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":31,"Attempt":0,"Launch Time":1430917387127,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":17,"Attempt":0,"Launch Time":1430917387005,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387128,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":11,"Executor Run Time":91,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":99944,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":32,"Attempt":0,"Launch Time":1430917387148,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage 
Attempt ID":0,"Task Info":{"Task ID":33,"Index":33,"Attempt":0,"Launch Time":1430917387149,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":24,"Attempt":0,"Launch Time":1430917387098,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387150,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":43,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":157194,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":27,"Attempt":0,"Launch Time":1430917387110,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387151,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102019,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":34,"Index":34,"Attempt":0,"Launch Time":1430917387156,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":26,"Attempt":0,"Launch Time":1430917387105,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387157,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108213,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":35,"Index":35,"Attempt":0,"Launch Time":1430917387161,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":28,"Attempt":0,"Launch Time":1430917387113,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387162,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":29,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104299,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":36,"Index":36,"Attempt":0,"Launch Time":1430917387164,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":25,"Attempt":0,"Launch Time":1430917387103,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387164,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":49,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94134,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":37,"Index":37,"Attempt":0,"Launch Time":1430917387165,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":38,"Index":38,"Attempt":0,"Launch Time":1430917387166,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":29,"Attempt":0,"Launch Time":1430917387114,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387166,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":39,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":114938,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":39,"Index":39,"Attempt":0,"Launch Time":1430917387180,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":30,"Index":30,"Attempt":0,"Launch Time":1430917387118,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387180,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":34,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":119770,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":32,"Index":32,"Attempt":0,"Launch Time":1430917387148,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387181,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89603,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":40,"Index":40,"Attempt":0,"Launch Time":1430917387197,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":34,"Index":34,"Attempt":0,"Launch Time":1430917387156,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387198,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":127746,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":41,"Index":41,"Attempt":0,"Launch Time":1430917387200,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":31,"Attempt":0,"Launch Time":1430917387127,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387201,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":36,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92619,"Shuffle Records 
Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":42,"Index":42,"Attempt":0,"Launch Time":1430917387203,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":36,"Index":36,"Attempt":0,"Launch Time":1430917387164,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387204,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":29,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":123855,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":43,"Index":43,"Attempt":0,"Launch Time":1430917387204,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":44,"Index":44,"Attempt":0,"Launch Time":1430917387205,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":45,"Index":45,"Attempt":0,"Launch Time":1430917387206,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":37,"Index":37,"Attempt":0,"Launch Time":1430917387165,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387207,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":111869,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":33,"Index":33,"Attempt":0,"Launch Time":1430917387149,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387207,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":43,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle 
Bytes Written":1710,"Shuffle Write Time":118329,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":46,"Index":46,"Attempt":0,"Launch Time":1430917387210,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":35,"Index":35,"Attempt":0,"Launch Time":1430917387161,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387211,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":35,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":160963,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":47,"Index":47,"Attempt":0,"Launch Time":1430917387212,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":39,"Index":39,"Attempt":0,"Launch Time":1430917387180,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387212,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98748,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":38,"Index":38,"Attempt":0,"Launch Time":1430917387166,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387213,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":131158,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":48,"Index":48,"Attempt":0,"Launch Time":1430917387220,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task 
Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":40,"Index":40,"Attempt":0,"Launch Time":1430917387197,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387221,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":14,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94792,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":49,"Index":49,"Attempt":0,"Launch Time":1430917387223,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":41,"Index":41,"Attempt":0,"Launch Time":1430917387200,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387224,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90765,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":50,"Index":50,"Attempt":0,"Launch Time":1430917387240,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":44,"Index":44,"Attempt":0,"Launch Time":1430917387205,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387242,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98293,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":51,"Index":51,"Attempt":0,"Launch Time":1430917387242,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":52,"Index":52,"Attempt":0,"Launch Time":1430917387243,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":43,"Index":43,"Attempt":0,"Launch Time":1430917387204,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387243,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":171516,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":45,"Index":45,"Attempt":0,"Launch Time":1430917387206,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387243,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92985,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":53,"Index":53,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":54,"Index":54,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":42,"Index":42,"Attempt":0,"Launch Time":1430917387203,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387245,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":10,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103713,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":47,"Index":47,"Attempt":0,"Launch Time":1430917387212,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387245,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory 
Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103015,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":55,"Index":55,"Attempt":0,"Launch Time":1430917387246,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":56,"Index":56,"Attempt":0,"Launch Time":1430917387249,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":48,"Index":48,"Attempt":0,"Launch Time":1430917387220,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387250,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":139844,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":46,"Index":46,"Attempt":0,"Launch Time":1430917387210,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387253,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":113322,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":57,"Index":57,"Attempt":0,"Launch Time":1430917387257,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":49,"Index":49,"Attempt":0,"Launch Time":1430917387223,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387257,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":7,"Executor Run Time":23,"Result Size":2065,"JVM GC Time":6,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94984,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} 
+{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":58,"Index":58,"Attempt":0,"Launch Time":1430917387263,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":51,"Index":51,"Attempt":0,"Launch Time":1430917387242,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387263,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":96013,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":59,"Index":59,"Attempt":0,"Launch Time":1430917387265,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":50,"Index":50,"Attempt":0,"Launch Time":1430917387240,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387266,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90836,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":60,"Index":60,"Attempt":0,"Launch Time":1430917387269,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":61,"Index":61,"Attempt":0,"Launch Time":1430917387270,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":52,"Index":52,"Attempt":0,"Launch Time":1430917387243,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387271,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89664,"Shuffle Records Written":10},"Input Metrics":{"Data 
Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":62,"Index":62,"Attempt":0,"Launch Time":1430917387272,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":53,"Index":53,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387273,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":6,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92835,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":63,"Index":63,"Attempt":0,"Launch Time":1430917387276,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":55,"Index":55,"Attempt":0,"Launch Time":1430917387246,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387276,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":21,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":108309,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":64,"Index":64,"Attempt":0,"Launch Time":1430917387277,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":57,"Index":57,"Attempt":0,"Launch Time":1430917387257,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387278,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":96849,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":65,"Index":65,"Attempt":0,"Launch Time":1430917387279,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":56,"Index":56,"Attempt":0,"Launch Time":1430917387249,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387280,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":20,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90329,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":66,"Index":66,"Attempt":0,"Launch Time":1430917387285,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":58,"Index":58,"Attempt":0,"Launch Time":1430917387263,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387286,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97521,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":67,"Index":67,"Attempt":0,"Launch Time":1430917387288,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":59,"Index":59,"Attempt":0,"Launch Time":1430917387265,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387288,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100753,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":54,"Index":54,"Attempt":0,"Launch Time":1430917387244,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387303,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":90506,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":68,"Index":68,"Attempt":0,"Launch Time":1430917387306,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":60,"Index":60,"Attempt":0,"Launch Time":1430917387269,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387307,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":156916,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":69,"Index":69,"Attempt":0,"Launch Time":1430917387308,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":62,"Index":62,"Attempt":0,"Launch Time":1430917387272,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387309,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100396,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":70,"Index":70,"Attempt":0,"Launch Time":1430917387311,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":64,"Index":64,"Attempt":0,"Launch Time":1430917387277,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387311,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":25,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes 
Written":1710,"Shuffle Write Time":104087,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":71,"Index":71,"Attempt":0,"Launch Time":1430917387313,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":72,"Index":72,"Attempt":0,"Launch Time":1430917387314,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":61,"Index":61,"Attempt":0,"Launch Time":1430917387270,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387315,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":4,"Executor Run Time":36,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100426,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":73,"Index":73,"Attempt":0,"Launch Time":1430917387316,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":63,"Index":63,"Attempt":0,"Launch Time":1430917387276,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387316,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":20,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102779,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":74,"Index":74,"Attempt":0,"Launch Time":1430917387318,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":66,"Index":66,"Attempt":0,"Launch Time":1430917387285,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387319,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes 
Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100823,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":65,"Index":65,"Attempt":0,"Launch Time":1430917387279,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387321,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":27,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":99123,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":75,"Index":75,"Attempt":0,"Launch Time":1430917387323,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":67,"Index":67,"Attempt":0,"Launch Time":1430917387288,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387323,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":14,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":92741,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":76,"Index":76,"Attempt":0,"Launch Time":1430917387327,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":68,"Index":68,"Attempt":0,"Launch Time":1430917387306,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387328,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":101750,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":77,"Index":77,"Attempt":0,"Launch Time":1430917387329,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":69,"Index":69,"Attempt":0,"Launch Time":1430917387308,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387331,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":89437,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":78,"Index":78,"Attempt":0,"Launch Time":1430917387338,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":71,"Index":71,"Attempt":0,"Launch Time":1430917387313,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387339,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":183323,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":79,"Index":79,"Attempt":0,"Launch Time":1430917387339,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":70,"Index":70,"Attempt":0,"Launch Time":1430917387311,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387340,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":24,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":105933,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":80,"Index":80,"Attempt":0,"Launch Time":1430917387341,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":74,"Index":74,"Attempt":0,"Launch Time":1430917387318,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387341,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98047,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":81,"Index":81,"Attempt":0,"Launch Time":1430917387342,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":72,"Index":72,"Attempt":0,"Launch Time":1430917387314,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387343,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":139584,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":82,"Index":82,"Attempt":0,"Launch Time":1430917387349,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":75,"Index":75,"Attempt":0,"Launch Time":1430917387323,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387359,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":20,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":134733,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":83,"Index":83,"Attempt":0,"Launch Time":1430917387360,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":73,"Index":73,"Attempt":0,"Launch Time":1430917387316,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387360,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":16,"Executor 
Run Time":25,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":197728,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":84,"Index":84,"Attempt":0,"Launch Time":1430917387368,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":77,"Index":77,"Attempt":0,"Launch Time":1430917387329,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387368,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":30,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":104879,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":85,"Index":85,"Attempt":0,"Launch Time":1430917387371,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":76,"Index":76,"Attempt":0,"Launch Time":1430917387327,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387371,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":117198,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":86,"Index":86,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":87,"Index":87,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":79,"Index":79,"Attempt":0,"Launch Time":1430917387339,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387375,"Failed":false,"Accumulables":[]},"Task 
Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":760569,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":80,"Index":80,"Attempt":0,"Launch Time":1430917387341,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387375,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":13,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98069,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":88,"Index":88,"Attempt":0,"Launch Time":1430917387376,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":78,"Index":78,"Attempt":0,"Launch Time":1430917387338,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387377,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94029,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":89,"Index":89,"Attempt":0,"Launch Time":1430917387384,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":82,"Index":82,"Attempt":0,"Launch Time":1430917387349,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387385,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":23,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103837,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":90,"Index":90,"Attempt":0,"Launch Time":1430917387385,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":81,"Index":81,"Attempt":0,"Launch Time":1430917387342,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387386,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":5,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102935,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":91,"Index":91,"Attempt":0,"Launch Time":1430917387394,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":84,"Index":84,"Attempt":0,"Launch Time":1430917387368,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387395,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97176,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":92,"Index":92,"Attempt":0,"Launch Time":1430917387400,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":85,"Index":85,"Attempt":0,"Launch Time":1430917387371,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387401,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":103743,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":93,"Index":93,"Attempt":0,"Launch Time":1430917387402,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task 
Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":86,"Index":86,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387402,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":16,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95848,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":94,"Index":94,"Attempt":0,"Launch Time":1430917387404,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":83,"Index":83,"Attempt":0,"Launch Time":1430917387360,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387405,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":37,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":134049,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":95,"Index":95,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":96,"Index":96,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":97,"Index":97,"Attempt":0,"Launch Time":1430917387407,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":88,"Index":88,"Attempt":0,"Launch Time":1430917387376,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387407,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":19,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":94269,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} 
+{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":90,"Index":90,"Attempt":0,"Launch Time":1430917387385,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387408,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98472,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":98,"Index":98,"Attempt":0,"Launch Time":1430917387410,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":87,"Index":87,"Attempt":0,"Launch Time":1430917387374,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387410,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":12,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102159,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":89,"Index":89,"Attempt":0,"Launch Time":1430917387384,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387417,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":18,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":97226,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":99,"Index":99,"Attempt":0,"Launch Time":1430917387426,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":91,"Index":91,"Attempt":0,"Launch Time":1430917387394,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387427,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":8,"Executor Run Time":21,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":1,"Memory 
Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":98301,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":94,"Index":94,"Attempt":0,"Launch Time":1430917387404,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387441,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":5,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":102455,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":92,"Index":92,"Attempt":0,"Launch Time":1430917387400,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387442,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":31,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":114766,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":96,"Index":96,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387443,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":26,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":100678,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":93,"Index":93,"Attempt":0,"Launch Time":1430917387402,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387443,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":32,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":95818,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task 
Info":{"Task ID":95,"Index":95,"Attempt":0,"Launch Time":1430917387406,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387444,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":28,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":117803,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":98,"Index":98,"Attempt":0,"Launch Time":1430917387410,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387444,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":7,"Executor Run Time":22,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":105039,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":99,"Index":99,"Attempt":0,"Launch Time":1430917387426,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387448,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":17,"Result Size":2065,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":133964,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70565,"Records Read":10000}}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":97,"Index":97,"Attempt":0,"Launch Time":1430917387407,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387450,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":38,"Result Size":2065,"JVM GC Time":4,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Write Metrics":{"Shuffle Bytes Written":1710,"Shuffle Write Time":131987,"Shuffle Records Written":10},"Input Metrics":{"Data Read Method":"Hadoop","Bytes Read":70564,"Records Read":10000}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"map at :26","Number of Tasks":100,"RDD Info":[{"RDD ID":3,"Name":"3","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":2,"Name":"2","Storage Level":{"Use Disk":false,"Use Memory":false,"Use 
Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":0,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0},{"RDD ID":1,"Name":"/Users/irashid/spark-examples/tmp_data/raw_data","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":100,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.map(RDD.scala:287)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Submission Time":1430917386485,"Completion Time":1430917387451,"Accumulables":[]}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at :26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Accumulables":[]},"Properties":{}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":100,"Index":0,"Attempt":0,"Launch Time":1430917387462,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":101,"Index":1,"Attempt":0,"Launch Time":1430917387464,"Executor 
ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":102,"Index":2,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":103,"Index":3,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":104,"Index":4,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":105,"Index":5,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":106,"Index":6,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":107,"Index":7,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":108,"Index":8,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":103,"Index":3,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387546,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":75,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":109,"Index":9,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task 
ID":105,"Index":5,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387547,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":100,"Index":0,"Attempt":0,"Launch Time":1430917387462,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":74,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":107,"Index":7,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":101,"Index":1,"Attempt":0,"Launch Time":1430917387464,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387548,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":1,"Executor Run Time":76,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":106,"Index":6,"Attempt":0,"Launch Time":1430917387466,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387549,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":75,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks 
Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":104,"Index":4,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387549,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":3,"Executor Run Time":74,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":102,"Index":2,"Attempt":0,"Launch Time":1430917387465,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387550,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":79,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":108,"Index":8,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387583,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":31,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":0,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":109,"Index":9,"Attempt":0,"Launch Time":1430917387546,"Executor ID":"driver","Host":"localhost","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1430917387585,"Failed":false,"Accumulables":[]},"Task Metrics":{"Host Name":"localhost","Executor Deserialize Time":2,"Executor Run Time":30,"Result Size":1034,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":100,"Fetch Wait Time":1,"Remote Bytes Read":0,"Local Bytes Read":17100,"Total Records Read":100}}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"collect at :26","Number of Tasks":10,"RDD Info":[{"RDD ID":4,"Name":"4","Storage Level":{"Use Disk":false,"Use Memory":false,"Use Tachyon":false,"Deserialized":false,"Replication":1},"Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Tachyon Size":0,"Disk 
Size":0}],"Details":"org.apache.spark.rdd.RDD.collect(RDD.scala:819)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:26)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:31)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.(:33)\n$line20.$read$$iwC$$iwC$$iwC$$iwC$$iwC.(:35)\n$line20.$read$$iwC$$iwC$$iwC$$iwC.(:37)\n$line20.$read$$iwC$$iwC$$iwC.(:39)\n$line20.$read$$iwC$$iwC.(:41)\n$line20.$read$$iwC.(:43)\n$line20.$read.(:45)\n$line20.$read$.(:49)\n$line20.$read$.()\n$line20.$eval$.(:7)\n$line20.$eval$.()\n$line20.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:483)\norg.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)","Submission Time":1430917387462,"Completion Time":1430917387585,"Accumulables":[]}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1430917387589,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1430917391398} diff --git a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala index 75399461f2a5f..746a40a21bf9e 100644 --- a/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/AccumulatorSuite.scala @@ -103,7 +103,7 @@ class AccumulatorSuite extends FunSuite with Matchers with LocalSparkContext { sc = new SparkContext("local[" + nThreads + "]", "test") val setAcc = sc.accumulableCollection(mutable.HashSet[Int]()) val bufferAcc = sc.accumulableCollection(mutable.ArrayBuffer[Int]()) - val mapAcc = sc.accumulableCollection(mutable.HashMap[Int,String]()) + val mapAcc = sc.accumulableCollection(mutable.HashMap[Int, String]()) val d = sc.parallelize((1 to maxI) ++ (1 to maxI)) d.foreach { x => {setAcc += x; bufferAcc += x; mapAcc += (x -> x.toString)} diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index e1faddeabec79..91d8fdedbe0f3 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -218,10 +218,10 @@ class CheckpointSuite extends FunSuite with LocalSparkContext with Logging { val pairRDD = generateFatPairRDD() pairRDD.checkpoint() val unionRDD = new PartitionerAwareUnionRDD(sc, Array(pairRDD)) - val partitionBeforeCheckpoint = serializeDeserialize( + val partitionBeforeCheckpoint = serializeDeserialize( unionRDD.partitions.head.asInstanceOf[PartitionerAwareUnionRDDPartition]) pairRDD.count() - val partitionAfterCheckpoint = serializeDeserialize( + val partitionAfterCheckpoint = serializeDeserialize( unionRDD.partitions.head.asInstanceOf[PartitionerAwareUnionRDDPartition]) assert( partitionBeforeCheckpoint.parents.head.getClass != diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index c7868ddcf770f..4a48f6580c78e 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -158,13 +158,14 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { rdd.count() // Test that GC does not cause RDD cleanup due to a strong reference - val preGCTester = new CleanerTester(sc, rddIds = Seq(rdd.id)) + val 
preGCTester = new CleanerTester(sc, rddIds = Seq(rdd.id)) runGC() intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) } // Test that GC causes RDD cleanup after dereferencing the RDD + // Note rdd is used after previous GC to avoid early collection by the JVM val postGCTester = new CleanerTester(sc, rddIds = Seq(rdd.id)) rdd = null // Make RDD out of scope runGC() @@ -181,6 +182,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) } + rdd.count() // Defeat early collection by the JVM // Test that GC causes shuffle cleanup after dereferencing the RDD val postGCTester = new CleanerTester(sc, shuffleIds = Seq(0)) @@ -193,13 +195,14 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { var broadcast = newBroadcast() // Test that GC does not cause broadcast cleanup due to a strong reference - val preGCTester = new CleanerTester(sc, broadcastIds = Seq(broadcast.id)) + val preGCTester = new CleanerTester(sc, broadcastIds = Seq(broadcast.id)) runGC() intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) } // Test that GC causes broadcast cleanup after dereferencing the broadcast variable + // Note broadcast is used after previous GC to avoid early collection by the JVM val postGCTester = new CleanerTester(sc, broadcastIds = Seq(broadcast.id)) broadcast = null // Make broadcast variable out of scope runGC() @@ -225,7 +228,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { // the checkpoint is not cleaned by default (without the configuration set) var postGCTester = new CleanerTester(sc, Seq(rddId), Nil, Nil, Nil) - rdd = null // Make RDD out of scope + rdd = null // Make RDD out of scope, ok if collected earlier runGC() postGCTester.assertCleanup() assert(fs.exists(RDDCheckpointData.rddCheckpointDataPath(sc, rddId).get)) @@ -244,6 +247,9 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { // Confirm the checkpoint directory exists assert(fs.exists(RDDCheckpointData.rddCheckpointDataPath(sc, rddId).get)) + // Reference rdd to defeat any early collection by the JVM + rdd.count() + // Test that GC causes checkpoint data cleanup after dereferencing the RDD postGCTester = new CleanerTester(sc, Seq(rddId), Nil, Nil, Seq(rddId)) rdd = null // Make RDD out of scope @@ -261,7 +267,7 @@ class ContextCleanerSuite extends ContextCleanerSuiteBase { val shuffleIds = 0 until sc.newShuffleId val broadcastIds = broadcastBuffer.map(_.id) - val preGCTester = new CleanerTester(sc, rddIds, shuffleIds, broadcastIds) + val preGCTester = new CleanerTester(sc, rddIds, shuffleIds, broadcastIds) runGC() intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) @@ -351,6 +357,7 @@ class SortShuffleContextCleanerSuite extends ContextCleanerSuiteBase(classOf[Sor intercept[Exception] { preGCTester.assertCleanup()(timeout(1000 millis)) } + rdd.count() // Defeat early collection by the JVM // Test that GC causes shuffle cleanup after dereferencing the RDD val postGCTester = new CleanerTester(sc, shuffleIds = Seq(0)) diff --git a/core/src/test/scala/org/apache/spark/DriverSuite.scala b/core/src/test/scala/org/apache/spark/DriverSuite.scala index 9bd5dfec8703a..c42dfbc82ada4 100644 --- a/core/src/test/scala/org/apache/spark/DriverSuite.scala +++ b/core/src/test/scala/org/apache/spark/DriverSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.util.Utils class DriverSuite extends FunSuite with Timeouts { - test("driver should exit after finishing without cleanup 
(SPARK-530)") { + ignore("driver should exit after finishing without cleanup (SPARK-530)") { val sparkHome = sys.props.getOrElse("spark.test.home", fail("spark.test.home is not set!")) val masters = Table("master", "local", "local-cluster[2,1,512]") forAll(masters) { (master: String) => diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 49e6de4e0bafd..84f787ee3715d 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -727,7 +727,7 @@ private object ExecutorAllocationManagerSuite extends PrivateMethodTester { private val executorIdleTimeout = 3L private def createStageInfo(stageId: Int, numTasks: Int): StageInfo = { - new StageInfo(stageId, 0, "name", numTasks, Seq.empty, "no details") + new StageInfo(stageId, 0, "name", numTasks, Seq.empty, Seq.empty, "no details") } private def createTaskInfo(taskId: Int, taskIndex: Int, executorId: String): TaskInfo = { diff --git a/core/src/test/scala/org/apache/spark/FailureSuite.scala b/core/src/test/scala/org/apache/spark/FailureSuite.scala index 1212d0b43207d..e50da1c93c420 100644 --- a/core/src/test/scala/org/apache/spark/FailureSuite.scala +++ b/core/src/test/scala/org/apache/spark/FailureSuite.scala @@ -57,7 +57,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { FailureSuiteState.synchronized { assert(FailureSuiteState.tasksRun === 4) } - assert(results.toList === List(1,4,9)) + assert(results.toList === List(1, 4, 9)) FailureSuiteState.clear() } @@ -119,7 +119,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { sc.parallelize(1 to 10, 2).map(x => a).count() } assert(thrown.getClass === classOf[SparkException]) - assert(thrown.getMessage.contains("NotSerializableException") || + assert(thrown.getMessage.contains("NotSerializableException") || thrown.getCause.getClass === classOf[NotSerializableException]) // Non-serializable closure in an earlier stage @@ -127,7 +127,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { sc.parallelize(1 to 10, 2).map(x => (x, a)).partitionBy(new HashPartitioner(3)).count() } assert(thrown1.getClass === classOf[SparkException]) - assert(thrown1.getMessage.contains("NotSerializableException") || + assert(thrown1.getMessage.contains("NotSerializableException") || thrown1.getCause.getClass === classOf[NotSerializableException]) // Non-serializable closure in foreach function @@ -135,7 +135,7 @@ class FailureSuite extends FunSuite with LocalSparkContext { sc.parallelize(1 to 10, 2).foreach(x => println(a)) } assert(thrown2.getClass === classOf[SparkException]) - assert(thrown2.getMessage.contains("NotSerializableException") || + assert(thrown2.getMessage.contains("NotSerializableException") || thrown2.getCause.getClass === classOf[NotSerializableException]) FailureSuiteState.clear() diff --git a/core/src/test/scala/org/apache/spark/FileServerSuite.scala b/core/src/test/scala/org/apache/spark/FileServerSuite.scala index c0439f934813e..bff2d10b9946c 100644 --- a/core/src/test/scala/org/apache/spark/FileServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileServerSuite.scala @@ -81,7 +81,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext { test("Distributing files locally") { sc = new SparkContext("local[4]", "test", newConf) sc.addFile(tmpFile.toString) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) + val 
testData = Array((1, 1), (1, 1), (2, 1), (3, 5), (2, 2), (3, 0)) val result = sc.parallelize(testData).reduceByKey { val path = SparkFiles.get("FileServerSuite.txt") val in = new BufferedReader(new FileReader(path)) @@ -89,7 +89,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext { in.close() _ * fileVal + _ * fileVal }.collect() - assert(result.toSet === Set((1,200), (2,300), (3,500))) + assert(result.toSet === Set((1, 200), (2, 300), (3, 500))) } test("Distributing files locally security On") { @@ -100,7 +100,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext { sc.addFile(tmpFile.toString) assert(sc.env.securityManager.isAuthenticationEnabled() === true) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) + val testData = Array((1, 1), (1, 1), (2, 1), (3, 5), (2, 2), (3, 0)) val result = sc.parallelize(testData).reduceByKey { val path = SparkFiles.get("FileServerSuite.txt") val in = new BufferedReader(new FileReader(path)) @@ -108,14 +108,14 @@ class FileServerSuite extends FunSuite with LocalSparkContext { in.close() _ * fileVal + _ * fileVal }.collect() - assert(result.toSet === Set((1,200), (2,300), (3,500))) + assert(result.toSet === Set((1, 200), (2, 300), (3, 500))) } test("Distributing files locally using URL as input") { // addFile("file:///....") sc = new SparkContext("local[4]", "test", newConf) sc.addFile(new File(tmpFile.toString).toURI.toString) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) + val testData = Array((1, 1), (1, 1), (2, 1), (3, 5), (2, 2), (3, 0)) val result = sc.parallelize(testData).reduceByKey { val path = SparkFiles.get("FileServerSuite.txt") val in = new BufferedReader(new FileReader(path)) @@ -123,7 +123,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext { in.close() _ * fileVal + _ * fileVal }.collect() - assert(result.toSet === Set((1,200), (2,300), (3,500))) + assert(result.toSet === Set((1, 200), (2, 300), (3, 500))) } test ("Dynamically adding JARS locally") { @@ -140,7 +140,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext { test("Distributing files on a standalone cluster") { sc = new SparkContext("local-cluster[1,1,512]", "test", newConf) sc.addFile(tmpFile.toString) - val testData = Array((1,1), (1,1), (2,1), (3,5), (2,2), (3,0)) + val testData = Array((1, 1), (1, 1), (2, 1), (3, 5), (2, 2), (3, 0)) val result = sc.parallelize(testData).reduceByKey { val path = SparkFiles.get("FileServerSuite.txt") val in = new BufferedReader(new FileReader(path)) @@ -148,13 +148,13 @@ class FileServerSuite extends FunSuite with LocalSparkContext { in.close() _ * fileVal + _ * fileVal }.collect() - assert(result.toSet === Set((1,200), (2,300), (3,500))) + assert(result.toSet === Set((1, 200), (2, 300), (3, 500))) } test ("Dynamically adding JARS on a standalone cluster") { sc = new SparkContext("local-cluster[1,1,512]", "test", newConf) sc.addJar(tmpJarUrl) - val testData = Array((1,1)) + val testData = Array((1, 1)) sc.parallelize(testData).foreach { x => if (Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) { throw new SparkException("jar not added") @@ -165,7 +165,7 @@ class FileServerSuite extends FunSuite with LocalSparkContext { test ("Dynamically adding JARS on a standalone cluster using local: URL") { sc = new SparkContext("local-cluster[1,1,512]", "test", newConf) sc.addJar(tmpJarUrl.replace("file", "local")) - val testData = Array((1,1)) + val testData = Array((1, 1)) sc.parallelize(testData).foreach { x => if 
(Thread.currentThread.getContextClassLoader.getResource("FileServerSuite.txt") == null) { throw new SparkException("jar not added") diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index c8f08eed47c76..d67de8692df62 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -334,7 +334,7 @@ class FileSuite extends FunSuite with LocalSparkContext { } val copyRdd = mappedRdd.flatMap { curData: (String, PortableDataStream) => - for(i <- 1 to numOfCopies) yield (i, curData._2) + for (i <- 1 to numOfCopies) yield (i, curData._2) } val copyArr: Array[(Int, PortableDataStream)] = copyRdd.collect() diff --git a/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala b/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala index 51348c039b5c9..67a4a97af8290 100644 --- a/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala +++ b/core/src/test/scala/org/apache/spark/ImplicitOrderingSuite.scala @@ -29,11 +29,11 @@ class ImplicitOrderingSuite extends FunSuite with LocalSparkContext { // These RDD methods are in the companion object so that the unserializable ScalaTest Engine // won't be reachable from the closure object - + // Infer orderings after basic maps to particular types val basicMapExpectations = ImplicitOrderingSuite.basicMapExpectations(rdd) basicMapExpectations.map({case (met, explain) => assert(met, explain)}) - + // Infer orderings for other RDD methods val otherRDDMethodExpectations = ImplicitOrderingSuite.otherRDDMethodExpectations(rdd) otherRDDMethodExpectations.map({case (met, explain) => assert(met, explain)}) @@ -44,36 +44,36 @@ private object ImplicitOrderingSuite { class NonOrderedClass {} class ComparableClass extends Comparable[ComparableClass] { - override def compareTo(o: ComparableClass): Int = ??? + override def compareTo(o: ComparableClass): Int = throw new UnsupportedOperationException } class OrderedClass extends Ordered[OrderedClass] { - override def compare(o: OrderedClass): Int = ??? 
+ override def compare(o: OrderedClass): Int = throw new UnsupportedOperationException } - + def basicMapExpectations(rdd: RDD[Int]): List[(Boolean, String)] = { - List((rdd.map(x => (x, x)).keyOrdering.isDefined, + List((rdd.map(x => (x, x)).keyOrdering.isDefined, "rdd.map(x => (x, x)).keyOrdering.isDefined"), - (rdd.map(x => (1, x)).keyOrdering.isDefined, + (rdd.map(x => (1, x)).keyOrdering.isDefined, "rdd.map(x => (1, x)).keyOrdering.isDefined"), - (rdd.map(x => (x.toString, x)).keyOrdering.isDefined, + (rdd.map(x => (x.toString, x)).keyOrdering.isDefined, "rdd.map(x => (x.toString, x)).keyOrdering.isDefined"), - (rdd.map(x => (null, x)).keyOrdering.isDefined, + (rdd.map(x => (null, x)).keyOrdering.isDefined, "rdd.map(x => (null, x)).keyOrdering.isDefined"), - (rdd.map(x => (new NonOrderedClass, x)).keyOrdering.isEmpty, + (rdd.map(x => (new NonOrderedClass, x)).keyOrdering.isEmpty, "rdd.map(x => (new NonOrderedClass, x)).keyOrdering.isEmpty"), - (rdd.map(x => (new ComparableClass, x)).keyOrdering.isDefined, + (rdd.map(x => (new ComparableClass, x)).keyOrdering.isDefined, "rdd.map(x => (new ComparableClass, x)).keyOrdering.isDefined"), - (rdd.map(x => (new OrderedClass, x)).keyOrdering.isDefined, + (rdd.map(x => (new OrderedClass, x)).keyOrdering.isDefined, "rdd.map(x => (new OrderedClass, x)).keyOrdering.isDefined")) } - + def otherRDDMethodExpectations(rdd: RDD[Int]): List[(Boolean, String)] = { - List((rdd.groupBy(x => x).keyOrdering.isDefined, + List((rdd.groupBy(x => x).keyOrdering.isDefined, "rdd.groupBy(x => x).keyOrdering.isDefined"), - (rdd.groupBy(x => new NonOrderedClass).keyOrdering.isEmpty, + (rdd.groupBy(x => new NonOrderedClass).keyOrdering.isEmpty, "rdd.groupBy(x => new NonOrderedClass).keyOrdering.isEmpty"), - (rdd.groupBy(x => new ComparableClass).keyOrdering.isDefined, + (rdd.groupBy(x => new ComparableClass).keyOrdering.isDefined, "rdd.groupBy(x => new ComparableClass).keyOrdering.isDefined"), (rdd.groupBy(x => new OrderedClass).keyOrdering.isDefined, "rdd.groupBy(x => new OrderedClass).keyOrdering.isDefined"), diff --git a/core/src/test/scala/org/apache/spark/JsonTestUtils.scala b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala new file mode 100644 index 0000000000000..ba367cd476146 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/JsonTestUtils.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark + +import org.json4s._ +import org.json4s.jackson.JsonMethods + +trait JsonTestUtils { + def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { + val Diff(c, a, d) = validateJson.diff(expectedJson) + val validatePretty = JsonMethods.pretty(validateJson) + val expectedPretty = JsonMethods.pretty(expectedJson) + val errorMessage = s"Expected:\n$expectedPretty\nFound:\n$validatePretty" + import org.scalactic.TripleEquals._ + assert(c === JNothing, s"$errorMessage\nChanged:\n${JsonMethods.pretty(c)}") + assert(a === JNothing, s"$errorMessage\nAdded:\n${JsonMethods.pretty(a)}") + assert(d === JNothing, s"$errorMessage\nDeleted:\n${JsonMethods.pretty(d)}") + } + +} diff --git a/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala b/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala index 308b9ea17708d..1a099da2c6c8e 100644 --- a/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala +++ b/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala @@ -34,7 +34,7 @@ object SSLSampleConfigs { conf.set("spark.ssl.trustStore", trustStorePath) conf.set("spark.ssl.trustStorePassword", "password") conf.set("spark.ssl.enabledAlgorithms", - "TLS_RSA_WITH_AES_128_CBC_SHA, SSL_RSA_WITH_DES_CBC_SHA") + "SSL_RSA_WITH_RC4_128_SHA, SSL_RSA_WITH_DES_CBC_SHA") conf.set("spark.ssl.protocol", "TLSv1") conf } @@ -48,7 +48,7 @@ object SSLSampleConfigs { conf.set("spark.ssl.trustStore", trustStorePath) conf.set("spark.ssl.trustStorePassword", "password") conf.set("spark.ssl.enabledAlgorithms", - "TLS_RSA_WITH_AES_128_CBC_SHA, SSL_RSA_WITH_DES_CBC_SHA") + "SSL_RSA_WITH_RC4_128_SHA, SSL_RSA_WITH_DES_CBC_SHA") conf.set("spark.ssl.protocol", "TLSv1") conf } diff --git a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala index 62cb7649c0284..61571be44252a 100644 --- a/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/SecurityManagerSuite.scala @@ -147,7 +147,7 @@ class SecurityManagerSuite extends FunSuite { assert(securityManager.fileServerSSLOptions.keyPassword === Some("password")) assert(securityManager.fileServerSSLOptions.protocol === Some("TLSv1")) assert(securityManager.fileServerSSLOptions.enabledAlgorithms === - Set("TLS_RSA_WITH_AES_128_CBC_SHA", "SSL_RSA_WITH_DES_CBC_SHA")) + Set("SSL_RSA_WITH_RC4_128_SHA", "SSL_RSA_WITH_DES_CBC_SHA")) assert(securityManager.akkaSSLOptions.trustStore.isDefined === true) assert(securityManager.akkaSSLOptions.trustStore.get.getName === "truststore") @@ -158,7 +158,7 @@ class SecurityManagerSuite extends FunSuite { assert(securityManager.akkaSSLOptions.keyPassword === Some("password")) assert(securityManager.akkaSSLOptions.protocol === Some("TLSv1")) assert(securityManager.akkaSSLOptions.enabledAlgorithms === - Set("TLS_RSA_WITH_AES_128_CBC_SHA", "SSL_RSA_WITH_DES_CBC_SHA")) + Set("SSL_RSA_WITH_RC4_128_SHA", "SSL_RSA_WITH_DES_CBC_SHA")) } test("ssl off setup") { diff --git a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala index 68d08e32f9aa4..fafc9d47503b7 100644 --- a/core/src/test/scala/org/apache/spark/SparkConfSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkConfSuite.scala @@ -34,18 +34,18 @@ class SparkConfSuite extends FunSuite with LocalSparkContext with ResetSystemPro val conf = new SparkConf() // Simply exercise the API, we don't need a complete conversion test since that's handled in // 
UtilsSuite.scala - assert(conf.getSizeAsBytes("fake","1k") === ByteUnit.KiB.toBytes(1)) - assert(conf.getSizeAsKb("fake","1k") === ByteUnit.KiB.toKiB(1)) - assert(conf.getSizeAsMb("fake","1k") === ByteUnit.KiB.toMiB(1)) - assert(conf.getSizeAsGb("fake","1k") === ByteUnit.KiB.toGiB(1)) + assert(conf.getSizeAsBytes("fake", "1k") === ByteUnit.KiB.toBytes(1)) + assert(conf.getSizeAsKb("fake", "1k") === ByteUnit.KiB.toKiB(1)) + assert(conf.getSizeAsMb("fake", "1k") === ByteUnit.KiB.toMiB(1)) + assert(conf.getSizeAsGb("fake", "1k") === ByteUnit.KiB.toGiB(1)) } test("Test timeString conversion") { val conf = new SparkConf() // Simply exercise the API, we don't need a complete conversion test since that's handled in // UtilsSuite.scala - assert(conf.getTimeAsMs("fake","1ms") === TimeUnit.MILLISECONDS.toMillis(1)) - assert(conf.getTimeAsSeconds("fake","1000ms") === TimeUnit.MILLISECONDS.toSeconds(1000)) + assert(conf.getTimeAsMs("fake", "1ms") === TimeUnit.MILLISECONDS.toMillis(1)) + assert(conf.getTimeAsSeconds("fake", "1000ms") === TimeUnit.MILLISECONDS.toSeconds(1000)) } test("loading from system properties") { @@ -241,6 +241,9 @@ class SparkConfSuite extends FunSuite with LocalSparkContext with ResetSystemPro conf.set("spark.yarn.applicationMaster.waitTries", "42") assert(conf.getTimeAsSeconds("spark.yarn.am.waitTime") === 420) + + conf.set("spark.kryoserializer.buffer.mb", "1.1") + assert(conf.getSizeAsKb("spark.kryoserializer.buffer") === 1100) } test("akka deprecated configs") { diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 9049db7755358..6b43b4e93226b 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -73,22 +73,22 @@ class SparkContextSuite extends FunSuite with LocalSparkContext { var sc2: SparkContext = null SparkContext.clearActiveContext() val conf = new SparkConf().setAppName("test").setMaster("local") - + sc = SparkContext.getOrCreate(conf) - + assert(sc.getConf.get("spark.app.name").equals("test")) sc2 = SparkContext.getOrCreate(new SparkConf().setAppName("test2").setMaster("local")) assert(sc2.getConf.get("spark.app.name").equals("test")) assert(sc === sc2) assert(sc eq sc2) - + // Try creating second context to confirm that it's still possible, if desired sc2 = new SparkContext(new SparkConf().setAppName("test3").setMaster("local") .set("spark.driver.allowMultipleContexts", "true")) - + sc2.stop() } - + test("BytesWritable implicit conversion is correct") { // Regression test for SPARK-3121 val bytesWritable = new BytesWritable() @@ -222,8 +222,8 @@ class SparkContextSuite extends FunSuite with LocalSparkContext { val dir1 = Utils.createTempDir() val dir2 = Utils.createTempDir() - val dirpath1=dir1.getAbsolutePath - val dirpath2=dir2.getAbsolutePath + val dirpath1 = dir1.getAbsolutePath + val dirpath2 = dir2.getAbsolutePath // file1 and file2 are placed inside dir1, they are also used for // textFile, hadoopFile, and newAPIHadoopFile @@ -235,11 +235,11 @@ class SparkContextSuite extends FunSuite with LocalSparkContext { val file4 = new File(dir2, "part-00001") val file5 = new File(dir2, "part-00002") - val filepath1=file1.getAbsolutePath - val filepath2=file2.getAbsolutePath - val filepath3=file3.getAbsolutePath - val filepath4=file4.getAbsolutePath - val filepath5=file5.getAbsolutePath + val filepath1 = file1.getAbsolutePath + val filepath2 = file2.getAbsolutePath + val filepath3 = 
file3.getAbsolutePath + val filepath4 = file4.getAbsolutePath + val filepath5 = file5.getAbsolutePath try { diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala index c8fdfa693912e..c38e306b6ac40 100644 --- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala @@ -17,9 +17,11 @@ package org.apache.spark.broadcast +import scala.concurrent.duration._ import scala.util.Random import org.scalatest.{Assertions, FunSuite} +import org.scalatest.concurrent.Eventually._ import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkEnv} import org.apache.spark.io.SnappyCompressionCodec @@ -284,7 +286,7 @@ class BroadcastSuite extends FunSuite with LocalSparkContext { assert(statuses.size === expectedNumBlocks) } - testUnpersistBroadcast(distributed, numSlaves, torrentConf, afterCreation, + testUnpersistBroadcast(distributed, numSlaves, torrentConf, afterCreation, afterUsingBroadcast, afterUnpersist, removeFromDriver) } @@ -307,7 +309,17 @@ class BroadcastSuite extends FunSuite with LocalSparkContext { removeFromDriver: Boolean) { sc = if (distributed) { - new SparkContext("local-cluster[%d, 1, 512]".format(numSlaves), "test", broadcastConf) + val _sc = + new SparkContext("local-cluster[%d, 1, 512]".format(numSlaves), "test", broadcastConf) + // Wait until all slaves are up + eventually(timeout(10.seconds), interval(10.milliseconds)) { + _sc.jobProgressListener.synchronized { + val numBlockManagers = _sc.jobProgressListener.blockManagerIds.size + assert(numBlockManagers == numSlaves + 1, + s"Expect ${numSlaves + 1} block managers, but was ${numBlockManagers}") + } + } + _sc } else { new SparkContext("local", "test", broadcastConf) } diff --git a/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala new file mode 100644 index 0000000000000..7d39984424842 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/IvyTestUtils.scala @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy + +import java.io.{File, FileInputStream, FileOutputStream} +import java.util.jar.{JarEntry, JarOutputStream} + +import com.google.common.io.{Files, ByteStreams} + +import org.apache.commons.io.FileUtils + +import org.apache.spark.TestUtils.{createCompiledClass, JavaSourceFromString} +import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate + +private[deploy] object IvyTestUtils { + + /** + * Create the path for the jar and pom from the maven coordinate. Extension should be `jar` + * or `pom`.
+ */ + private def pathFromCoordinate( + artifact: MavenCoordinate, + prefix: File, + ext: String, + useIvyLayout: Boolean): File = { + val groupDirs = artifact.groupId.replace(".", File.separator) + val artifactDirs = artifact.artifactId + val artifactPath = + if (!useIvyLayout) { + Seq(groupDirs, artifactDirs, artifact.version).mkString(File.separator) + } else { + Seq(groupDirs, artifactDirs, artifact.version, ext + "s").mkString(File.separator) + } + new File(prefix, artifactPath) + } + + private def artifactName(artifact: MavenCoordinate, ext: String = ".jar"): String = { + s"${artifact.artifactId}-${artifact.version}$ext" + } + + /** Write the contents to a file to the supplied directory. */ + private def writeFile(dir: File, fileName: String, contents: String): File = { + val outputFile = new File(dir, fileName) + val outputStream = new FileOutputStream(outputFile) + outputStream.write(contents.toCharArray.map(_.toByte)) + outputStream.close() + outputFile + } + + /** Create an example Python file. */ + private def createPythonFile(dir: File): File = { + val contents = + """def myfunc(x): + | return x + 1 + """.stripMargin + writeFile(dir, "mylib.py", contents) + } + + /** Create a simple testable Class. */ + private def createJavaClass(dir: File, className: String, packageName: String): File = { + val contents = + s"""package $packageName; + | + |import java.lang.Integer; + | + |class $className implements java.io.Serializable { + | + | public $className() {} + | + | public Integer myFunc(Integer x) { + | return x + 1; + | } + |} + """.stripMargin + val sourceFile = + new JavaSourceFromString(new File(dir, className + ".java").getAbsolutePath, contents) + createCompiledClass(className, dir, sourceFile, Seq.empty) + } + + /** Helper method to write artifact information in the pom. */ + private def pomArtifactWriter(artifact: MavenCoordinate, tabCount: Int = 1): String = { + var result = "\n" + " " * tabCount + s"${artifact.groupId}" + result += "\n" + " " * tabCount + s"${artifact.artifactId}" + result += "\n" + " " * tabCount + s"${artifact.version}" + result + } + + /** Create a pom file for this artifact. */ + private def createPom( + dir: File, + artifact: MavenCoordinate, + dependencies: Option[Seq[MavenCoordinate]]): File = { + var content = """ + | + | + | 4.0.0 + """.stripMargin.trim + content += pomArtifactWriter(artifact) + content += dependencies.map { deps => + val inside = deps.map { dep => + "\t" + pomArtifactWriter(dep, 3) + "\n\t" + }.mkString("\n") + "\n \n" + inside + "\n " + }.getOrElse("") + content += "\n" + writeFile(dir, artifactName(artifact, ".pom"), content.trim) + } + + /** Create the jar for the given maven coordinate, using the supplied files. */ + private def packJar( + dir: File, + artifact: MavenCoordinate, + files: Seq[(String, File)]): File = { + val jarFile = new File(dir, artifactName(artifact)) + val jarFileStream = new FileOutputStream(jarFile) + val jarStream = new JarOutputStream(jarFileStream, new java.util.jar.Manifest()) + + for (file <- files) { + val jarEntry = new JarEntry(file._1) + jarStream.putNextEntry(jarEntry) + + val in = new FileInputStream(file._2) + ByteStreams.copy(in, jarStream) + in.close() + } + jarStream.close() + jarFileStream.close() + + jarFile + } + + /** + * Creates a jar and pom file, mocking a Maven repository. The root path can be supplied with + * `tempDir`, dependencies can be created into the same repo, and python files can also be packed + * inside the jar. 
+ * + * @param artifact The maven coordinate to generate the jar and pom for. + * @param dependencies List of dependencies this artifact might have to also create jars and poms. + * @param tempDir The root folder of the repository + * @param useIvyLayout whether to mock the Ivy layout for local repository testing + * @param withPython Whether to pack python files inside the jar for extensive testing. + * @return Root path of the repository + */ + private def createLocalRepository( + artifact: MavenCoordinate, + dependencies: Option[Seq[MavenCoordinate]] = None, + tempDir: Option[File] = None, + useIvyLayout: Boolean = false, + withPython: Boolean = false): File = { + // Where the root of the repository exists, and what Ivy will search in + val tempPath = tempDir.getOrElse(Files.createTempDir()) + // Create directory if it doesn't exist + Files.createParentDirs(tempPath) + // Where to create temporary class files and such + val root = new File(tempPath, tempPath.hashCode().toString) + Files.createParentDirs(new File(root, "dummy")) + try { + val jarPath = pathFromCoordinate(artifact, tempPath, "jar", useIvyLayout) + Files.createParentDirs(new File(jarPath, "dummy")) + val className = "MyLib" + + val javaClass = createJavaClass(root, className, artifact.groupId) + // A tuple of files representation in the jar, and the file + val javaFile = (artifact.groupId.replace(".", "/") + "/" + javaClass.getName, javaClass) + val allFiles = + if (withPython) { + val pythonFile = createPythonFile(root) + Seq(javaFile, (pythonFile.getName, pythonFile)) + } else { + Seq(javaFile) + } + val jarFile = packJar(jarPath, artifact, allFiles) + assert(jarFile.exists(), "Problem creating Jar file") + val pomPath = pathFromCoordinate(artifact, tempPath, "pom", useIvyLayout) + Files.createParentDirs(new File(pomPath, "dummy")) + val pomFile = createPom(pomPath, artifact, dependencies) + assert(pomFile.exists(), "Problem creating Pom file") + } finally { + FileUtils.deleteDirectory(root) + } + tempPath + } + + /** + * Creates a suite of jars and poms, with or without dependencies, mocking a maven repository. + * @param artifact The main maven coordinate to generate the jar and pom for. + * @param dependencies List of dependencies this artifact might have to also create jars and poms. + * @param rootDir The root folder of the repository (like `~/.m2/repositories`) + * @param useIvyLayout whether to mock the Ivy layout for local repository testing + * @param withPython Whether to pack python files inside the jar for extensive testing. + * @return Root path of the repository. Will be `rootDir` if supplied. + */ + private[deploy] def createLocalRepositoryForTests( + artifact: MavenCoordinate, + dependencies: Option[String], + rootDir: Option[File], + useIvyLayout: Boolean = false, + withPython: Boolean = false): File = { + val deps = dependencies.map(SparkSubmitUtils.extractMavenCoordinates) + val mainRepo = createLocalRepository(artifact, deps, rootDir, useIvyLayout, withPython) + deps.foreach { seq => seq.foreach { dep => + createLocalRepository(dep, None, Some(mainRepo), useIvyLayout, withPython = false) + }} + mainRepo + } + + /** + * Creates a repository for a test, and cleans it up afterwards. + * + * @param artifact The main maven coordinate to generate the jar and pom for. + * @param dependencies List of dependencies this artifact might have to also create jars and poms. 
+ * @param rootDir The root folder of the repository (like `~/.m2/repositories`) + * @param useIvyLayout whether to mock the Ivy layout for local repository testing + * @param withPython Whether to pack python files inside the jar for extensive testing. + * @return Root path of the repository. Will be `rootDir` if supplied. + */ + private[deploy] def withRepository( + artifact: MavenCoordinate, + dependencies: Option[String], + rootDir: Option[File], + useIvyLayout: Boolean = false, + withPython: Boolean = false)(f: String => Unit): Unit = { + val repo = createLocalRepositoryForTests(artifact, dependencies, rootDir, useIvyLayout, + withPython) + try { + f(repo.toURI.toString) + } finally { + // Clean up + if (repo.toString.contains(".m2") || repo.toString.contains(".ivy2")) { + FileUtils.deleteDirectory(new File(repo, + artifact.groupId.replace(".", File.separator) + File.separator + artifact.artifactId)) + dependencies.map(SparkSubmitUtils.extractMavenCoordinates).foreach { seq => + seq.foreach { dep => + FileUtils.deleteDirectory(new File(repo, + dep.artifactId.replace(".", File.separator))) + } + } + } else { + FileUtils.deleteDirectory(repo) + } + } + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala index b58d62567afe1..e04a79284175c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/JsonProtocolSuite.scala @@ -28,9 +28,9 @@ import org.scalatest.FunSuite import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, RecoveryState, WorkerInfo} import org.apache.spark.deploy.worker.{DriverRunner, ExecutorRunner} -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf} -class JsonProtocolSuite extends FunSuite { +class JsonProtocolSuite extends FunSuite with JsonTestUtils { test("writeApplicationInfo") { val output = JsonProtocol.writeApplicationInfo(createAppInfo()) @@ -136,16 +136,6 @@ class JsonProtocolSuite extends FunSuite { case e: JsonParseException => fail("Invalid Json detected", e) } } - - def assertValidDataInJson(validateJson: JValue, expectedJson: JValue) { - val Diff(c, a, d) = validateJson diff expectedJson - val validatePretty = JsonMethods.pretty(validateJson) - val expectedPretty = JsonMethods.pretty(expectedJson) - val errorMessage = s"Expected:\n$expectedPretty\nFound:\n$validatePretty" - assert(c === JNothing, s"$errorMessage\nChanged:\n${JsonMethods.pretty(c)}") - assert(a === JNothing, s"$errorMessage\nAdded:\n${JsonMethods.pretty(a)}") - assert(d === JNothing, s"$errorMessage\nDelected:\n${JsonMethods.pretty(d)}") - } } object JsonConstants { diff --git a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala index bb6251fb4bfbe..80f2cc02516fe 100644 --- a/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/PythonRunnerSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.deploy import org.scalatest.FunSuite +import org.apache.spark.util.Utils + class PythonRunnerSuite extends FunSuite { // Test formatting a single path to be added to the PYTHONPATH @@ -28,10 +30,14 @@ class PythonRunnerSuite extends FunSuite { assert(PythonRunner.formatPath("file:///spark.py") === "/spark.py") 
assert(PythonRunner.formatPath("local:/spark.py") === "/spark.py") assert(PythonRunner.formatPath("local:///spark.py") === "/spark.py") - assert(PythonRunner.formatPath("C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") - assert(PythonRunner.formatPath("/C:/a/b/spark.py", testWindows = true) === "C:/a/b/spark.py") - assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === - "C:/a/b/spark.py") + if (Utils.isWindows) { + assert(PythonRunner.formatPath("file:/C:/a/b/spark.py", testWindows = true) === + "C:/a/b/spark.py") + assert(PythonRunner.formatPath("C:\\a\\b\\spark.py", testWindows = true) === + "C:/a/b/spark.py") + assert(PythonRunner.formatPath("C:\\a b\\spark.py", testWindows = true) === + "C:/a b/spark.py") + } intercept[IllegalArgumentException] { PythonRunner.formatPath("one:two") } intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:s3:xtremeFS") } intercept[IllegalArgumentException] { PythonRunner.formatPath("hdfs:/path/to/some.py") } @@ -45,14 +51,15 @@ class PythonRunnerSuite extends FunSuite { Array("/app.py", "/spark.py")) assert(PythonRunner.formatPaths("me.py,file:/you.py,local:/we.py") === Array("me.py", "/you.py", "/we.py")) - assert(PythonRunner.formatPaths("C:/a/b/spark.py", testWindows = true) === - Array("C:/a/b/spark.py")) - assert(PythonRunner.formatPaths("/C:/a/b/spark.py", testWindows = true) === - Array("C:/a/b/spark.py")) - assert(PythonRunner.formatPaths("C:/free.py,pie.py", testWindows = true) === - Array("C:/free.py", "pie.py")) - assert(PythonRunner.formatPaths("lovely.py,C:/free.py,file:/d:/fry.py", testWindows = true) === - Array("lovely.py", "C:/free.py", "d:/fry.py")) + if (Utils.isWindows) { + assert(PythonRunner.formatPaths("C:\\a\\b\\spark.py", testWindows = true) === + Array("C:/a/b/spark.py")) + assert(PythonRunner.formatPaths("C:\\free.py,pie.py", testWindows = true) === + Array("C:/free.py", "pie.py")) + assert(PythonRunner.formatPaths("lovely.py,C:\\free.py,file:/d:/fry.py", + testWindows = true) === + Array("lovely.py", "C:/free.py", "d:/fry.py")) + } intercept[IllegalArgumentException] { PythonRunner.formatPaths("one:two,three") } intercept[IllegalArgumentException] { PythonRunner.formatPaths("two,three,four:five:six") } intercept[IllegalArgumentException] { PythonRunner.formatPaths("hdfs:/some.py,foo.py") } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index 029a1156fda6b..ea9227a7e9af5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -30,6 +30,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.deploy.SparkSubmit._ +import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate import org.apache.spark.util.{ResetSystemProperties, Utils} // Note: this suite mixes in ResetSystemProperties because SparkSubmit.main() sets a bunch @@ -320,7 +321,7 @@ class SparkSubmitSuite extends FunSuite with Matchers with ResetSystemProperties runSparkSubmit(args) } - test("includes jars passed in through --jars") { + ignore("includes jars passed in through --jars") { val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) @@ -334,18 +335,23 @@ class SparkSubmitSuite extends FunSuite with Matchers with ResetSystemProperties 
runSparkSubmit(args) } + // SPARK-7287 ignore("includes jars passed in through --packages") { val unusedJar = TestUtils.createJarWithClasses(Seq.empty) - val packagesString = "com.databricks:spark-csv_2.10:0.1,com.databricks:spark-avro_2.10:0.1" - val args = Seq( - "--class", JarCreationTest.getClass.getName.stripSuffix("$"), - "--name", "testApp", - "--master", "local-cluster[2,1,512]", - "--packages", packagesString, - "--conf", "spark.ui.enabled=false", - unusedJar.toString, - "com.databricks.spark.csv.DefaultSource", "com.databricks.spark.avro.DefaultSource") - runSparkSubmit(args) + val main = MavenCoordinate("my.great.lib", "mylib", "0.1") + val dep = MavenCoordinate("my.great.dep", "mylib", "0.1") + IvyTestUtils.withRepository(main, Some(dep.toString), None) { repo => + val args = Seq( + "--class", JarCreationTest.getClass.getName.stripSuffix("$"), + "--name", "testApp", + "--master", "local-cluster[2,1,512]", + "--packages", Seq(main, dep).mkString(","), + "--repositories", repo, + "--conf", "spark.ui.enabled=false", + unusedJar.toString, + "my.great.lib.MyLib", "my.great.dep.MyLib") + runSparkSubmit(args) + } } test("resolves command line argument paths correctly") { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 2df2597e058cd..088ca3cb93b49 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -17,17 +17,17 @@ package org.apache.spark.deploy -import java.io.{PrintStream, OutputStream, File} - -import org.apache.ivy.core.settings.IvySettings +import java.io.{File, PrintStream, OutputStream} import scala.collection.mutable.ArrayBuffer - import org.scalatest.{BeforeAndAfterAll, FunSuite} import org.apache.ivy.core.module.descriptor.MDArtifact +import org.apache.ivy.core.settings.IvySettings import org.apache.ivy.plugins.resolver.IBiblioResolver +import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate + class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll { private val noOpOutputStream = new OutputStream { @@ -89,7 +89,7 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll { } test("ivy path works correctly") { - val ivyPath = "dummy/ivy" + val ivyPath = "dummy" + File.separator + "ivy" val md = SparkSubmitUtils.getModuleDescriptor val artifacts = for (i <- 0 until 3) yield new MDArtifact(md, s"jar-$i", "jar", "jar") var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath)) @@ -98,17 +98,38 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll { assert(index >= 0) jPaths = jPaths.substring(index + ivyPath.length) } - // end to end - val jarPath = SparkSubmitUtils.resolveMavenCoordinates( - "com.databricks:spark-csv_2.10:0.1", None, Option(ivyPath), true) - assert(jarPath.indexOf(ivyPath) >= 0, "should use non-default ivy path") + val main = MavenCoordinate("my.awesome.lib", "mylib", "0.1") + IvyTestUtils.withRepository(main, None, None) { repo => + // end to end + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), + Option(ivyPath), true) + assert(jarPath.indexOf(ivyPath) >= 0, "should use non-default ivy path") + } } - test("search for artifact at other repositories") { - val path = SparkSubmitUtils.resolveMavenCoordinates("com.agimatec:agimatec-validation:0.9.3", - Option("https://oss.sonatype.org/content/repositories/agimatec/"), 
None, true) - assert(path.indexOf("agimatec-validation") >= 0, "should find package. If it doesn't, check" + - "if package still exists. If it has been removed, replace the example in this test.") + test("search for artifact at local repositories") { + val main = new MavenCoordinate("my.awesome.lib", "mylib", "0.1") + // Local M2 repository + IvyTestUtils.withRepository(main, None, Some(SparkSubmitUtils.m2Path)) { repo => + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, None, true) + assert(jarPath.indexOf("mylib") >= 0, "should find artifact") + } + // Local Ivy Repository + val settings = new IvySettings + val ivyLocal = new File(settings.getDefaultIvyUserDir, "local" + File.separator) + IvyTestUtils.withRepository(main, None, Some(ivyLocal), true) { repo => + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, None, true) + assert(jarPath.indexOf("mylib") >= 0, "should find artifact") + } + // Local ivy repository with modified home + val dummyIvyPath = "dummy" + File.separator + "ivy" + val dummyIvyLocal = new File(dummyIvyPath, "local" + File.separator) + IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo => + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, + Some(dummyIvyPath), true) + assert(jarPath.indexOf("mylib") >= 0, "should find artifact") + assert(jarPath.indexOf(dummyIvyPath) >= 0, "should be in new ivy path") + } } test("dependency not found throws RuntimeException") { @@ -117,7 +138,7 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll { } } - ignore("neglects Spark and Spark's dependencies") { + test("neglects Spark and Spark's dependencies") { val components = Seq("bagel_", "catalyst_", "core_", "graphx_", "hive_", "mllib_", "repl_", "sql_", "streaming_", "yarn_", "network-common_", "network-shuffle_", "network-yarn_") @@ -127,11 +148,11 @@ class SparkSubmitUtilsSuite extends FunSuite with BeforeAndAfterAll { val path = SparkSubmitUtils.resolveMavenCoordinates(coordinates, None, None, true) assert(path === "", "should return empty path") - // Should not exclude the following dependency. Will throw an error, because it doesn't exist, - // but the fact that it is checking means that it wasn't excluded. - intercept[RuntimeException] { - SparkSubmitUtils.resolveMavenCoordinates(coordinates + - ",org.apache.spark:spark-streaming-kafka-assembly_2.10:1.2.0", None, None, true) + val main = MavenCoordinate("org.apache.spark", "spark-streaming-kafka-assembly_2.10", "1.2.0") + IvyTestUtils.withRepository(main, None, None) { repo => + val files = SparkSubmitUtils.resolveMavenCoordinates(coordinates + "," + main.toString, + Some(repo), None, true) + assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") } } } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 71ba9c18257b8..e10dd4cf837aa 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -14,22 +14,164 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.spark.deploy.history -import javax.servlet.http.HttpServletRequest - -import scala.collection.mutable +import java.io.{File, FileInputStream, FileWriter, IOException} +import java.net.{HttpURLConnection, URL} +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} -import org.apache.hadoop.fs.Path -import org.mockito.Mockito.{when} -import org.scalatest.FunSuite -import org.scalatest.Matchers +import org.apache.commons.io.{FileUtils, IOUtils} +import org.mockito.Mockito.when +import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} import org.scalatest.mock.MockitoSugar +import org.apache.spark.{JsonTestUtils, SecurityManager, SparkConf} import org.apache.spark.ui.SparkUI -class HistoryServerSuite extends FunSuite with Matchers with MockitoSugar { +/** + * A collection of tests against the historyserver, including comparing responses from the json + * metrics api to a set of known "golden files". If new endpoints / parameters are added, + * cases should be added to this test suite. The expected outcomes can be generated by running + * the HistoryServerSuite.main. Note that this will blindly generate new expectation files matching + * the current behavior -- the developer must verify that behavior is correct. + * + * Similarly, if the behavior is changed, HistoryServerSuite.main can be run to update the + * expectations. However, in general this should be done with extreme caution, as the metrics + * are considered part of Spark's public api. + */ +class HistoryServerSuite extends FunSuite with BeforeAndAfter with Matchers with MockitoSugar + with JsonTestUtils { + + private val logDir = new File("src/test/resources/spark-events") + private val expRoot = new File("src/test/resources/HistoryServerExpectations/") + + private var provider: FsHistoryProvider = null + private var server: HistoryServer = null + private var port: Int = -1 + + def init(): Unit = { + val conf = new SparkConf() + .set("spark.history.fs.logDirectory", logDir.getAbsolutePath) + .set("spark.history.fs.updateInterval", "0") + .set("spark.testing", "true") + provider = new FsHistoryProvider(conf) + provider.checkForLogs() + val securityManager = new SecurityManager(conf) + + server = new HistoryServer(conf, provider, securityManager, 18080) + server.initialize() + server.bind() + port = server.boundPort + } + + def stop(): Unit = { + server.stop() + } + + before { + init() + } + + after{ + stop() + } + + val cases = Seq( + "application list json" -> "applications", + "completed app list json" -> "applications?status=completed", + "running app list json" -> "applications?status=running", + "minDate app list json" -> "applications?minDate=2015-02-10", + "maxDate app list json" -> "applications?maxDate=2015-02-10", + "maxDate2 app list json" -> "applications?maxDate=2015-02-03T16:42:40.000GMT", + "one app json" -> "applications/local-1422981780767", + "one app multi-attempt json" -> "applications/local-1426533911241", + "job list json" -> "applications/local-1422981780767/jobs", + "job list from multi-attempt app json(1)" -> "applications/local-1426533911241/1/jobs", + "job list from multi-attempt app json(2)" -> "applications/local-1426533911241/2/jobs", + "one job json" -> "applications/local-1422981780767/jobs/0", + "succeeded job list json" -> "applications/local-1422981780767/jobs?status=succeeded", + "succeeded&failed job list json" -> + "applications/local-1422981780767/jobs?status=succeeded&status=failed", + "executor list json" -> "applications/local-1422981780767/executors", 
"stage list json" -> "applications/local-1422981780767/stages", + "complete stage list json" -> "applications/local-1422981780767/stages?status=complete", + "failed stage list json" -> "applications/local-1422981780767/stages?status=failed", + "one stage json" -> "applications/local-1422981780767/stages/1", + "one stage attempt json" -> "applications/local-1422981780767/stages/1/0", + + "stage task summary w shuffle write" + -> "applications/local-1430917381534/stages/0/0/taskSummary", + "stage task summary w shuffle read" + -> "applications/local-1430917381534/stages/1/0/taskSummary", + "stage task summary w/ custom quantiles" -> + "applications/local-1430917381534/stages/0/0/taskSummary?quantiles=0.01,0.5,0.99", + + "stage task list" -> "applications/local-1430917381534/stages/0/0/taskList", + "stage task list w/ offset & length" -> + "applications/local-1430917381534/stages/0/0/taskList?offset=10&length=50", + "stage task list w/ sortBy" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=DECREASING_RUNTIME", + "stage task list w/ sortBy short names: -runtime" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=-runtime", + "stage task list w/ sortBy short names: runtime" -> + "applications/local-1430917381534/stages/0/0/taskList?sortBy=runtime", + + "stage list with accumulable json" -> "applications/local-1426533911241/1/stages", + "stage with accumulable json" -> "applications/local-1426533911241/1/stages/0/0", + "stage task list from multi-attempt app json(1)" -> + "applications/local-1426533911241/1/stages/0/0/taskList", + "stage task list from multi-attempt app json(2)" -> + "applications/local-1426533911241/2/stages/0/0/taskList", + + "rdd list storage json" -> "applications/local-1422981780767/storage/rdd", + "one rdd storage json" -> "applications/local-1422981780767/storage/rdd/0" + ) + + // run a bunch of characterization tests -- just verify the behavior is the same as what is saved + // in the test resource folder + cases.foreach { case (name, path) => + test(name) { + val (code, jsonOpt, errOpt) = getContentAndCode(path) + code should be (HttpServletResponse.SC_OK) + jsonOpt should be ('defined) + errOpt should be (None) + val json = jsonOpt.get + val exp = IOUtils.toString(new FileInputStream( + new File(expRoot, HistoryServerSuite.sanitizePath(name) + "_expectation.json"))) + // compare the ASTs so formatting differences don't cause failures + import org.json4s._ + import org.json4s.jackson.JsonMethods._ + val jsonAst = parse(json) + val expAst = parse(exp) + assertValidDataInJson(jsonAst, expAst) + } + } + + test("response codes on bad paths") { + val badAppId = getContentAndCode("applications/foobar") + badAppId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badAppId._3 should be (Some("unknown app: foobar")) + + val badStageId = getContentAndCode("applications/local-1422981780767/stages/12345") + badStageId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageId._3 should be (Some("unknown stage: 12345")) + + val badStageAttemptId = getContentAndCode("applications/local-1422981780767/stages/1/1") + badStageAttemptId._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageAttemptId._3 should be (Some("unknown attempt for stage 1. 
Found attempts: [0]")) + + val badStageId2 = getContentAndCode("applications/local-1422981780767/stages/flimflam") + badStageId2._1 should be (HttpServletResponse.SC_NOT_FOUND) + // will take some mucking w/ jersey to get a better error msg in this case + + val badQuantiles = getContentAndCode( + "applications/local-1430917381534/stages/0/0/taskSummary?quantiles=foo,0.1") + badQuantiles._1 should be (HttpServletResponse.SC_BAD_REQUEST) + badQuantiles._3 should be (Some("Bad value for parameter \"quantiles\". Expected a double, " + + "got \"foo\"")) + + getContentAndCode("foobar")._1 should be (HttpServletResponse.SC_NOT_FOUND) + } test("generate history page with relative links") { val historyServer = mock[HistoryServer] @@ -54,4 +196,75 @@ class HistoryServerSuite extends FunSuite with Matchers with MockitoSugar { } yield (attrs.toString) justHrefs should contain(link) } + + def getContentAndCode(path: String, port: Int = port): (Int, Option[String], Option[String]) = { + HistoryServerSuite.getContentAndCode(new URL(s"http://localhost:$port/api/v1/$path")) + } + + def getUrl(path: String): String = { + HistoryServerSuite.getUrl(new URL(s"http://localhost:$port/api/v1/$path")) + } + + def generateExpectation(name: String, path: String): Unit = { + val json = getUrl(path) + val file = new File(expRoot, HistoryServerSuite.sanitizePath(name) + "_expectation.json") + val out = new FileWriter(file) + out.write(json) + out.close() + } +} + +object HistoryServerSuite { + def main(args: Array[String]): Unit = { + // generate the "expected" results for the characterization tests. Just blindly assume the + // current behavior is correct, and write out the returned json to the test/resource files + + val suite = new HistoryServerSuite + FileUtils.deleteDirectory(suite.expRoot) + suite.expRoot.mkdirs() + try { + suite.init() + suite.cases.foreach { case (name, path) => + suite.generateExpectation(name, path) + } + } finally { + suite.stop() + } + } + + def getContentAndCode(url: URL): (Int, Option[String], Option[String]) = { + val connection = url.openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.connect() + val code = connection.getResponseCode() + val inString = try { + val in = Option(connection.getInputStream()) + in.map(IOUtils.toString) + } catch { + case io: IOException => None + } + val errString = try { + val err = Option(connection.getErrorStream()) + err.map(IOUtils.toString) + } catch { + case io: IOException => None + } + (code, inString, errString) + } + + + def sanitizePath(path: String): String = { + // this doesn't need to be perfect, just good enough to avoid collisions + path.replaceAll("\\W", "_") + } + + def getUrl(path: URL): String = { + val (code, resultOpt, error) = getContentAndCode(path) + if (code == 200) { + resultOpt.get + } else { + throw new RuntimeException( + "got code: " + code + " when getting " + path + " w/ error: " + error) + } + } } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala b/core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala new file mode 100644 index 0000000000000..f4e56632e426a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/master/CustomRecoveryModeFactory.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file is placed in different package to make sure all of these components work well +// when they are outside of org.apache.spark. +package other.supplier + +import scala.collection.mutable +import scala.reflect.ClassTag + +import akka.serialization.Serialization + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.master._ + +class CustomRecoveryModeFactory( + conf: SparkConf, + serialization: Serialization +) extends StandaloneRecoveryModeFactory(conf, serialization) { + + CustomRecoveryModeFactory.instantiationAttempts += 1 + + /** + * PersistenceEngine defines how the persistent data(Information about worker, driver etc..) + * is handled for recovery. + * + */ + override def createPersistenceEngine(): PersistenceEngine = + new CustomPersistenceEngine(serialization) + + /** + * Create an instance of LeaderAgent that decides who gets elected as master. + */ + override def createLeaderElectionAgent(master: LeaderElectable): LeaderElectionAgent = + new CustomLeaderElectionAgent(master) +} + +object CustomRecoveryModeFactory { + @volatile var instantiationAttempts = 0 +} + +class CustomPersistenceEngine(serialization: Serialization) extends PersistenceEngine { + val data = mutable.HashMap[String, Array[Byte]]() + + CustomPersistenceEngine.lastInstance = Some(this) + + /** + * Defines how the object is serialized and persisted. Implementation will + * depend on the store used. + */ + override def persist(name: String, obj: Object): Unit = { + CustomPersistenceEngine.persistAttempts += 1 + serialization.serialize(obj) match { + case util.Success(bytes) => data += name -> bytes + case util.Failure(cause) => throw new RuntimeException(cause) + } + } + + /** + * Defines how the object referred by its name is removed from the store. + */ + override def unpersist(name: String): Unit = { + CustomPersistenceEngine.unpersistAttempts += 1 + data -= name + } + + /** + * Gives all objects, matching a prefix. This defines how objects are + * read/deserialized back. 
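+ *
+ * For example (illustrative only): an application persisted as
+ * `persist("app_" + appId, appInfo)` can later be read back with
+ * `read[ApplicationInfo]("app_")`, since matching is a plain `startsWith`
+ * check on the persisted name.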
+ */ + override def read[T: ClassTag](prefix: String): Seq[T] = { + CustomPersistenceEngine.readAttempts += 1 + val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + val results = for ((name, bytes) <- data; if name.startsWith(prefix)) + yield serialization.deserialize(bytes, clazz) + + results.find(_.isFailure).foreach { + case util.Failure(cause) => throw new RuntimeException(cause) + } + + results.flatMap(_.toOption).toSeq + } +} + +object CustomPersistenceEngine { + @volatile var persistAttempts = 0 + @volatile var unpersistAttempts = 0 + @volatile var readAttempts = 0 + + @volatile var lastInstance: Option[CustomPersistenceEngine] = None +} + +class CustomLeaderElectionAgent(val masterActor: LeaderElectable) extends LeaderElectionAgent { + masterActor.electedLeader() +} + diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 34c74d87f0a62..f97e5ff6db31d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -17,12 +17,24 @@ package org.apache.spark.deploy.master +import java.util.Date + +import scala.concurrent.Await +import scala.concurrent.duration._ +import scala.io.Source +import scala.language.postfixOps + import akka.actor.Address -import org.scalatest.FunSuite +import org.json4s._ +import org.json4s.jackson.JsonMethods._ +import org.scalatest.{FunSuite, Matchers} +import org.scalatest.concurrent.Eventually +import other.supplier.{CustomPersistenceEngine, CustomRecoveryModeFactory} -import org.apache.spark.{SSLOptions, SparkConf, SparkException} +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.deploy._ -class MasterSuite extends FunSuite { +class MasterSuite extends FunSuite with Matchers with Eventually { test("toAkkaUrl") { val conf = new SparkConf(loadDefaults = false) @@ -63,4 +75,113 @@ class MasterSuite extends FunSuite { } assert("Invalid master URL: spark://1.2. 
3.4:1234" === e.getMessage) } + + test("can use a custom recovery mode factory") { + val conf = new SparkConf(loadDefaults = false) + conf.set("spark.deploy.recoveryMode", "CUSTOM") + conf.set("spark.deploy.recoveryMode.factory", + classOf[CustomRecoveryModeFactory].getCanonicalName) + + val instantiationAttempts = CustomRecoveryModeFactory.instantiationAttempts + + val commandToPersist = new Command( + mainClass = "", + arguments = Nil, + environment = Map.empty, + classPathEntries = Nil, + libraryPathEntries = Nil, + javaOpts = Nil + ) + + val appToPersist = new ApplicationInfo( + startTime = 0, + id = "test_app", + desc = new ApplicationDescription( + name = "", + maxCores = None, + memoryPerExecutorMB = 0, + command = commandToPersist, + appUiUrl = "", + eventLogDir = None, + eventLogCodec = None, + coresPerExecutor = None), + submitDate = new Date(), + driver = null, + defaultCores = 0 + ) + + val driverToPersist = new DriverInfo( + startTime = 0, + id = "test_driver", + desc = new DriverDescription( + jarUrl = "", + mem = 0, + cores = 0, + supervise = false, + command = commandToPersist + ), + submitDate = new Date() + ) + + val workerToPersist = new WorkerInfo( + id = "test_worker", + host = "127.0.0.1", + port = 10000, + cores = 0, + memory = 0, + actor = null, + webUiPort = 0, + publicAddress = "" + ) + + val (actorSystem, port, uiPort, restPort) = + Master.startSystemAndActor("127.0.0.1", 7077, 8080, conf) + + try { + Await.result(actorSystem.actorSelection("/user/Master").resolveOne(10 seconds), 10 seconds) + + CustomPersistenceEngine.lastInstance.isDefined shouldBe true + val persistenceEngine = CustomPersistenceEngine.lastInstance.get + + persistenceEngine.addApplication(appToPersist) + persistenceEngine.addDriver(driverToPersist) + persistenceEngine.addWorker(workerToPersist) + + val (apps, drivers, workers) = persistenceEngine.readPersistedData() + + apps.map(_.id) should contain(appToPersist.id) + drivers.map(_.id) should contain(driverToPersist.id) + workers.map(_.id) should contain(workerToPersist.id) + + } finally { + actorSystem.shutdown() + actorSystem.awaitTermination() + } + + CustomRecoveryModeFactory.instantiationAttempts should be > instantiationAttempts + } + + test("Master & worker web ui available") { + implicit val formats = org.json4s.DefaultFormats + val conf = new SparkConf() + val localCluster = new LocalSparkCluster(2, 2, 512, conf) + localCluster.start() + try { + eventually(timeout(5 seconds), interval(100 milliseconds)) { + val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + .getLines().mkString("\n") + val JArray(workers) = (parse(json) \ "workers") + workers.size should be (2) + workers.foreach { workerSummaryJson => + val JString(workerWebUi) = workerSummaryJson \ "webuiaddress" + val workerResponse = parse(Source.fromURL(s"${workerWebUi}/json") + .getLines().mkString("\n")) + (workerResponse \ "cores").extract[Int] should be (2) + } + } + } finally { + localCluster.stop() + } + } + } diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala index 7cc2104281464..e432b8e94654a 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala @@ -66,7 +66,7 @@ class WorkerArgumentsTest extends FunSuite { } } val conf = new MySparkConf() - val workerArgs = new WorkerArguments(args, conf) + val workerArgs 
= new WorkerArguments(args, conf) assert(workerArgs.memory === 5120) } diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index 450fba21f4b5c..93a779d5ce6f2 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -25,7 +25,7 @@ import org.scalatest.{Matchers, FunSuite} class WorkerSuite extends FunSuite with Matchers { def cmd(javaOpts: String*): Command = { - Command("", Seq.empty, Map.empty, Seq.empty, Seq.empty, Seq(javaOpts:_*)) + Command("", Seq.empty, Map.empty, Seq.empty, Seq.empty, Seq(javaOpts : _*)) } def conf(opts: (String, String)*): SparkConf = new SparkConf(loadDefaults = false).setAll(opts) diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala new file mode 100644 index 0000000000000..2d021224030a1 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/deploy/worker/ui/LogPageSuite.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.worker.ui + +import java.io.{File, FileWriter} + +import org.mockito.Mockito.mock +import org.scalatest.{FunSuite, PrivateMethodTester} + +class LogPageSuite extends FunSuite with PrivateMethodTester { + + test("get logs simple") { + val webui = mock(classOf[WorkerWebUI]) + val logPage = new LogPage(webui) + + // Prepare some fake log files to read later + val out = "some stdout here" + val err = "some stderr here" + val tmpDir = new File(sys.props("java.io.tmpdir")) + val tmpOut = new File(tmpDir, "stdout") + val tmpErr = new File(tmpDir, "stderr") + val tmpRand = new File(tmpDir, "random") + write(tmpOut, out) + write(tmpErr, err) + write(tmpRand, "1 6 4 5 2 7 8") + + // Get the logs. All log types other than "stderr" or "stdout" will be rejected + val getLog = PrivateMethod[(String, Long, Long, Long)]('getLog) + val (stdout, _, _, _) = + logPage invokePrivate getLog(tmpDir.getAbsolutePath, "stdout", None, 100) + val (stderr, _, _, _) = + logPage invokePrivate getLog(tmpDir.getAbsolutePath, "stderr", None, 100) + val (error1, _, _, _) = + logPage invokePrivate getLog(tmpDir.getAbsolutePath, "random", None, 100) + val (error2, _, _, _) = + logPage invokePrivate getLog(tmpDir.getAbsolutePath, "does-not-exist.txt", None, 100) + assert(stdout === out) + assert(stderr === err) + assert(error1.startsWith("Error")) + assert(error2.startsWith("Error")) + } + + /** Write the specified string to the file. 
*/ + private def write(f: File, s: String): Unit = { + val writer = new FileWriter(f) + try { + writer.write(s) + } finally { + writer.close() + } + } + +} diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index 8c6035fb367fe..cf6a143537889 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream} +import com.google.common.io.ByteStreams import org.scalatest.FunSuite import org.apache.spark.SparkConf @@ -62,6 +63,14 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("lz4 does not support concatenation of serialized streams") { + val codec = CompressionCodec.createCodec(conf, classOf[LZ4CompressionCodec].getName) + assert(codec.getClass === classOf[LZ4CompressionCodec]) + intercept[Exception] { + testConcatenationOfSerializedStreams(codec) + } + } + test("lzf compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) assert(codec.getClass === classOf[LZFCompressionCodec]) @@ -74,6 +83,12 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("lzf supports concatenation of serialized streams") { + val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) + assert(codec.getClass === classOf[LZFCompressionCodec]) + testConcatenationOfSerializedStreams(codec) + } + test("snappy compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) assert(codec.getClass === classOf[SnappyCompressionCodec]) @@ -86,9 +101,38 @@ class CompressionCodecSuite extends FunSuite { testCodec(codec) } + test("snappy does not support concatenation of serialized streams") { + val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) + assert(codec.getClass === classOf[SnappyCompressionCodec]) + intercept[Exception] { + testConcatenationOfSerializedStreams(codec) + } + } + test("bad compression codec") { intercept[IllegalArgumentException] { CompressionCodec.createCodec(conf, "foobar") } } + + private def testConcatenationOfSerializedStreams(codec: CompressionCodec): Unit = { + val bytes1: Array[Byte] = { + val baos = new ByteArrayOutputStream() + val out = codec.compressedOutputStream(baos) + (0 to 64).foreach(out.write) + out.close() + baos.toByteArray + } + val bytes2: Array[Byte] = { + val baos = new ByteArrayOutputStream() + val out = codec.compressedOutputStream(baos) + (65 to 127).foreach(out.write) + out.close() + baos.toByteArray + } + val concatenatedBytes = codec.compressedInputStream(new ByteArrayInputStream(bytes1 ++ bytes2)) + val decompressed: Array[Byte] = new Array[Byte](128) + ByteStreams.readFully(concatenatedBytes, decompressed) + assert(decompressed.toSeq === (0 to 127)) + } } diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index ef3e213f1fcce..60dba3b2d6719 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala @@ -263,7 +263,7 @@ class InputOutputMetricsSuite extends FunSuite with SharedSparkContext val tmpRdd = sc.textFile(tmpFilePath, numPartitions) - val firstSize= 
runAndReturnBytesRead { + val firstSize = runAndReturnBytesRead { aRdd.count() } val secondSize = runAndReturnBytesRead { @@ -433,10 +433,10 @@ class OldCombineTextRecordReaderWrapper( /** * Hadoop 2 has a version of this, but we can't use it for backwards compatibility */ -class NewCombineTextInputFormat extends NewCombineFileInputFormat[LongWritable,Text] { +class NewCombineTextInputFormat extends NewCombineFileInputFormat[LongWritable, Text] { def createRecordReader(split: NewInputSplit, context: TaskAttemptContext) : NewRecordReader[LongWritable, Text] = { - new NewCombineFileRecordReader[LongWritable,Text](split.asInstanceOf[NewCombineFileSplit], + new NewCombineFileRecordReader[LongWritable, Text](split.asInstanceOf[NewCombineFileSplit], context, classOf[NewCombineTextRecordReaderWrapper]) } } diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala new file mode 100644 index 0000000000000..a41f8b7ce5ce0 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.netty + +import org.apache.spark.network.BlockDataManager +import org.apache.spark.{SecurityManager, SparkConf} +import org.mockito.Mockito.mock +import org.scalatest._ + +class NettyBlockTransferServiceSuite extends FunSuite with BeforeAndAfterEach with ShouldMatchers { + private var service0: NettyBlockTransferService = _ + private var service1: NettyBlockTransferService = _ + + override def afterEach() { + if (service0 != null) { + service0.close() + service0 = null + } + + if (service1 != null) { + service1.close() + service1 = null + } + } + + test("can bind to a random port") { + service0 = createService(port = 0) + service0.port should not be 0 + } + + test("can bind to two random ports") { + service0 = createService(port = 0) + service1 = createService(port = 0) + service0.port should not be service1.port + } + + test("can bind to a specific port") { + val port = 17634 + service0 = createService(port) + service0.port should be >= port + service0.port should be <= (port + 10) // avoid testing equality in case of simultaneous tests + } + + test("can bind to a specific port twice and the second increments") { + val port = 17634 + service0 = createService(port) + service1 = createService(port) + service0.port should be >= port + service0.port should be <= (port + 10) + service1.port should be (service0.port + 1) + } + + private def createService(port: Int): NettyBlockTransferService = { + val conf = new SparkConf() + .set("spark.app.id", s"test-${getClass.getName}") + .set("spark.blockManager.port", port.toString) + val securityManager = new SecurityManager(conf) + val blockDataManager = mock(classOf[BlockDataManager]) + val service = new NettyBlockTransferService(conf, securityManager, numCores = 1) + service.init(blockDataManager) + service + } +} diff --git a/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala index be8467354b222..9037a8e7ee1d4 100644 --- a/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/JdbcRDDSuite.scala @@ -82,7 +82,7 @@ class JdbcRDDSuite extends FunSuite with BeforeAndAfter with LocalSparkContext { assert(rdd.count === 100) assert(rdd.reduce(_ + _) === 10100) } - + test("large id overflow") { sc = new SparkContext("local", "test") val rdd = new JdbcRDD( diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index ca0d953d306d8..6564232986cfa 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -512,17 +512,17 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { } test("lookup") { - val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) + val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) assert(pairs.partitioner === None) assert(pairs.lookup(1) === Seq(2)) - assert(pairs.lookup(5) === Seq(6,7)) + assert(pairs.lookup(5) === Seq(6, 7)) assert(pairs.lookup(-1) === Seq()) } test("lookup with partitioner") { - val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) + val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) val p = new Partitioner { def numPartitions: Int = 2 @@ -533,12 +533,12 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { assert(shuffled.partitioner === Some(p)) assert(shuffled.lookup(1) === 
Seq(2)) - assert(shuffled.lookup(5) === Seq(6,7)) + assert(shuffled.lookup(5) === Seq(6, 7)) assert(shuffled.lookup(-1) === Seq()) } test("lookup with bad partitioner") { - val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) + val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) val p = new Partitioner { def numPartitions: Int = 2 diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala new file mode 100644 index 0000000000000..4434ed858c60c --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rdd/RDDOperationScopeSuite.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.scalatest.{BeforeAndAfter, FunSuite} + +import org.apache.spark.{TaskContext, Partition, SparkContext} + +/** + * Tests whether scopes are passed from the RDD operation to the RDDs correctly. + */ +class RDDOperationScopeSuite extends FunSuite with BeforeAndAfter { + private var sc: SparkContext = null + private val scope1 = new RDDOperationScope("scope1") + private val scope2 = new RDDOperationScope("scope2", Some(scope1)) + private val scope3 = new RDDOperationScope("scope3", Some(scope2)) + + before { + sc = new SparkContext("local", "test") + } + + after { + sc.stop() + } + + test("getAllScopes") { + assert(scope1.getAllScopes === Seq(scope1)) + assert(scope2.getAllScopes === Seq(scope1, scope2)) + assert(scope3.getAllScopes === Seq(scope1, scope2, scope3)) + } + + test("json de/serialization") { + val scope1Json = scope1.toJson + val scope2Json = scope2.toJson + val scope3Json = scope3.toJson + assert(scope1Json === s"""{"id":"${scope1.id}","name":"scope1"}""") + assert(scope2Json === s"""{"id":"${scope2.id}","name":"scope2","parent":$scope1Json}""") + assert(scope3Json === s"""{"id":"${scope3.id}","name":"scope3","parent":$scope2Json}""") + assert(RDDOperationScope.fromJson(scope1Json) === scope1) + assert(RDDOperationScope.fromJson(scope2Json) === scope2) + assert(RDDOperationScope.fromJson(scope3Json) === scope3) + } + + test("withScope") { + val rdd0: MyCoolRDD = new MyCoolRDD(sc) + var rdd1: MyCoolRDD = null + var rdd2: MyCoolRDD = null + var rdd3: MyCoolRDD = null + RDDOperationScope.withScope(sc, "scope1", allowNesting = false, ignoreParent = false) { + rdd1 = new MyCoolRDD(sc) + RDDOperationScope.withScope(sc, "scope2", allowNesting = false, ignoreParent = false) { + rdd2 = new MyCoolRDD(sc) + RDDOperationScope.withScope(sc, "scope3", allowNesting = false, ignoreParent = false) { + rdd3 = new MyCoolRDD(sc) + } + } + } + assert(rdd0.scope.isEmpty) + assert(rdd1.scope.isDefined) + assert(rdd2.scope.isDefined) + assert(rdd3.scope.isDefined) + 
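+ // With allowNesting = false, the nested withScope calls above do not open child scopes,
+ // so each RDD created inside them should report only the outermost "scope1":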
assert(rdd1.scope.get.getAllScopes.map(_.name) === Seq("scope1")) + assert(rdd2.scope.get.getAllScopes.map(_.name) === Seq("scope1")) + assert(rdd3.scope.get.getAllScopes.map(_.name) === Seq("scope1")) + } + + test("withScope with partial nesting") { + val rdd0: MyCoolRDD = new MyCoolRDD(sc) + var rdd1: MyCoolRDD = null + var rdd2: MyCoolRDD = null + var rdd3: MyCoolRDD = null + // allow nesting here + RDDOperationScope.withScope(sc, "scope1", allowNesting = true, ignoreParent = false) { + rdd1 = new MyCoolRDD(sc) + // stop nesting here + RDDOperationScope.withScope(sc, "scope2", allowNesting = false, ignoreParent = false) { + rdd2 = new MyCoolRDD(sc) + RDDOperationScope.withScope(sc, "scope3", allowNesting = false, ignoreParent = false) { + rdd3 = new MyCoolRDD(sc) + } + } + } + assert(rdd0.scope.isEmpty) + assert(rdd1.scope.isDefined) + assert(rdd2.scope.isDefined) + assert(rdd3.scope.isDefined) + assert(rdd1.scope.get.getAllScopes.map(_.name) === Seq("scope1")) + assert(rdd2.scope.get.getAllScopes.map(_.name) === Seq("scope1", "scope2")) + assert(rdd3.scope.get.getAllScopes.map(_.name) === Seq("scope1", "scope2")) + } + + test("withScope with multiple layers of nesting") { + val rdd0: MyCoolRDD = new MyCoolRDD(sc) + var rdd1: MyCoolRDD = null + var rdd2: MyCoolRDD = null + var rdd3: MyCoolRDD = null + RDDOperationScope.withScope(sc, "scope1", allowNesting = true, ignoreParent = false) { + rdd1 = new MyCoolRDD(sc) + RDDOperationScope.withScope(sc, "scope2", allowNesting = true, ignoreParent = false) { + rdd2 = new MyCoolRDD(sc) + RDDOperationScope.withScope(sc, "scope3", allowNesting = true, ignoreParent = false) { + rdd3 = new MyCoolRDD(sc) + } + } + } + assert(rdd0.scope.isEmpty) + assert(rdd1.scope.isDefined) + assert(rdd2.scope.isDefined) + assert(rdd3.scope.isDefined) + assert(rdd1.scope.get.getAllScopes.map(_.name) === Seq("scope1")) + assert(rdd2.scope.get.getAllScopes.map(_.name) === Seq("scope1", "scope2")) + assert(rdd3.scope.get.getAllScopes.map(_.name) === Seq("scope1", "scope2", "scope3")) + } + +} + +private class MyCoolRDD(sc: SparkContext) extends RDD[Int](sc, Nil) { + override def getPartitions: Array[Partition] = Array.empty + override def compute(p: Partition, context: TaskContext): Iterator[Int] = { Nil.toIterator } +} diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index ef8c36a28655b..8079d5dcaea81 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -89,6 +89,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { val simpleRdd = sc.makeRDD(uniformDistro, 10) assert(error(simpleRdd.countApproxDistinct(8, 0), size) < 0.2) assert(error(simpleRdd.countApproxDistinct(12, 0), size) < 0.1) + assert(error(simpleRdd.countApproxDistinct(0.02), size) < 0.1) + assert(error(simpleRdd.countApproxDistinct(0.5), size) < 0.22) } test("SparkContext.union") { @@ -336,10 +338,10 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("coalesced RDDs with locality") { - val data3 = sc.makeRDD(List((1,List("a","c")), (2,List("a","b","c")), (3,List("b")))) + val data3 = sc.makeRDD(List((1, List("a", "c")), (2, List("a", "b", "c")), (3, List("b")))) val coal3 = data3.coalesce(3) val list3 = coal3.partitions.flatMap(_.asInstanceOf[CoalescedRDDPartition].preferredLocation) - assert(list3.sorted === Array("a","b","c"), "Locality preferences are dropped") + assert(list3.sorted === Array("a", "b", "c"), "Locality 
preferences are dropped") // RDD with locality preferences spread (non-randomly) over 6 machines, m0 through m5 val data = sc.makeRDD((1 to 9).map(i => (i, (i to (i + 2)).map{ j => "m" + (j%6)}))) @@ -589,8 +591,8 @@ class RDDSuite extends FunSuite with SharedSparkContext { assert(sc.emptyRDD.isEmpty()) assert(sc.parallelize(Seq[Int]()).isEmpty()) assert(!sc.parallelize(Seq(1)).isEmpty()) - assert(sc.parallelize(Seq(1,2,3), 3).filter(_ < 0).isEmpty()) - assert(!sc.parallelize(Seq(1,2,3), 3).filter(_ > 1).isEmpty()) + assert(sc.parallelize(Seq(1, 2, 3), 3).filter(_ < 0).isEmpty()) + assert(!sc.parallelize(Seq(1, 2, 3), 3).filter(_ > 1).isEmpty()) } test("sample preserves partitioner") { @@ -607,49 +609,49 @@ class RDDSuite extends FunSuite with SharedSparkContext { val data = sc.parallelize(1 to n, 2) for (num <- List(5, 20, 100)) { - val sample = data.takeSample(withReplacement=false, num=num) + val sample = data.takeSample(withReplacement = false, num = num) assert(sample.size === num) // Got exactly num elements assert(sample.toSet.size === num) // Elements are distinct assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { - val sample = data.takeSample(withReplacement=false, 20, seed) + val sample = data.takeSample(withReplacement = false, 20, seed) assert(sample.size === 20) // Got exactly 20 elements assert(sample.toSet.size === 20) // Elements are distinct assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { - val sample = data.takeSample(withReplacement=false, 100, seed) + val sample = data.takeSample(withReplacement = false, 100, seed) assert(sample.size === 100) // Got only 100 elements assert(sample.toSet.size === 100) // Elements are distinct assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { - val sample = data.takeSample(withReplacement=true, 20, seed) + val sample = data.takeSample(withReplacement = true, 20, seed) assert(sample.size === 20) // Got exactly 20 elements assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } { - val sample = data.takeSample(withReplacement=true, num=20) + val sample = data.takeSample(withReplacement = true, num = 20) assert(sample.size === 20) // Got exactly 100 elements assert(sample.toSet.size <= 20, "sampling with replacement returned all distinct elements") assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } { - val sample = data.takeSample(withReplacement=true, num=n) + val sample = data.takeSample(withReplacement = true, num = n) assert(sample.size === n) // Got exactly 100 elements // Chance of getting all distinct elements is astronomically low, so test we got < 100 assert(sample.toSet.size < n, "sampling with replacement returned all distinct elements") assert(sample.forall(x => 1 <= x && x <= n), s"elements not in [1, $n]") } for (seed <- 1 to 5) { - val sample = data.takeSample(withReplacement=true, n, seed) + val sample = data.takeSample(withReplacement = true, n, seed) assert(sample.size === n) // Got exactly 100 elements // Chance of getting all distinct elements is astronomically low, so test we got < 100 assert(sample.toSet.size < n, "sampling with replacement returned all distinct elements") } for (seed <- 1 to 5) { - val sample = data.takeSample(withReplacement=true, 2 * n, seed) + val sample = data.takeSample(withReplacement = true, 2 * n, seed) assert(sample.size === 2 * n) // Got exactly 200 elements // Chance of getting all distinct elements is 
still quite low, so test we got < 100 assert(sample.toSet.size < n, "sampling with replacement returned all distinct elements") @@ -689,7 +691,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("sortByKey") { - val data = sc.parallelize(Seq("5|50|A","4|60|C", "6|40|B")) + val data = sc.parallelize(Seq("5|50|A", "4|60|C", "6|40|B")) val col1 = Array("4|60|C", "5|50|A", "6|40|B") val col2 = Array("6|40|B", "5|50|A", "4|60|C") @@ -701,7 +703,7 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("sortByKey ascending parameter") { - val data = sc.parallelize(Seq("5|50|A","4|60|C", "6|40|B")) + val data = sc.parallelize(Seq("5|50|A", "4|60|C", "6|40|B")) val asc = Array("4|60|C", "5|50|A", "6|40|B") val desc = Array("6|40|B", "5|50|A", "4|60|C") @@ -762,9 +764,9 @@ class RDDSuite extends FunSuite with SharedSparkContext { } test("intersection strips duplicates in an input") { - val a = sc.parallelize(Seq(1,2,3,3)) - val b = sc.parallelize(Seq(1,1,2,3)) - val intersection = Array(1,2,3) + val a = sc.parallelize(Seq(1, 2, 3, 3)) + val b = sc.parallelize(Seq(1, 1, 2, 3)) + val intersection = Array(1, 2, 3) assert(a.intersection(b).collect().sorted === intersection) assert(b.intersection(a).collect().sorted === intersection) diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuiteUtils.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuiteUtils.scala index fe695d85e29dd..194dc45d6e399 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuiteUtils.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuiteUtils.scala @@ -21,11 +21,11 @@ object RDDSuiteUtils { case class Person(first: String, last: String, age: Int) object AgeOrdering extends Ordering[Person] { - def compare(a:Person, b:Person): Int = a.age.compare(b.age) + def compare(a: Person, b: Person): Int = a.age.compare(b.age) } object NameOrdering extends Ordering[Person] { - def compare(a:Person, b:Person): Int = - implicitly[Ordering[Tuple2[String,String]]].compare((a.last, a.first), (b.last, b.first)) + def compare(a: Person, b: Person): Int = + implicitly[Ordering[Tuple2[String, String]]].compare((a.last, a.first), (b.last, b.first)) } } diff --git a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala index 64b1c24c47168..54fc914722b46 100644 --- a/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/SortingSuite.scala @@ -26,7 +26,7 @@ class SortingSuite extends FunSuite with SharedSparkContext with Matchers with L test("sortByKey") { val pairs = sc.parallelize(Array((1, 0), (2, 0), (0, 0), (3, 0)), 2) - assert(pairs.sortByKey().collect() === Array((0,0), (1,0), (2,0), (3,0))) + assert(pairs.sortByKey().collect() === Array((0, 0), (1, 0), (2, 0), (3, 0))) } test("large array") { @@ -136,7 +136,7 @@ class SortingSuite extends FunSuite with SharedSparkContext with Matchers with L test("get a range of elements in an array not partitioned by a range partitioner") { val pairArr = util.Random.shuffle((1 to 1000).toList).map(x => (x, x)) - val pairs = sc.parallelize(pairArr,10) + val pairs = sc.parallelize(pairArr, 10) val range = pairs.filterByRange(200, 800).collect() assert((800 to 200 by -1).toArray.sorted === range.map(_._1).sorted) } diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index ae3339d80f9c6..21eb71d9acfbd 100644 --- 
a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -42,7 +42,7 @@ abstract class RpcEnvSuite extends FunSuite with BeforeAndAfterAll { } override def afterAll(): Unit = { - if(env != null) { + if (env != null) { env.shutdown() } } @@ -75,7 +75,7 @@ abstract class RpcEnvSuite extends FunSuite with BeforeAndAfterAll { } }) - val anotherEnv = createRpcEnv(new SparkConf(), "remote" ,13345) + val anotherEnv = createRpcEnv(new SparkConf(), "remote", 13345) // Use anotherEnv to find out the RpcEndpointRef val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "send-remotely") try { @@ -338,7 +338,7 @@ abstract class RpcEnvSuite extends FunSuite with BeforeAndAfterAll { test("call receive in sequence") { // If a RpcEnv implementation breaks the `receive` contract, hope this test can expose it - for(i <- 0 until 100) { + for (i <- 0 until 100) { @volatile var result = 0 val endpointRef = env.setupEndpoint(s"receive-in-sequence-$i", new ThreadSafeRpcEndpoint { override val rpcEnv = env diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index f77661ccbd1c5..3821166386fa6 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -26,8 +26,8 @@ class CoarseGrainedSchedulerBackendSuite extends FunSuite with LocalSparkContext test("serialized task larger than akka frame size") { val conf = new SparkConf - conf.set("spark.akka.frameSize","1") - conf.set("spark.default.parallelism","1") + conf.set("spark.akka.frameSize", "1") + conf.set("spark.default.parallelism", "1") sc = new SparkContext("local-cluster[2 , 1 , 512]", "test", conf) val frameSize = AkkaUtils.maxFrameSizeBytes(sc.conf) val buffer = new SerializableBuffer(java.nio.ByteBuffer.allocate(2 * frameSize)) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 2482603f420ee..89b6bc995fe79 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -174,6 +174,10 @@ class DAGSchedulerSuite dagEventProcessLoopTester = new DAGSchedulerEventProcessLoopTester(scheduler) } + after { + scheduler.stop() + } + override def afterAll() { super.afterAll() } @@ -261,8 +265,9 @@ class DAGSchedulerSuite override def taskSucceeded(partition: Int, value: Any) = numResults += 1 override def jobFailed(exception: Exception) = throw exception } - submit(new MyRDD(sc, 0, Nil), Array(), listener = fakeListener) + val jobId = submit(new MyRDD(sc, 0, Nil), Array(), listener = fakeListener) assert(numResults === 0) + cancel(jobId) } test("run trivial job") { @@ -343,7 +348,7 @@ class DAGSchedulerSuite (1 to 30).foreach(_ => rdd = rdd.zip(rdd)) // getPreferredLocs runs quickly, indicating that exponential graph traversal is avoided. failAfter(10 seconds) { - val preferredLocs = scheduler.getPreferredLocs(rdd,0) + val preferredLocs = scheduler.getPreferredLocs(rdd, 0) // No preferred locations are returned. 
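// (Each of the 30 zips above doubles the number of paths back to the leaf RDD, so a naive
// traversal would visit on the order of 2^30 nodes; the failAfter(10 seconds) guard is what
// would catch such a regression.)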
assert(preferredLocs.length === 0) } @@ -602,8 +607,8 @@ class DAGSchedulerSuite val listener1 = new FailureRecordingJobListener() val listener2 = new FailureRecordingJobListener() - submit(reduceRdd1, Array(0, 1), listener=listener1) - submit(reduceRdd2, Array(0, 1), listener=listener2) + submit(reduceRdd1, Array(0, 1), listener = listener1) + submit(reduceRdd2, Array(0, 1), listener = listener2) val stageFailureMessage = "Exception failure in map stage" failed(taskSets(0), stageFailureMessage) diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index cf97707946706..7078a7a12232a 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -81,7 +81,7 @@ class OutputCommitCoordinatorSuite extends FunSuite with BeforeAndAfter { conf: SparkConf, isLocal: Boolean, listenerBus: LiveListenerBus): SparkEnv = { - outputCommitCoordinator = spy(new OutputCommitCoordinator(conf)) + outputCommitCoordinator = spy(new OutputCommitCoordinator(conf, isDriver = true)) // Use Mockito.spy() to maintain the default infrastructure everywhere else. // This mocking allows us to control the coordinator responses in test cases. SparkEnv.createDriverEnv(conf, isLocal, listenerBus, Some(outputCommitCoordinator)) diff --git a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala index e8f461e2f56c9..456451b676bed 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/PoolSuite.scala @@ -97,9 +97,9 @@ class PoolSuite extends FunSuite with LocalSparkContext { assert(rootPool.getSchedulableByName("3").weight === 1) val properties1 = new Properties() - properties1.setProperty("spark.scheduler.pool","1") + properties1.setProperty("spark.scheduler.pool", "1") val properties2 = new Properties() - properties2.setProperty("spark.scheduler.pool","2") + properties2.setProperty("spark.scheduler.pool", "2") val taskSetManager10 = createTaskSetManager(0, 1, taskScheduler) val taskSetManager11 = createTaskSetManager(1, 1, taskScheduler) diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala index 3fa0115e68259..352778505bf58 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MemoryUtilsSuite.scala @@ -29,11 +29,11 @@ class MemoryUtilsSuite extends FunSuite with MockitoSugar { val sc = mock[SparkContext] when(sc.conf).thenReturn(sparkConf) - + // 384 > sc.executorMemory * 0.1 => 512 + 384 = 896 when(sc.executorMemory).thenReturn(512) assert(MemoryUtils.calculateTotalMemory(sc) === 896) - + // 384 < sc.executorMemory * 0.1 => 4096 + (4096 * 0.1) = 4505.6 when(sc.executorMemory).thenReturn(4096) assert(MemoryUtils.calculateTotalMemory(sc) === 4505) diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala index ab863f3d8d672..b145687bb1bc3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendSuite.scala @@ -80,11 +80,11 @@ class MesosSchedulerBackendSuite extends FunSuite with LocalSparkContext with Mo .set("spark.mesos.executor.docker.image", "spark/mock") .set("spark.mesos.executor.docker.volumes", "/a,/b:/b,/c:/c:rw,/d:ro,/e:/e:ro") .set("spark.mesos.executor.docker.portmaps", "80:8080,53:53:tcp") - + val listenerBus = mock[LiveListenerBus] listenerBus.post( SparkListenerExecutorAdded(anyLong, "s1", new ExecutorInfo("host1", 2, Map.empty))) - + val sc = mock[SparkContext] when(sc.executorMemory).thenReturn(100) when(sc.getSparkHome()).thenReturn(Option("/spark-home")) diff --git a/core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala new file mode 100644 index 0000000000000..ed4d8ce632e16 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/serializer/JavaSerializerSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.serializer + +import org.apache.spark.SparkConf +import org.scalatest.FunSuite + +class JavaSerializerSuite extends FunSuite { + test("JavaSerializer instances are serializable") { + val serializer = new JavaSerializer(new SparkConf()) + val instance = serializer.newInstance() + instance.deserialize[JavaSerializer](instance.serialize(serializer)) + } +} diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala index 778a7eee73b23..751893dcdcef9 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.serializer +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + import scala.collection.mutable import scala.reflect.ClassTag @@ -32,6 +34,36 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") conf.set("spark.kryo.registrator", classOf[MyRegistrator].getName) + test("configuration limits") { + val conf1 = conf.clone() + val kryoBufferProperty = "spark.kryoserializer.buffer" + val kryoBufferMaxProperty = "spark.kryoserializer.buffer.max" + conf1.set(kryoBufferProperty, "64k") + conf1.set(kryoBufferMaxProperty, "64m") + new KryoSerializer(conf1).newInstance() + // 2048m = 2097152k + conf1.set(kryoBufferProperty, "2097151k") + conf1.set(kryoBufferMaxProperty, "64m") + // should not throw exception when kryoBufferMaxProperty < kryoBufferProperty + new KryoSerializer(conf1).newInstance() + conf1.set(kryoBufferMaxProperty, "2097151k") + new KryoSerializer(conf1).newInstance() + val conf2 = conf.clone() + conf2.set(kryoBufferProperty, "2048m") + val thrown1 = intercept[IllegalArgumentException](new KryoSerializer(conf2).newInstance()) + assert(thrown1.getMessage.contains(kryoBufferProperty)) + val conf3 = conf.clone() + conf3.set(kryoBufferMaxProperty, "2048m") + val thrown2 = intercept[IllegalArgumentException](new KryoSerializer(conf3).newInstance()) + assert(thrown2.getMessage.contains(kryoBufferMaxProperty)) + val conf4 = conf.clone() + conf4.set(kryoBufferProperty, "2g") + conf4.set(kryoBufferMaxProperty, "3g") + val thrown3 = intercept[IllegalArgumentException](new KryoSerializer(conf4).newInstance()) + assert(thrown3.getMessage.contains(kryoBufferProperty)) + assert(!thrown3.getMessage.contains(kryoBufferMaxProperty)) + } + test("basic types") { val ser = new KryoSerializer(conf).newInstance() def check[T: ClassTag](t: T) { @@ -70,7 +102,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { check((1, 1)) check((1, 1L)) check((1L, 1)) - check((1L, 1L)) + check((1L, 1L)) check((1.0, 1)) check((1, 1.0)) check((1.0, 1.0)) @@ -108,7 +140,7 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { check(List(Some(mutable.HashMap(1->1, 2->2)), None, Some(mutable.HashMap(3->4)))) check(List( mutable.HashMap("one" -> 1, "two" -> 2), - mutable.HashMap(1->"one",2->"two",3->"three"))) + mutable.HashMap(1->"one", 2->"two", 3->"three"))) } test("ranges") { @@ -289,8 +321,74 @@ class KryoSerializerSuite extends FunSuite with SharedSparkContext { val ser2 = new KryoSerializer(conf).newInstance().asInstanceOf[KryoSerializerInstance] assert(!ser2.getAutoReset) } + + private def testSerializerInstanceReuse(autoReset: Boolean, referenceTracking: Boolean): Unit = { + val conf = new 
SparkConf(loadDefaults = false) + .set("spark.kryo.referenceTracking", referenceTracking.toString) + if (!autoReset) { + conf.set("spark.kryo.registrator", classOf[RegistratorWithoutAutoReset].getName) + } + val ser = new KryoSerializer(conf) + val serInstance = ser.newInstance().asInstanceOf[KryoSerializerInstance] + assert (serInstance.getAutoReset() === autoReset) + val obj = ("Hello", "World") + def serializeObjects(): Array[Byte] = { + val baos = new ByteArrayOutputStream() + val serStream = serInstance.serializeStream(baos) + serStream.writeObject(obj) + serStream.writeObject(obj) + serStream.close() + baos.toByteArray + } + val output1: Array[Byte] = serializeObjects() + val output2: Array[Byte] = serializeObjects() + assert (output1 === output2) + } + + // Regression test for SPARK-7766, an issue where disabling auto-reset and enabling + // reference-tracking would lead to corrupted output when serializer instances are re-used + for (referenceTracking <- Set(true, false); autoReset <- Set(true, false)) { + test(s"instance reuse with autoReset = $autoReset, referenceTracking = $referenceTracking") { + testSerializerInstanceReuse(autoReset = autoReset, referenceTracking = referenceTracking) + } + } } +class KryoSerializerAutoResetDisabledSuite extends FunSuite with SharedSparkContext { + conf.set("spark.serializer", classOf[KryoSerializer].getName) + conf.set("spark.kryo.registrator", classOf[RegistratorWithoutAutoReset].getName) + conf.set("spark.kryo.referenceTracking", "true") + conf.set("spark.shuffle.manager", "sort") + conf.set("spark.shuffle.sort.bypassMergeThreshold", "200") + + test("sort-shuffle with bypassMergeSort (SPARK-7873)") { + val myObject = ("Hello", "World") + assert(sc.parallelize(Seq.fill(100)(myObject)).repartition(2).collect().toSet === Set(myObject)) + } + + test("calling deserialize() after deserializeStream()") { + val serInstance = new KryoSerializer(conf).newInstance().asInstanceOf[KryoSerializerInstance] + assert(!serInstance.getAutoReset()) + val hello = "Hello" + val world = "World" + // Here, we serialize the same value twice, so the reference-tracking should cause us to store + // references to some of these values + val helloHello = serInstance.serialize((hello, hello)) + // Here's a stream which only contains one value + val worldWorld: Array[Byte] = { + val baos = new ByteArrayOutputStream() + val serStream = serInstance.serializeStream(baos) + serStream.writeObject(world) + serStream.writeObject(world) + serStream.close() + baos.toByteArray + } + val deserializationStream = serInstance.deserializeStream(new ByteArrayInputStream(worldWorld)) + assert(deserializationStream.readValue[Any]() === world) + deserializationStream.close() + assert(serInstance.deserialize[Any](helloHello) === (hello, hello)) + } +} class ClassLoaderTestingObject diff --git a/core/src/test/scala/org/apache/spark/serializer/ProactiveClosureSerializationSuite.scala b/core/src/test/scala/org/apache/spark/serializer/ProactiveClosureSerializationSuite.scala index 433fd6bb4a11d..96fa57294c96f 100644 --- a/core/src/test/scala/org/apache/spark/serializer/ProactiveClosureSerializationSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/ProactiveClosureSerializationSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.rdd.RDD /* A trivial (but unserializable) container for trivial functions */ class UnserializableClass { def op[T](x: T): String = x.toString - + def pred[T](x: T): Boolean = x.toString.length % 2 == 0 } @@ -47,7 +47,7 @@ class 
ProactiveClosureSerializationSuite extends FunSuite with SharedSparkContex // iterating over a map from transformation names to functions that perform that // transformation on a given RDD, creating one test case for each - for (transformation <- + for (transformation <- Map("map" -> xmap _, "flatMap" -> xflatMap _, "filter" -> xfilter _, @@ -60,24 +60,24 @@ class ProactiveClosureSerializationSuite extends FunSuite with SharedSparkContex val ex = intercept[SparkException] { xf(data, uc) } - assert(ex.getMessage.contains("Task not serializable"), + assert(ex.getMessage.contains("Task not serializable"), s"RDD.$name doesn't proactively throw NotSerializableException") } } - private def xmap(x: RDD[String], uc: UnserializableClass): RDD[String] = - x.map(y=>uc.op(y)) + private def xmap(x: RDD[String], uc: UnserializableClass): RDD[String] = + x.map(y => uc.op(y)) + + private def xflatMap(x: RDD[String], uc: UnserializableClass): RDD[String] = + x.flatMap(y => Seq(uc.op(y))) - private def xflatMap(x: RDD[String], uc: UnserializableClass): RDD[String] = - x.flatMap(y=>Seq(uc.op(y))) + private def xfilter(x: RDD[String], uc: UnserializableClass): RDD[String] = + x.filter(y => uc.pred(y)) - private def xfilter(x: RDD[String], uc: UnserializableClass): RDD[String] = - x.filter(y=>uc.pred(y)) + private def xmapPartitions(x: RDD[String], uc: UnserializableClass): RDD[String] = + x.mapPartitions(_.map(y => uc.op(y))) - private def xmapPartitions(x: RDD[String], uc: UnserializableClass): RDD[String] = - x.mapPartitions(_.map(y=>uc.op(y))) + private def xmapPartitionsWithIndex(x: RDD[String], uc: UnserializableClass): RDD[String] = + x.mapPartitionsWithIndex((_, it) => it.map(y => uc.op(y))) - private def xmapPartitionsWithIndex(x: RDD[String], uc: UnserializableClass): RDD[String] = - x.mapPartitionsWithIndex((_, it) => it.map(y=>uc.op(y))) - } diff --git a/core/src/test/scala/org/apache/spark/serializer/SerializerPropertiesSuite.scala b/core/src/test/scala/org/apache/spark/serializer/SerializerPropertiesSuite.scala new file mode 100644 index 0000000000000..bb34033fe9e7e --- /dev/null +++ b/core/src/test/scala/org/apache/spark/serializer/SerializerPropertiesSuite.scala @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.serializer + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +import scala.util.Random + +import org.scalatest.{Assertions, FunSuite} + +import org.apache.spark.SparkConf +import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset + +/** + * Tests to ensure that [[Serializer]] implementations obey the API contracts for methods that + * describe properties of the serialized stream, such as + * [[Serializer.supportsRelocationOfSerializedObjects]]. 
+ */ +class SerializerPropertiesSuite extends FunSuite { + + import SerializerPropertiesSuite._ + + test("JavaSerializer does not support relocation") { + // Per a comment on the SPARK-4550 JIRA ticket, Java serialization appears to write out the + // full class name the first time an object is written to an output stream, but subsequent + // references to the class write a more compact identifier; this prevents relocation. + val ser = new JavaSerializer(new SparkConf()) + testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) + } + + test("KryoSerializer supports relocation when auto-reset is enabled") { + val ser = new KryoSerializer(new SparkConf) + assert(ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset()) + testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) + } + + test("KryoSerializer does not support relocation when auto-reset is disabled") { + val conf = new SparkConf().set("spark.kryo.registrator", + classOf[RegistratorWithoutAutoReset].getName) + val ser = new KryoSerializer(conf) + assert(!ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset()) + testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) + } + +} + +object SerializerPropertiesSuite extends Assertions { + + def generateRandomItem(rand: Random): Any = { + val randomFunctions: Seq[() => Any] = Seq( + () => rand.nextInt(), + () => rand.nextString(rand.nextInt(10)), + () => rand.nextDouble(), + () => rand.nextBoolean(), + () => (rand.nextInt(), rand.nextString(rand.nextInt(10))), + () => MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))), + () => { + val x = MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))) + (x, x) + } + ) + randomFunctions(rand.nextInt(randomFunctions.size)).apply() + } + + def testSupportsRelocationOfSerializedObjects( + serializer: Serializer, + generateRandomItem: Random => Any): Unit = { + if (!serializer.supportsRelocationOfSerializedObjects) { + return + } + val NUM_TRIALS = 5 + val rand = new Random(42) + for (_ <- 1 to NUM_TRIALS) { + val items = { + // Make sure that we have duplicate occurrences of the same object in the stream: + val randomItems = Seq.fill(10)(generateRandomItem(rand)) + randomItems ++ randomItems.take(5) + } + val baos = new ByteArrayOutputStream() + val serStream = serializer.newInstance().serializeStream(baos) + def serializeItem(item: Any): Array[Byte] = { + val itemStartOffset = baos.toByteArray.length + serStream.writeObject(item) + serStream.flush() + val itemEndOffset = baos.toByteArray.length + baos.toByteArray.slice(itemStartOffset, itemEndOffset).clone() + } + val itemsAndSerializedItems: Seq[(Any, Array[Byte])] = { + val serItems = items.map { + item => (item, serializeItem(item)) + } + serStream.close() + rand.shuffle(serItems) + } + val reorderedSerializedData: Array[Byte] = itemsAndSerializedItems.flatMap(_._2).toArray + val deserializedItemsStream = serializer.newInstance().deserializeStream( + new ByteArrayInputStream(reorderedSerializedData)) + assert(deserializedItemsStream.asIterator.toSeq === itemsAndSerializedItems.map(_._1)) + deserializedItemsStream.close() + } + } +} + +private case class MyCaseClass(foo: Int, bar: String) diff --git a/core/src/test/scala/org/apache/spark/serializer/TestSerializer.scala b/core/src/test/scala/org/apache/spark/serializer/TestSerializer.scala index 86fcf447287f7..c1e0a29a34bb1 100644 --- a/core/src/test/scala/org/apache/spark/serializer/TestSerializer.scala +++ 
b/core/src/test/scala/org/apache/spark/serializer/TestSerializer.scala @@ -32,16 +32,19 @@ class TestSerializer extends Serializer { class TestSerializerInstance extends SerializerInstance { - override def serialize[T: ClassTag](t: T): ByteBuffer = ??? + override def serialize[T: ClassTag](t: T): ByteBuffer = throw new UnsupportedOperationException - override def serializeStream(s: OutputStream): SerializationStream = ??? + override def serializeStream(s: OutputStream): SerializationStream = + throw new UnsupportedOperationException override def deserializeStream(s: InputStream): TestDeserializationStream = new TestDeserializationStream - override def deserialize[T: ClassTag](bytes: ByteBuffer): T = ??? + override def deserialize[T: ClassTag](bytes: ByteBuffer): T = + throw new UnsupportedOperationException - override def deserialize[T: ClassTag](bytes: ByteBuffer, loader: ClassLoader): T = ??? + override def deserialize[T: ClassTag](bytes: ByteBuffer, loader: ClassLoader): T = + throw new UnsupportedOperationException } diff --git a/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala index 84384bb48999a..0537bf66ad020 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/hash/HashShuffleManagerSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.{SparkEnv, SparkContext, LocalSparkContext, SparkConf} import org.apache.spark.executor.ShuffleWriteMetrics import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.serializer.JavaSerializer -import org.apache.spark.shuffle.FileShuffleBlockManager +import org.apache.spark.shuffle.FileShuffleBlockResolver import org.apache.spark.storage.{ShuffleBlockId, FileSegment} class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { @@ -53,10 +53,10 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { sc = new SparkContext("local", "test", conf) - val shuffleBlockManager = - SparkEnv.get.shuffleManager.shuffleBlockResolver.asInstanceOf[FileShuffleBlockManager] + val shuffleBlockResolver = + SparkEnv.get.shuffleManager.shuffleBlockResolver.asInstanceOf[FileShuffleBlockResolver] - val shuffle1 = shuffleBlockManager.forMapTask(1, 1, 1, new JavaSerializer(conf), + val shuffle1 = shuffleBlockResolver.forMapTask(1, 1, 1, new JavaSerializer(conf), new ShuffleWriteMetrics) for (writer <- shuffle1.writers) { writer.write("test1", "value") @@ -69,7 +69,7 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { val shuffle1Segment = shuffle1.writers(0).fileSegment() shuffle1.releaseWriters(success = true) - val shuffle2 = shuffleBlockManager.forMapTask(1, 2, 1, new JavaSerializer(conf), + val shuffle2 = shuffleBlockResolver.forMapTask(1, 2, 1, new JavaSerializer(conf), new ShuffleWriteMetrics) for (writer <- shuffle2.writers) { @@ -88,7 +88,7 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { // of block based on remaining data in file : which could mess things up when there is // concurrent read and writes happening to the same shuffle group. 
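One note on the TestSerializer hunk just above: replacing ??? with an explicit throw new UnsupportedOperationException is more than a style change, because ??? throws scala.NotImplementedError, which extends Error rather than Exception, so a handler written as catch { case e: Exception => ... } lets it escape. A minimal sketch of that difference, not taken from the patch:

object StubFailureSketch {
  // True only when the body fails with an ordinary Exception.
  def failsWithException(body: => Unit): Boolean =
    try { body; false } catch {
      case _: Exception => true
      case _: Throwable => false
    }

  def main(args: Array[String]): Unit = {
    println(failsWithException(throw new UnsupportedOperationException)) // true
    println(failsWithException(???))                                     // false: NotImplementedError is an Error
  }
}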
- val shuffle3 = shuffleBlockManager.forMapTask(1, 3, 1, new JavaSerializer(testConf), + val shuffle3 = shuffleBlockResolver.forMapTask(1, 3, 1, new JavaSerializer(testConf), new ShuffleWriteMetrics) for (writer <- shuffle3.writers) { writer.write("test3", "value") @@ -98,10 +98,10 @@ class HashShuffleManagerSuite extends FunSuite with LocalSparkContext { writer.commitAndClose() } // check before we register. - checkSegments(shuffle2Segment, shuffleBlockManager.getBlockData(ShuffleBlockId(1, 2, 0))) + checkSegments(shuffle2Segment, shuffleBlockResolver.getBlockData(ShuffleBlockId(1, 2, 0))) shuffle3.releaseWriters(success = true) - checkSegments(shuffle2Segment, shuffleBlockManager.getBlockData(ShuffleBlockId(1, 2, 0))) - shuffleBlockManager.removeShuffle(1) + checkSegments(shuffle2Segment, shuffleBlockResolver.getBlockData(ShuffleBlockId(1, 2, 0))) + shuffleBlockResolver.removeShuffle(1) } def writeToFile(file: File, numBytes: Int) { diff --git a/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala new file mode 100644 index 0000000000000..49a04a2a45280 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleManagerSuite.scala @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe + +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.{FunSuite, Matchers} + +import org.apache.spark._ +import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, Serializer} + +/** + * Tests for the fallback logic in UnsafeShuffleManager. Actual tests of shuffling data are + * performed in other suites. 
+ */ +class UnsafeShuffleManagerSuite extends FunSuite with Matchers { + + import UnsafeShuffleManager.canUseUnsafeShuffle + + private class RuntimeExceptionAnswer extends Answer[Object] { + override def answer(invocation: InvocationOnMock): Object = { + throw new RuntimeException("Called non-stubbed method, " + invocation.getMethod.getName) + } + } + + private def shuffleDep( + partitioner: Partitioner, + serializer: Option[Serializer], + keyOrdering: Option[Ordering[Any]], + aggregator: Option[Aggregator[Any, Any, Any]], + mapSideCombine: Boolean): ShuffleDependency[Any, Any, Any] = { + val dep = mock(classOf[ShuffleDependency[Any, Any, Any]], new RuntimeExceptionAnswer()) + doReturn(0).when(dep).shuffleId + doReturn(partitioner).when(dep).partitioner + doReturn(serializer).when(dep).serializer + doReturn(keyOrdering).when(dep).keyOrdering + doReturn(aggregator).when(dep).aggregator + doReturn(mapSideCombine).when(dep).mapSideCombine + dep + } + + test("supported shuffle dependencies") { + val kryo = Some(new KryoSerializer(new SparkConf())) + + assert(canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + val rangePartitioner = mock(classOf[RangePartitioner[Any, Any]]) + when(rangePartitioner.numPartitions).thenReturn(2) + assert(canUseUnsafeShuffle(shuffleDep( + partitioner = rangePartitioner, + serializer = kryo, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + } + + test("unsupported shuffle dependencies") { + val kryo = Some(new KryoSerializer(new SparkConf())) + val java = Some(new JavaSerializer(new SparkConf())) + + // We only support serializers that support object relocation + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = java, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + // We do not support shuffles with more than 16 million output partitions + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(UnsafeShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS + 1), + serializer = kryo, + keyOrdering = None, + aggregator = None, + mapSideCombine = false + ))) + + // We do not support shuffles that perform any kind of aggregation or sorting of keys + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = Some(mock(classOf[Ordering[Any]])), + aggregator = None, + mapSideCombine = false + ))) + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = None, + aggregator = Some(mock(classOf[Aggregator[Any, Any, Any]])), + mapSideCombine = false + ))) + // We do not support shuffles that perform any kind of aggregation or sorting of keys + assert(!canUseUnsafeShuffle(shuffleDep( + partitioner = new HashPartitioner(2), + serializer = kryo, + keyOrdering = Some(mock(classOf[Ordering[Any]])), + aggregator = Some(mock(classOf[Aggregator[Any, Any, Any]])), + mapSideCombine = true + ))) + } + +} diff --git a/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala new file mode 100644 index 0000000000000..6351539e91e97 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/unsafe/UnsafeShuffleSuite.scala @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.unsafe + +import java.io.File + +import scala.collection.JavaConverters._ + +import org.apache.commons.io.FileUtils +import org.apache.commons.io.filefilter.TrueFileFilter +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{HashPartitioner, ShuffleDependency, SparkContext, ShuffleSuite} +import org.apache.spark.rdd.ShuffledRDD +import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} +import org.apache.spark.util.Utils + +class UnsafeShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { + + // This test suite should run all tests in ShuffleSuite with unsafe-based shuffle. + + override def beforeAll() { + conf.set("spark.shuffle.manager", "tungsten-sort") + // UnsafeShuffleManager requires at least 128 MB of memory per task in order to be able to sort + // shuffle records. + conf.set("spark.shuffle.memoryFraction", "0.5") + } + + test("UnsafeShuffleManager properly cleans up files for shuffles that use the new shuffle path") { + val tmpDir = Utils.createTempDir() + try { + val myConf = conf.clone() + .set("spark.local.dir", tmpDir.getAbsolutePath) + sc = new SparkContext("local", "test", myConf) + // Create a shuffled RDD and verify that it will actually use the new UnsafeShuffle path + val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) + val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) + .setSerializer(new KryoSerializer(myConf)) + val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] + assert(UnsafeShuffleManager.canUseUnsafeShuffle(shuffleDep)) + def getAllFiles: Set[File] = + FileUtils.listFiles(tmpDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet + val filesBeforeShuffle = getAllFiles + // Force the shuffle to be performed + shuffledRdd.count() + // Ensure that the shuffle actually created files that will need to be cleaned up + val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle + filesCreatedByShuffle.map(_.getName) should be + Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") + // Check that the cleanup actually removes the files + sc.env.blockManager.master.removeShuffle(0, blocking = true) + for (file <- filesCreatedByShuffle) { + assert (!file.exists(), s"Shuffle file $file was not cleaned up") + } + } finally { + Utils.deleteRecursively(tmpDir) + } + } + + test("UnsafeShuffleManager properly cleans up files for shuffles that use the old shuffle path") { + val tmpDir = Utils.createTempDir() + try { + val myConf = conf.clone() + .set("spark.local.dir", tmpDir.getAbsolutePath) + sc = new SparkContext("local", "test", myConf) + // Create a shuffled RDD and verify that it will actually use the old SortShuffle path + val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) + val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, 
new HashPartitioner(4)) + .setSerializer(new JavaSerializer(myConf)) + val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] + assert(!UnsafeShuffleManager.canUseUnsafeShuffle(shuffleDep)) + def getAllFiles: Set[File] = + FileUtils.listFiles(tmpDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet + val filesBeforeShuffle = getAllFiles + // Force the shuffle to be performed + shuffledRdd.count() + // Ensure that the shuffle actually created files that will need to be cleaned up + val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle + filesCreatedByShuffle.map(_.getName) should be + Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") + // Check that the cleanup actually removes the files + sc.env.blockManager.master.removeShuffle(0, blocking = true) + for (file <- filesCreatedByShuffle) { + assert (!file.exists(), s"Shuffle file $file was not cleaned up") + } + } finally { + Utils.deleteRecursively(tmpDir) + } + } +} diff --git a/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamSuite.scala b/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamSuite.scala new file mode 100644 index 0000000000000..731d1f557ed33 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/status/api/v1/SimpleDateParamSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.status.api.v1 + +import org.scalatest.{Matchers, FunSuite} + +class SimpleDateParamSuite extends FunSuite with Matchers { + + test("date parsing") { + new SimpleDateParam("2015-02-20T23:21:17.190GMT").timestamp should be (1424474477190L) + new SimpleDateParam("2015-02-20T17:21:17.190EST").timestamp should be (1424470877190L) + new SimpleDateParam("2015-02-20").timestamp should be (1424390400000L) // GMT + } + +} diff --git a/core/src/test/scala/org/apache/spark/storage/FlatmapIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/FlatmapIteratorSuite.scala index bcf138b5ee6d0..47341b74e9c0f 100644 --- a/core/src/test/scala/org/apache/spark/storage/FlatmapIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/FlatmapIteratorSuite.scala @@ -59,10 +59,10 @@ class FlatmapIteratorSuite extends FunSuite with LocalSparkContext { .set("spark.serializer.objectStreamReset", "10") sc = new SparkContext(sconf) val expand_size = 500 - val data = sc.parallelize(Seq(1,2)). + val data = sc.parallelize(Seq(1, 2)). flatMap(x => Stream.range(1, expand_size). 
- map(y => "%d: string test %d".format(y,x))) - var persisted = data.persist(StorageLevel.MEMORY_ONLY_SER) + map(y => "%d: string test %d".format(y, x))) + val persisted = data.persist(StorageLevel.MEMORY_ONLY_SER) assert(persisted.filter(_.startsWith("1:")).count()===2) } diff --git a/core/src/test/scala/org/apache/spark/storage/StorageSuite.scala b/core/src/test/scala/org/apache/spark/storage/StorageSuite.scala index ee1071cbcde69..17193ddbfd894 100644 --- a/core/src/test/scala/org/apache/spark/storage/StorageSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/StorageSuite.scala @@ -287,8 +287,8 @@ class StorageSuite extends FunSuite { // For testing StorageUtils.updateRddInfo private def stockRDDInfos: Seq[RDDInfo] = { - val info0 = new RDDInfo(0, "0", 10, memAndDisk) - val info1 = new RDDInfo(1, "1", 3, memAndDisk) + val info0 = new RDDInfo(0, "0", 10, memAndDisk, Seq(3)) + val info1 = new RDDInfo(1, "1", 3, memAndDisk, Seq(4)) Seq(info0, info1) } diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index d53d7f3ba5ae7..a727a43f44dfc 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -18,11 +18,13 @@ package org.apache.spark.ui import java.net.{HttpURLConnection, URL} -import javax.servlet.http.HttpServletRequest +import javax.servlet.http.{HttpServletResponse, HttpServletRequest} import scala.collection.JavaConversions._ import scala.xml.Node +import org.json4s._ +import org.json4s.jackson.JsonMethods import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.openqa.selenium.{By, WebDriver} import org.scalatest._ @@ -33,8 +35,9 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.LocalSparkContext._ import org.apache.spark._ import org.apache.spark.api.java.StorageLevels +import org.apache.spark.deploy.history.HistoryServerSuite import org.apache.spark.shuffle.FetchFailedException - +import org.apache.spark.status.api.v1.{JacksonMessageWriter, StageStatus} /** * Selenium tests for the Spark Web UI. 
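The remaining hunks in this suite pair each Selenium check with a probe of the new JSON status API, parsed with json4s; the implicit DefaultFormats added in the next hunk is what makes extract[T] work. As a quick orientation to the parse / field-selection / extraction pattern those assertions rely on, here is a small self-contained sketch with invented sample JSON:

import org.json4s._
import org.json4s.jackson.JsonMethods

object JsonProbeSketch {
  implicit val formats = DefaultFormats

  def main(args: Array[String]): Unit = {
    val json = JsonMethods.parse(
      """{"storageLevel": "Memory Deserialized 1x Replicated", "attempts": [{"completed": false}]}""")
    val level = (json \ "storageLevel").extract[String]                                // typed extraction
    val completed = ((json \ "attempts").children.head \ "completed").extract[Boolean] // walk into the array
    println(s"$level, completed=$completed")
  }
}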
@@ -42,6 +45,8 @@ import org.apache.spark.shuffle.FetchFailedException class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ + implicit val formats = DefaultFormats + override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver @@ -76,28 +81,42 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before val rdd = sc.parallelize(Seq(1, 2, 3)) rdd.persist(StorageLevels.DISK_ONLY).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage") + goToUi(ui, "/storage") val tableRowText = findAll(cssSelector("#storage-by-rdd-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.DISK_ONLY.description) } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage/rdd/?id=0") + goToUi(ui, "/storage/rdd/?id=0") val tableRowText = findAll(cssSelector("#rdd-storage-by-block-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.DISK_ONLY.description) } + val storageJson = getJson(ui, "storage/rdd") + storageJson.children.length should be (1) + (storageJson \ "storageLevel").extract[String] should be (StorageLevels.DISK_ONLY.description) + val rddJson = getJson(ui, "storage/rdd/0") + (rddJson \ "storageLevel").extract[String] should be (StorageLevels.DISK_ONLY.description) + rdd.unpersist() rdd.persist(StorageLevels.MEMORY_ONLY).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage") + goToUi(ui, "/storage") val tableRowText = findAll(cssSelector("#storage-by-rdd-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.MEMORY_ONLY.description) } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (ui.appUIAddress.stripSuffix("/") + "/storage/rdd/?id=0") + goToUi(ui, "/storage/rdd/?id=0") val tableRowText = findAll(cssSelector("#rdd-storage-by-block-table td")).map(_.text).toSeq tableRowText should contain (StorageLevels.MEMORY_ONLY.description) } + + val updatedStorageJson = getJson(ui, "storage/rdd") + updatedStorageJson.children.length should be (1) + (updatedStorageJson \ "storageLevel").extract[String] should be ( + StorageLevels.MEMORY_ONLY.description) + val updatedRddJson = getJson(ui, "storage/rdd/0") + (updatedRddJson \ "storageLevel").extract[String] should be ( + StorageLevels.MEMORY_ONLY.description) } } @@ -108,10 +127,13 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.parallelize(1 to 10).map { x => throw new Exception()}.collect() } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") find(id("active")) should be(None) // Since we hide empty tables find(id("failed")).get.text should be("Failed Stages (1)") } + val stageJson = getJson(sc.ui.get, "stages") + stageJson.children.length should be (1) + (stageJson \ "status").extract[String] should be (StageStatus.FAILED.name()) // Regression test for SPARK-2105 class NotSerializable @@ -120,12 +142,15 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.parallelize(1 to 10).map { x => unserializableObject}.collect() } eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") find(id("active")) should be(None) // Since we hide empty tables // 
The failure occurs before the stage becomes active, hence we should still show only one // failed stage, not two: find(id("failed")).get.text should be("Failed Stages (1)") } + + val updatedStageJson = getJson(sc.ui.get, "stages") + updatedStageJson should be (stageJson) } } @@ -138,7 +163,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before withSpark(newSparkContext(killEnabled = true)) { sc => runSlowJob(sc) eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") assert(hasKillLink) } } @@ -146,7 +171,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before withSpark(newSparkContext(killEnabled = false)) { sc => runSlowJob(sc) eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/stages") + goToUi(sc, "/stages") assert(!hasKillLink) } } @@ -157,7 +182,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // If no job has been run in a job group, then "(Job Group)" should not appear in the header sc.parallelize(Seq(1, 2, 3)).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") val tableHeaders = findAll(cssSelector("th")).map(_.text).toSeq tableHeaders should not contain "Job Id (Job Group)" } @@ -165,10 +190,22 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before sc.setJobGroup("my-job-group", "my-job-group-description") sc.parallelize(Seq(1, 2, 3)).count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") val tableHeaders = findAll(cssSelector("th")).map(_.text).toSeq tableHeaders should contain ("Job Id (Job Group)") } + + val jobJson = getJson(sc.ui.get, "jobs") + for { + job @ JObject(_) <- jobJson + JInt(jobId) <- job \ "jobId" + jobGroup = job \ "jobGroup" + } { + jobId.toInt match { + case 0 => jobGroup should be (JNothing) + case 1 => jobGroup should be (JString("my-job-group")) + } + } } } @@ -195,7 +232,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } mappedData.count() eventually(timeout(5 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") find(cssSelector(".stage-progress-cell")).get.text should be ("2/2 (1 failed)") // Ideally, the following test would pass, but currently we overcount completed tasks // if task recomputations occur: @@ -204,6 +241,32 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // of completed tasks may be higher: find(cssSelector(".progress-cell .progress")).get.text should be ("3/2 (1 failed)") } + val jobJson = getJson(sc.ui.get, "jobs") + (jobJson \ "numTasks").extract[Int]should be (2) + (jobJson \ "numCompletedTasks").extract[Int] should be (3) + (jobJson \ "numFailedTasks").extract[Int] should be (1) + (jobJson \ "numCompletedStages").extract[Int] should be (2) + (jobJson \ "numFailedStages").extract[Int] should be (1) + val stageJson = getJson(sc.ui.get, "stages") + + for { + stage @ JObject(_) <- stageJson + JString(status) <- stage \ "status" + JInt(stageId) <- stage \ "stageId" + JInt(attemptId) <- stage \ "attemptId" + } { + val exp = if (attemptId == 0 && stageId == 1) StageStatus.FAILED else StageStatus.COMPLETE + status should be (exp.name()) + 
} + + for { + stageId <- 0 to 1 + attemptId <- 0 to 1 + } { + val exp = if (attemptId == 0 && stageId == 1) StageStatus.FAILED else StageStatus.COMPLETE + val stageJson = getJson(sc.ui.get, s"stages/$stageId/$attemptId") + (stageJson \ "status").extract[String] should be (exp.name()) + } } } @@ -218,7 +281,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before // Start the job: rdd.countAsync() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=0") + goToUi(sc, "/jobs/job/?id=0") find(id("active")).get.text should be ("Active Stages (1)") find(id("pending")).get.text should be ("Pending Stages (2)") // Essentially, we want to check that none of the stage rows show @@ -244,7 +307,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") // The completed jobs table should have two rows. The first row will be the most recent job: val firstRow = find(cssSelector("tbody tr")).get.underlying val firstRowColumns = firstRow.findElements(By.tagName("td")) @@ -271,7 +334,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs/job/?id=1") + goToUi(sc, "/jobs/job/?id=1") find(id("pending")) should be (None) find(id("active")) should be (None) find(id("failed")) should be (None) @@ -299,7 +362,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before rdd.count() rdd.count() eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/jobs") + goToUi(sc, "/jobs") findAll(cssSelector("tbody tr a")).foreach { link => link.text.toLowerCase should include ("count") link.text.toLowerCase should not include "unknown" @@ -321,7 +384,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } sparkUI.attachTab(newTab) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/")) + goToUi(sc, "") find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) @@ -330,12 +393,12 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } eventually(timeout(10 seconds), interval(50 milliseconds)) { // check whether new page exists - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + goToUi(sc, "/foo") find(cssSelector("b")).get.text should include ("html magic") } sparkUI.detachTab(newTab) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (sc.ui.get.appUIAddress.stripSuffix("/")) + goToUi(sc, "") find(cssSelector("""ul li a[href*="jobs"]""")) should not be(None) find(cssSelector("""ul li a[href*="stages"]""")) should not be(None) find(cssSelector("""ul li a[href*="storage"]""")) should not be(None) @@ -344,7 +407,7 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } eventually(timeout(10 seconds), interval(50 milliseconds)) { // check new page not exist - go to (sc.ui.get.appUIAddress.stripSuffix("/") + "/foo") + goToUi(sc, "/foo") find(cssSelector("b")) should 
be(None) } } @@ -371,4 +434,163 @@ class UISeleniumSuite extends FunSuite with WebBrowser with Matchers with Before } } } + + test("stage & job retention") { + val conf = new SparkConf() + .setMaster("local") + .setAppName("test") + .set("spark.ui.enabled", "true") + .set("spark.ui.port", "0") + .set("spark.ui.retainedStages", "3") + .set("spark.ui.retainedJobs", "2") + val sc = new SparkContext(conf) + assert(sc.ui.isDefined) + + withSpark(sc) { sc => + // run a few jobs & stages ... + (0 until 5).foreach { idx => + // NOTE: if we reverse the order, things don't really behave nicely + // we lose the stage for a job we keep, and then the job doesn't know + // about its last stage + sc.parallelize(idx to (idx + 3)).map(identity).groupBy(identity).map(identity) + .groupBy(identity).count() + sc.parallelize(idx to (idx + 3)).collect() + } + + val expJobInfo = Seq( + ("9", "collect"), + ("8", "count") + ) + + eventually(timeout(1 second), interval(50 milliseconds)) { + goToUi(sc, "/jobs") + // The completed jobs table should have two rows. The first row will be the most recent job: + find("completed-summary").get.text should be ("Completed Jobs: 10, only showing 2") + find("completed").get.text should be ("Completed Jobs (10, only showing 2)") + val rows = findAll(cssSelector("tbody tr")).toIndexedSeq.map{_.underlying} + rows.size should be (expJobInfo.size) + for { + (row, idx) <- rows.zipWithIndex + columns = row.findElements(By.tagName("td")) + id = columns(0).getText() + description = columns(1).getText() + } { + id should be (expJobInfo(idx)._1) + description should include (expJobInfo(idx)._2) + } + } + + val jobsJson = getJson(sc.ui.get, "jobs") + jobsJson.children.size should be (expJobInfo.size) + for { + (job @ JObject(_), idx) <- jobsJson.children.zipWithIndex + id = (job \ "jobId").extract[String] + name = (job \ "name").extract[String] + } { + withClue(s"idx = $idx; id = $id; name = ${name.substring(0, 20)}") { + id should be (expJobInfo(idx)._1) + name should include (expJobInfo(idx)._2) + } + } + + // what about when we query for a job that did exist, but has been cleared? 
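HistoryServerSuite.getContentAndCode, used just below, is treated by these assertions as a (status code, optional success body, optional error body) probe of the REST endpoints; that tuple shape is inferred from the checks, and the real helper may differ. A rough, hypothetical sketch of such a probe written against plain HttpURLConnection:

import java.io.InputStream
import java.net.{HttpURLConnection, URL}
import scala.io.Source

// Hypothetical stand-in, not the real helper: (status, body on success, body on error).
def getContentAndCodeSketch(url: URL): (Int, Option[String], Option[String]) = {
  val conn = url.openConnection().asInstanceOf[HttpURLConnection]
  conn.setRequestMethod("GET")
  val code = conn.getResponseCode
  def slurp(in: InputStream): Option[String] =
    Option(in).map(s => try Source.fromInputStream(s).mkString finally s.close())
  val content = if (code < HttpURLConnection.HTTP_BAD_REQUEST) slurp(conn.getInputStream) else None
  val error = if (code >= HttpURLConnection.HTTP_BAD_REQUEST) slurp(conn.getErrorStream) else None
  conn.disconnect()
  (code, content, error)
}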
+ goToUi(sc, "/jobs/job/?id=7") + find("no-info").get.text should be ("No information to display for job 7") + + val badJob = HistoryServerSuite.getContentAndCode(apiUrl(sc.ui.get, "jobs/7")) + badJob._1 should be (HttpServletResponse.SC_NOT_FOUND) + badJob._2 should be (None) + badJob._3 should be (Some("unknown job: 7")) + + val expStageInfo = Seq( + ("19", "collect"), + ("18", "count"), + ("17", "groupBy") + ) + + eventually(timeout(1 second), interval(50 milliseconds)) { + goToUi(sc, "/stages") + find("completed-summary").get.text should be ("Completed Stages: 20, only showing 3") + find("completed").get.text should be ("Completed Stages (20, only showing 3)") + val rows = findAll(cssSelector("tbody tr")).toIndexedSeq.map{_.underlying} + rows.size should be (3) + for { + (row, idx) <- rows.zipWithIndex + columns = row.findElements(By.tagName("td")) + id = columns(0).getText() + description = columns(1).getText() + } { + id should be (expStageInfo(idx)._1) + description should include (expStageInfo(idx)._2) + } + } + + val stagesJson = getJson(sc.ui.get, "stages") + stagesJson.children.size should be (3) + for { + (stage @ JObject(_), idx) <- stagesJson.children.zipWithIndex + id = (stage \ "stageId").extract[String] + name = (stage \ "name").extract[String] + } { + id should be (expStageInfo(idx)._1) + name should include (expStageInfo(idx)._2) + } + + // nonexistent stage + + goToUi(sc, "/stages/stage/?id=12&attempt=0") + find("no-info").get.text should be ("No information to display for Stage 12 (Attempt 0)") + val badStage = HistoryServerSuite.getContentAndCode(apiUrl(sc.ui.get, "stages/12/0")) + badStage._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStage._2 should be (None) + badStage._3 should be (Some("unknown stage: 12")) + + val badAttempt = HistoryServerSuite.getContentAndCode(apiUrl(sc.ui.get, "stages/19/15")) + badAttempt._1 should be (HttpServletResponse.SC_NOT_FOUND) + badAttempt._2 should be (None) + badAttempt._3 should be (Some("unknown attempt for stage 19. 
Found attempts: [0]")) + + val badStageAttemptList = HistoryServerSuite.getContentAndCode( + apiUrl(sc.ui.get, "stages/12")) + badStageAttemptList._1 should be (HttpServletResponse.SC_NOT_FOUND) + badStageAttemptList._2 should be (None) + badStageAttemptList._3 should be (Some("unknown stage: 12")) + } + } + + test("live UI json application list") { + withSpark(newSparkContext()) { sc => + val appListRawJson = HistoryServerSuite.getUrl(new URL( + sc.ui.get.appUIAddress + "/api/v1/applications")) + val appListJsonAst = JsonMethods.parse(appListRawJson) + appListJsonAst.children.length should be (1) + val attempts = (appListJsonAst \ "attempts").children + attempts.size should be (1) + (attempts(0) \ "completed").extract[Boolean] should be (false) + parseDate(attempts(0) \ "startTime") should be (sc.startTime) + parseDate(attempts(0) \ "endTime") should be (-1) + val oneAppJsonAst = getJson(sc.ui.get, "") + oneAppJsonAst should be (appListJsonAst.children(0)) + } + } + + def goToUi(sc: SparkContext, path: String): Unit = { + goToUi(sc.ui.get, path) + } + + def goToUi(ui: SparkUI, path: String): Unit = { + go to (ui.appUIAddress.stripSuffix("/") + path) + } + + def parseDate(json: JValue): Long = { + JacksonMessageWriter.makeISODateFormat.parse(json.extract[String]).getTime + } + + def getJson(ui: SparkUI, path: String): JValue = { + JsonMethods.parse(HistoryServerSuite.getUrl(apiUrl(ui, path))) + } + + def apiUrl(ui: SparkUI, path: String): URL = { + new URL(ui.appUIAddress + "/api/v1/applications/test/" + path) + } } diff --git a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala index 21d8267114133..967dd0821ebd0 100644 --- a/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/jobs/JobProgressListenerSuite.scala @@ -34,12 +34,12 @@ class JobProgressListenerSuite extends FunSuite with LocalSparkContext with Matc val jobCompletionTime = 1421191296660L private def createStageStartEvent(stageId: Int) = { - val stageInfo = new StageInfo(stageId, 0, stageId.toString, 0, null, "") + val stageInfo = new StageInfo(stageId, 0, stageId.toString, 0, null, null, "") SparkListenerStageSubmitted(stageInfo) } private def createStageEndEvent(stageId: Int, failed: Boolean = false) = { - val stageInfo = new StageInfo(stageId, 0, stageId.toString, 0, null, "") + val stageInfo = new StageInfo(stageId, 0, stageId.toString, 0, null, null, "") if (failed) { stageInfo.failureReason = Some("Failed!") } @@ -51,7 +51,7 @@ class JobProgressListenerSuite extends FunSuite with LocalSparkContext with Matc stageIds: Seq[Int], jobGroup: Option[String] = None): SparkListenerJobStart = { val stageInfos = stageIds.map { stageId => - new StageInfo(stageId, 0, stageId.toString, 0, null, "") + new StageInfo(stageId, 0, stageId.toString, 0, null, null, "") } val properties: Option[Properties] = jobGroup.map { groupId => val props = new Properties() diff --git a/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala new file mode 100644 index 0000000000000..c1126f3af52e6 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/ui/scope/RDDOperationGraphListenerSuite.scala @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.scope + +import org.scalatest.FunSuite + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.SparkListenerStageSubmitted +import org.apache.spark.scheduler.SparkListenerStageCompleted +import org.apache.spark.scheduler.SparkListenerJobStart + +/** + * Tests that this listener populates and cleans up its data structures properly. + */ +class RDDOperationGraphListenerSuite extends FunSuite { + private var jobIdCounter = 0 + private var stageIdCounter = 0 + private val maxRetainedJobs = 10 + private val maxRetainedStages = 10 + private val conf = new SparkConf() + .set("spark.ui.retainedJobs", maxRetainedJobs.toString) + .set("spark.ui.retainedStages", maxRetainedStages.toString) + + test("run normal jobs") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) + assert(listener.jobIdToStageIds.isEmpty) + assert(listener.jobIdToSkippedStageIds.isEmpty) + assert(listener.stageIdToJobId.isEmpty) + assert(listener.stageIdToGraph.isEmpty) + assert(listener.completedStageIds.isEmpty) + assert(listener.jobIds.isEmpty) + assert(listener.stageIds.isEmpty) + + // Run a few jobs, but not enough for clean up yet + (1 to 3).foreach { numStages => startJob(numStages, listener) } // start 3 jobs and 6 stages + (0 to 5).foreach { i => endStage(startingStageId + i, listener) } // finish all 6 stages + (0 to 2).foreach { i => endJob(startingJobId + i, listener) } // finish all 3 jobs + + assert(listener.jobIdToStageIds.size === 3) + assert(listener.jobIdToStageIds(startingJobId).size === 1) + assert(listener.jobIdToStageIds(startingJobId + 1).size === 2) + assert(listener.jobIdToStageIds(startingJobId + 2).size === 3) + assert(listener.jobIdToSkippedStageIds.size === 3) + assert(listener.jobIdToSkippedStageIds.values.forall(_.isEmpty)) // no skipped stages + assert(listener.stageIdToJobId.size === 6) + assert(listener.stageIdToJobId(startingStageId) === startingJobId) + assert(listener.stageIdToJobId(startingStageId + 1) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 2) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 3) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 4) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 5) === startingJobId + 2) + assert(listener.stageIdToGraph.size === 6) + assert(listener.completedStageIds.size === 6) + assert(listener.jobIds.size === 3) + assert(listener.stageIds.size === 6) + } + + test("run jobs with skipped stages") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) + + // Run a few jobs, but not enough for clean up yet + // Leave some stages 
unfinished so that they are marked as skipped + (1 to 3).foreach { numStages => startJob(numStages, listener) } // start 3 jobs and 6 stages + (4 to 5).foreach { i => endStage(startingStageId + i, listener) } // finish only last 2 stages + (0 to 2).foreach { i => endJob(startingJobId + i, listener) } // finish all 3 jobs + + assert(listener.jobIdToSkippedStageIds.size === 3) + assert(listener.jobIdToSkippedStageIds(startingJobId).size === 1) + assert(listener.jobIdToSkippedStageIds(startingJobId + 1).size === 2) + assert(listener.jobIdToSkippedStageIds(startingJobId + 2).size === 1) // 2 stages not skipped + assert(listener.completedStageIds.size === 2) + + // The rest should be the same as before + assert(listener.jobIdToStageIds.size === 3) + assert(listener.jobIdToStageIds(startingJobId).size === 1) + assert(listener.jobIdToStageIds(startingJobId + 1).size === 2) + assert(listener.jobIdToStageIds(startingJobId + 2).size === 3) + assert(listener.stageIdToJobId.size === 6) + assert(listener.stageIdToJobId(startingStageId) === startingJobId) + assert(listener.stageIdToJobId(startingStageId + 1) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 2) === startingJobId + 1) + assert(listener.stageIdToJobId(startingStageId + 3) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 4) === startingJobId + 2) + assert(listener.stageIdToJobId(startingStageId + 5) === startingJobId + 2) + assert(listener.stageIdToGraph.size === 6) + assert(listener.jobIds.size === 3) + assert(listener.stageIds.size === 6) + } + + test("clean up metadata") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) + + // Run many jobs and stages to trigger clean up + (1 to 10000).foreach { i => + // Note: this must be less than `maxRetainedStages` + val numStages = i % (maxRetainedStages - 2) + 1 + val startingStageIdForJob = stageIdCounter + val jobId = startJob(numStages, listener) + // End some, but not all, stages that belong to this job + // This is to ensure that we have both completed and skipped stages + (startingStageIdForJob until stageIdCounter) + .filter { i => i % 2 == 0 } + .foreach { i => endStage(i, listener) } + // End all jobs + endJob(jobId, listener) + } + + // Ensure we never exceed the max retained thresholds + assert(listener.jobIdToStageIds.size <= maxRetainedJobs) + assert(listener.jobIdToSkippedStageIds.size <= maxRetainedJobs) + assert(listener.stageIdToJobId.size <= maxRetainedStages) + assert(listener.stageIdToGraph.size <= maxRetainedStages) + assert(listener.completedStageIds.size <= maxRetainedStages) + assert(listener.jobIds.size <= maxRetainedJobs) + assert(listener.stageIds.size <= maxRetainedStages) + + // Also ensure we're actually populating these data structures + // Otherwise the previous group of asserts will be meaningless + assert(listener.jobIdToStageIds.nonEmpty) + assert(listener.jobIdToSkippedStageIds.nonEmpty) + assert(listener.stageIdToJobId.nonEmpty) + assert(listener.stageIdToGraph.nonEmpty) + assert(listener.completedStageIds.nonEmpty) + assert(listener.jobIds.nonEmpty) + assert(listener.stageIds.nonEmpty) + + // Ensure we clean up old jobs and stages, not arbitrary ones + assert(!listener.jobIdToStageIds.contains(startingJobId)) + assert(!listener.jobIdToSkippedStageIds.contains(startingJobId)) + assert(!listener.stageIdToJobId.contains(startingStageId)) + assert(!listener.stageIdToGraph.contains(startingStageId)) + 
assert(!listener.completedStageIds.contains(startingStageId)) + assert(!listener.stageIds.contains(startingStageId)) + assert(!listener.jobIds.contains(startingJobId)) + } + + test("fate sharing between jobs and stages") { + val startingJobId = jobIdCounter + val startingStageId = stageIdCounter + val listener = new RDDOperationGraphListener(conf) + + // Run 3 jobs and 8 stages, finishing all 3 jobs but only 2 stages + startJob(5, listener) + startJob(1, listener) + startJob(2, listener) + (0 until 8).foreach { i => startStage(i + startingStageId, listener) } + endStage(startingStageId + 3, listener) + endStage(startingStageId + 4, listener) + (0 until 3).foreach { i => endJob(i + startingJobId, listener) } + + // First, assert the old stuff + assert(listener.jobIdToStageIds.size === 3) + assert(listener.jobIdToSkippedStageIds.size === 3) + assert(listener.stageIdToJobId.size === 8) + assert(listener.stageIdToGraph.size === 8) + assert(listener.completedStageIds.size === 2) + + // Cleaning the third job should clean all of its stages + listener.cleanJob(startingJobId + 2) + assert(listener.jobIdToStageIds.size === 2) + assert(listener.jobIdToSkippedStageIds.size === 2) + assert(listener.stageIdToJobId.size === 6) + assert(listener.stageIdToGraph.size === 6) + assert(listener.completedStageIds.size === 2) + + // Cleaning one of the stages in the first job should clean that job and all of its stages + // Note that we still keep around the last stage because it belongs to a different job + listener.cleanStage(startingStageId) + assert(listener.jobIdToStageIds.size === 1) + assert(listener.jobIdToSkippedStageIds.size === 1) + assert(listener.stageIdToJobId.size === 1) + assert(listener.stageIdToGraph.size === 1) + assert(listener.completedStageIds.size === 0) + } + + /** Start a job with the specified number of stages. */ + private def startJob(numStages: Int, listener: RDDOperationGraphListener): Int = { + assert(numStages > 0, "I will not run a job with 0 stages for you.") + val stageInfos = (0 until numStages).map { _ => + val stageInfo = new StageInfo(stageIdCounter, 0, "s", 0, Seq.empty, Seq.empty, "d") + stageIdCounter += 1 + stageInfo + } + val jobId = jobIdCounter + listener.onJobStart(new SparkListenerJobStart(jobId, 0, stageInfos)) + // Also start all stages that belong to this job + stageInfos.map(_.stageId).foreach { sid => startStage(sid, listener) } + jobIdCounter += 1 + jobId + } + + /** Start the stage specified by the given ID. */ + private def startStage(stageId: Int, listener: RDDOperationGraphListener): Unit = { + val stageInfo = new StageInfo(stageId, 0, "s", 0, Seq.empty, Seq.empty, "d") + listener.onStageSubmitted(new SparkListenerStageSubmitted(stageInfo)) + } + + /** Finish the stage specified by the given ID. */ + private def endStage(stageId: Int, listener: RDDOperationGraphListener): Unit = { + val stageInfo = new StageInfo(stageId, 0, "s", 0, Seq.empty, Seq.empty, "d") + listener.onStageCompleted(new SparkListenerStageCompleted(stageInfo)) + } + + /** Finish the job specified by the given ID. 
*/ + private def endJob(jobId: Int, listener: RDDOperationGraphListener): Unit = { + listener.onJobEnd(new SparkListenerJobEnd(jobId, 0, JobSucceeded)) + } + +} diff --git a/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala b/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala index 0ea9ea47b4288..8778042e34657 100644 --- a/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala @@ -35,10 +35,10 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter { private val none = StorageLevel.NONE private val taskInfo = new TaskInfo(0, 0, 0, 0, "big", "dog", TaskLocality.ANY, false) private val taskInfo1 = new TaskInfo(1, 1, 1, 1, "big", "cat", TaskLocality.ANY, false) - private def rddInfo0 = new RDDInfo(0, "freedom", 100, memOnly) - private def rddInfo1 = new RDDInfo(1, "hostage", 200, memOnly) - private def rddInfo2 = new RDDInfo(2, "sanity", 300, memAndDisk) - private def rddInfo3 = new RDDInfo(3, "grace", 400, memAndDisk) + private def rddInfo0 = new RDDInfo(0, "freedom", 100, memOnly, Seq(10)) + private def rddInfo1 = new RDDInfo(1, "hostage", 200, memOnly, Seq(10)) + private def rddInfo2 = new RDDInfo(2, "sanity", 300, memAndDisk, Seq(10)) + private def rddInfo3 = new RDDInfo(3, "grace", 400, memAndDisk, Seq(10)) private val bm1 = BlockManagerId("big", "dog", 1) before { @@ -54,7 +54,7 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter { assert(storageListener.rddInfoList.isEmpty) // 2 RDDs are known, but none are cached - val stageInfo0 = new StageInfo(0, 0, "0", 100, Seq(rddInfo0, rddInfo1), "details") + val stageInfo0 = new StageInfo(0, 0, "0", 100, Seq(rddInfo0, rddInfo1), Seq.empty, "details") bus.postToAll(SparkListenerStageSubmitted(stageInfo0)) assert(storageListener._rddInfoMap.size === 2) assert(storageListener.rddInfoList.isEmpty) @@ -64,15 +64,16 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter { val rddInfo3Cached = rddInfo3 rddInfo2Cached.numCachedPartitions = 1 rddInfo3Cached.numCachedPartitions = 1 - val stageInfo1 = new StageInfo(1, 0, "0", 100, Seq(rddInfo2Cached, rddInfo3Cached), "details") + val stageInfo1 = new StageInfo( + 1, 0, "0", 100, Seq(rddInfo2Cached, rddInfo3Cached), Seq.empty, "details") bus.postToAll(SparkListenerStageSubmitted(stageInfo1)) assert(storageListener._rddInfoMap.size === 4) assert(storageListener.rddInfoList.size === 2) // Submitting RDDInfos with duplicate IDs does nothing - val rddInfo0Cached = new RDDInfo(0, "freedom", 100, StorageLevel.MEMORY_ONLY) + val rddInfo0Cached = new RDDInfo(0, "freedom", 100, StorageLevel.MEMORY_ONLY, Seq(10)) rddInfo0Cached.numCachedPartitions = 1 - val stageInfo0Cached = new StageInfo(0, 0, "0", 100, Seq(rddInfo0), "details") + val stageInfo0Cached = new StageInfo(0, 0, "0", 100, Seq(rddInfo0), Seq.empty, "details") bus.postToAll(SparkListenerStageSubmitted(stageInfo0Cached)) assert(storageListener._rddInfoMap.size === 4) assert(storageListener.rddInfoList.size === 2) @@ -88,7 +89,8 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter { val rddInfo1Cached = rddInfo1 rddInfo0Cached.numCachedPartitions = 1 rddInfo1Cached.numCachedPartitions = 1 - val stageInfo0 = new StageInfo(0, 0, "0", 100, Seq(rddInfo0Cached, rddInfo1Cached), "details") + val stageInfo0 = new StageInfo( + 0, 0, "0", 100, Seq(rddInfo0Cached, rddInfo1Cached), Seq.empty, "details") bus.postToAll(SparkListenerStageSubmitted(stageInfo0)) assert(storageListener._rddInfoMap.size === 2) 
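Across these UI test updates, RDDInfo gains a fifth constructor argument and StageInfo a sixth: a Seq of parent RDD ids and parent stage ids respectively, apparently threaded through for the DAG visualization work in this release; the Seq(10) and Seq.empty values used in the fixtures are placeholders. A small hypothetical builder showing the constructor shapes being exercised, with the argument order copied from the calls above:

import org.apache.spark.scheduler.StageInfo
import org.apache.spark.storage.{RDDInfo, StorageLevel}

// Builds a one-partition RDDInfo and wraps it in a StageInfo, mirroring the fixtures above.
def stageWithOneRdd(stageId: Int, rddId: Int, parentRddIds: Seq[Int]): StageInfo = {
  val rdd = new RDDInfo(rddId, "rdd" + rddId, 1, StorageLevel.MEMORY_ONLY, parentRddIds) // last arg: parent RDD ids
  new StageInfo(stageId, 0, "stage" + stageId, 1, Seq(rdd), Seq.empty, "details")        // Seq.empty: parent stage ids
}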
assert(storageListener.rddInfoList.size === 2) @@ -108,7 +110,7 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter { val myRddInfo1 = rddInfo1 val myRddInfo2 = rddInfo2 val stageInfo0 = new StageInfo( - 0, 0, "0", 100, Seq(myRddInfo0, myRddInfo1, myRddInfo2), "details") + 0, 0, "0", 100, Seq(myRddInfo0, myRddInfo1, myRddInfo2), Seq.empty, "details") bus.postToAll(SparkListenerBlockManagerAdded(1L, bm1, 1000L)) bus.postToAll(SparkListenerStageSubmitted(stageInfo0)) assert(storageListener._rddInfoMap.size === 3) @@ -166,10 +168,10 @@ class StorageTabSuite extends FunSuite with BeforeAndAfter { test("verify StorageTab contains all cached rdds") { - val rddInfo0 = new RDDInfo(0, "rdd0", 1, memOnly) - val rddInfo1 = new RDDInfo(1, "rdd1", 1 ,memOnly) - val stageInfo0 = new StageInfo(0, 0, "stage0", 1, Seq(rddInfo0), "details") - val stageInfo1 = new StageInfo(1, 0, "stage1", 1, Seq(rddInfo1), "details") + val rddInfo0 = new RDDInfo(0, "rdd0", 1, memOnly, Seq(4)) + val rddInfo1 = new RDDInfo(1, "rdd1", 1, memOnly, Seq(4)) + val stageInfo0 = new StageInfo(0, 0, "stage0", 1, Seq(rddInfo0), Seq.empty, "details") + val stageInfo1 = new StageInfo(1, 0, "stage1", 1, Seq(rddInfo1), Seq.empty, "details") val taskMetrics0 = new TaskMetrics val taskMetrics1 = new TaskMetrics val block0 = (RDDBlockId(0, 1), BlockStatus(memOnly, 100L, 0L, 0L)) diff --git a/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala index bec79fc4dc8f7..ccdb3f571429d 100644 --- a/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala @@ -138,7 +138,7 @@ class AkkaUtilsSuite extends FunSuite with LocalSparkContext with ResetSystemPro assert(securityManagerGood.isAuthenticationEnabled() === true) - val slaveRpcEnv =RpcEnv.create("spark-slave", hostname, 0, goodconf, securityManagerGood) + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, goodconf, securityManagerGood) val slaveTracker = new MapOutputTrackerWorker(conf) slaveTracker.trackerEndpoint = slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) diff --git a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala index ff1bfe0774a2f..5c47bddb8e3fc 100644 --- a/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ClosureCleanerSuite.scala @@ -17,10 +17,15 @@ package org.apache.spark.util +import java.io.NotSerializableException +import java.util.Random + import org.scalatest.FunSuite import org.apache.spark.LocalSparkContext._ -import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.{TaskContext, SparkContext, SparkException} +import org.apache.spark.partial.CountEvaluator +import org.apache.spark.rdd.RDD class ClosureCleanerSuite extends FunSuite { test("closures inside an object") { @@ -52,17 +57,72 @@ class ClosureCleanerSuite extends FunSuite { } test("toplevel return statements in closures are identified at cleaning time") { - val ex = intercept[SparkException] { + intercept[ReturnStatementInClosureException] { TestObjectWithBogusReturns.run() } - - assert(ex.getMessage.contains("Return statements aren't allowed in Spark closures")) } test("return statements from named functions nested in closures don't raise exceptions") { val result = TestObjectWithNestedReturns.run() assert(result === 1) } + + 
test("user provided closures are actually cleaned") { + + // We use return statements as an indication that a closure is actually being cleaned + // We expect closure cleaner to find the return statements in the user provided closures + def expectCorrectException(body: => Unit): Unit = { + try { + body + } catch { + case rse: ReturnStatementInClosureException => // Success! + case e @ (_: NotSerializableException | _: SparkException) => + fail(s"Expected ReturnStatementInClosureException, but got $e.\n" + + "This means the closure provided by user is not actually cleaned.") + } + } + + withSpark(new SparkContext("local", "test")) { sc => + val rdd = sc.parallelize(1 to 10) + val pairRdd = rdd.map { i => (i, i) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMap(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFlatMap(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFilter(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testSortBy(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testGroupBy(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testKeyBy(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapPartitions(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapPartitionsWithIndex(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapPartitionsWithContext(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFlatMapWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFilterWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testForEachWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapWith(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testZipPartitions2(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testZipPartitions3(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testZipPartitions4(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testForeach(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testForeachPartition(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testReduce(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testTreeReduce(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFold(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testAggregate(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testTreeAggregate(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testCombineByKey(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testAggregateByKey(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFoldByKey(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testReduceByKey(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testReduceByKeyLocally(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testMapValues(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testFlatMapValues(pairRdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testForeachAsync(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testForeachPartitionAsync(rdd) } + expectCorrectException { TestUserClosuresActuallyCleaned.testRunJob1(sc) } + expectCorrectException { TestUserClosuresActuallyCleaned.testRunJob2(sc) } + 
expectCorrectException { TestUserClosuresActuallyCleaned.testRunApproximateJob(sc) } + expectCorrectException { TestUserClosuresActuallyCleaned.testSubmitJob(sc) } + } + } } // A non-serializable class we create in closures to make sure that we aren't @@ -143,7 +203,7 @@ object TestObjectWithNestedReturns { def run(): Int = { withSpark(new SparkContext("local", "test")) { sc => val nums = sc.parallelize(Array(1, 2, 3, 4)) - nums.map {x => + nums.map {x => // this return is fine since it will not transfer control outside the closure def foo(): Int = { return 5; 1 } foo() @@ -187,3 +247,108 @@ class TestClassWithNesting(val y: Int) extends Serializable { } } } + +/** + * Test whether closures passed in through public APIs are actually cleaned. + * + * We put a return statement in each of these closures as a mechanism to detect whether the + * ClosureCleaner actually cleaned our closure. If it did, then it would throw an appropriate + * exception explicitly complaining about the return statement. Otherwise, we know the + * ClosureCleaner did not actually clean our closure, in which case we should fail the test. + */ +private object TestUserClosuresActuallyCleaned { + def testMap(rdd: RDD[Int]): Unit = { rdd.map { _ => return; 0 }.count() } + def testFlatMap(rdd: RDD[Int]): Unit = { rdd.flatMap { _ => return; Seq() }.count() } + def testFilter(rdd: RDD[Int]): Unit = { rdd.filter { _ => return; true }.count() } + def testSortBy(rdd: RDD[Int]): Unit = { rdd.sortBy { _ => return; 1 }.count() } + def testKeyBy(rdd: RDD[Int]): Unit = { rdd.keyBy { _ => return; 1 }.count() } + def testGroupBy(rdd: RDD[Int]): Unit = { rdd.groupBy { _ => return; 1 }.count() } + def testMapPartitions(rdd: RDD[Int]): Unit = { rdd.mapPartitions { it => return; it }.count() } + def testMapPartitionsWithIndex(rdd: RDD[Int]): Unit = { + rdd.mapPartitionsWithIndex { (_, it) => return; it }.count() + } + def testFlatMapWith(rdd: RDD[Int]): Unit = { + rdd.flatMapWith ((index: Int) => new Random(index + 42)){ (_, it) => return; Seq() }.count() + } + def testMapWith(rdd: RDD[Int]): Unit = { + rdd.mapWith ((index: Int) => new Random(index + 42)){ (_, it) => return; 0 }.count() + } + def testFilterWith(rdd: RDD[Int]): Unit = { + rdd.filterWith ((index: Int) => new Random(index + 42)){ (_, it) => return; true }.count() + } + def testForEachWith(rdd: RDD[Int]): Unit = { + rdd.foreachWith ((index: Int) => new Random(index + 42)){ (_, it) => return } + } + def testMapPartitionsWithContext(rdd: RDD[Int]): Unit = { + rdd.mapPartitionsWithContext { (_, it) => return; it }.count() + } + def testZipPartitions2(rdd: RDD[Int]): Unit = { + rdd.zipPartitions(rdd) { case (it1, it2) => return; it1 }.count() + } + def testZipPartitions3(rdd: RDD[Int]): Unit = { + rdd.zipPartitions(rdd, rdd) { case (it1, it2, it3) => return; it1 }.count() + } + def testZipPartitions4(rdd: RDD[Int]): Unit = { + rdd.zipPartitions(rdd, rdd, rdd) { case (it1, it2, it3, it4) => return; it1 }.count() + } + def testForeach(rdd: RDD[Int]): Unit = { rdd.foreach { _ => return } } + def testForeachPartition(rdd: RDD[Int]): Unit = { rdd.foreachPartition { _ => return } } + def testReduce(rdd: RDD[Int]): Unit = { rdd.reduce { case (_, _) => return; 1 } } + def testTreeReduce(rdd: RDD[Int]): Unit = { rdd.treeReduce { case (_, _) => return; 1 } } + def testFold(rdd: RDD[Int]): Unit = { rdd.fold(0) { case (_, _) => return; 1 } } + def testAggregate(rdd: RDD[Int]): Unit = { + rdd.aggregate(0)({ case (_, _) => return; 1 }, { case (_, _) => return; 1 }) + } + def 
testTreeAggregate(rdd: RDD[Int]): Unit = { + rdd.treeAggregate(0)({ case (_, _) => return; 1 }, { case (_, _) => return; 1 }) + } + + // Test pair RDD functions + def testCombineByKey(rdd: RDD[(Int, Int)]): Unit = { + rdd.combineByKey( + { _ => return; 1 }: Int => Int, + { case (_, _) => return; 1 }: (Int, Int) => Int, + { case (_, _) => return; 1 }: (Int, Int) => Int + ).count() + } + def testAggregateByKey(rdd: RDD[(Int, Int)]): Unit = { + rdd.aggregateByKey(0)({ case (_, _) => return; 1 }, { case (_, _) => return; 1 }).count() + } + def testFoldByKey(rdd: RDD[(Int, Int)]): Unit = { rdd.foldByKey(0) { case (_, _) => return; 1 } } + def testReduceByKey(rdd: RDD[(Int, Int)]): Unit = { rdd.reduceByKey { case (_, _) => return; 1 } } + def testReduceByKeyLocally(rdd: RDD[(Int, Int)]): Unit = { + rdd.reduceByKeyLocally { case (_, _) => return; 1 } + } + def testMapValues(rdd: RDD[(Int, Int)]): Unit = { rdd.mapValues { _ => return; 1 } } + def testFlatMapValues(rdd: RDD[(Int, Int)]): Unit = { rdd.flatMapValues { _ => return; Seq() } } + + // Test async RDD actions + def testForeachAsync(rdd: RDD[Int]): Unit = { rdd.foreachAsync { _ => return } } + def testForeachPartitionAsync(rdd: RDD[Int]): Unit = { rdd.foreachPartitionAsync { _ => return } } + + // Test SparkContext runJob + def testRunJob1(sc: SparkContext): Unit = { + val rdd = sc.parallelize(1 to 10, 10) + sc.runJob(rdd, { (ctx: TaskContext, iter: Iterator[Int]) => return; 1 } ) + } + def testRunJob2(sc: SparkContext): Unit = { + val rdd = sc.parallelize(1 to 10, 10) + sc.runJob(rdd, { iter: Iterator[Int] => return; 1 } ) + } + def testRunApproximateJob(sc: SparkContext): Unit = { + val rdd = sc.parallelize(1 to 10, 10) + val evaluator = new CountEvaluator(1, 0.5) + sc.runApproximateJob( + rdd, { (ctx: TaskContext, iter: Iterator[Int]) => return; 1L }, evaluator, 1000) + } + def testSubmitJob(sc: SparkContext): Unit = { + val rdd = sc.parallelize(1 to 10, 10) + sc.submitJob( + rdd, + { _ => return; 1 }: Iterator[Int] => Int, + Seq.empty, + { case (_, _) => return }: (Int, Int) => Unit, + { return } + ) + } +} diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 0c9cf5bc685c1..0c5221d10d79d 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -29,6 +29,7 @@ import org.scalatest.FunSuite import org.apache.spark._ import org.apache.spark.executor._ +import org.apache.spark.rdd.RDDOperationScope import org.apache.spark.scheduler._ import org.apache.spark.storage._ @@ -162,7 +163,7 @@ class JsonProtocolSuite extends FunSuite { assertEquals(exceptionFailure, JsonProtocol.taskEndReasonFromJson(oldEvent)) } - test("StageInfo backward compatibility") { + test("StageInfo backward compatibility (details, accumulables)") { val info = makeStageInfo(1, 2, 3, 4L, 5L) val newJson = JsonProtocol.stageInfoToJson(info) @@ -297,7 +298,7 @@ class JsonProtocolSuite extends FunSuite { val stageIds = Seq[Int](1, 2, 3, 4) val stageInfos = stageIds.map(x => makeStageInfo(x, x * 200, x * 300, x * 400, x * 500)) val dummyStageInfos = - stageIds.map(id => new StageInfo(id, 0, "unknown", 0, Seq.empty, "unknown")) + stageIds.map(id => new StageInfo(id, 0, "unknown", 0, Seq.empty, Seq.empty, "unknown")) val jobStart = SparkListenerJobStart(10, jobSubmissionTime, stageInfos, properties) val oldEvent = JsonProtocol.jobStartToJson(jobStart).removeField({_._1 == "Stage 
Infos"}) val expectedJobStart = @@ -323,6 +324,25 @@ class JsonProtocolSuite extends FunSuite { assertEquals(expectedJobEnd, JsonProtocol.jobEndFromJson(oldEndEvent)) } + test("RDDInfo backward compatibility (scope, parent IDs)") { + // Prior to Spark 1.4.0, RDDInfo did not have the "Scope" and "Parent IDs" properties + val rddInfo = new RDDInfo( + 1, "one", 100, StorageLevel.NONE, Seq(1, 6, 8), Some(new RDDOperationScope("fable"))) + val oldRddInfoJson = JsonProtocol.rddInfoToJson(rddInfo) + .removeField({ _._1 == "Parent IDs"}) + .removeField({ _._1 == "Scope"}) + val expectedRddInfo = new RDDInfo(1, "one", 100, StorageLevel.NONE, Seq.empty, scope = None) + assertEquals(expectedRddInfo, JsonProtocol.rddInfoFromJson(oldRddInfoJson)) + } + + test("StageInfo backward compatibility (parent IDs)") { + // Prior to Spark 1.4.0, StageInfo did not have the "Parent IDs" property + val stageInfo = new StageInfo(1, 1, "me-stage", 1, Seq.empty, Seq(1, 2, 3), "details") + val oldStageInfo = JsonProtocol.stageInfoToJson(stageInfo).removeField({ _._1 == "Parent IDs"}) + val expectedStageInfo = new StageInfo(1, 1, "me-stage", 1, Seq.empty, Seq.empty, "details") + assertEquals(expectedStageInfo, JsonProtocol.stageInfoFromJson(oldStageInfo)) + } + /** -------------------------- * | Helper test running methods | * --------------------------- */ @@ -645,7 +665,7 @@ class JsonProtocolSuite extends FunSuite { } private def makeRddInfo(a: Int, b: Int, c: Int, d: Long, e: Long) = { - val r = new RDDInfo(a, "mayor", b, StorageLevel.MEMORY_AND_DISK) + val r = new RDDInfo(a, "mayor", b, StorageLevel.MEMORY_AND_DISK, Seq(1, 4, 7)) r.numCachedPartitions = c r.memSize = d r.diskSize = e @@ -654,7 +674,7 @@ class JsonProtocolSuite extends FunSuite { private def makeStageInfo(a: Int, b: Int, c: Int, d: Long, e: Long) = { val rddInfos = (0 until a % 5).map { i => makeRddInfo(a + i, b + i, c + i, d + i, e + i) } - val stageInfo = new StageInfo(a, 0, "greetings", b, rddInfos, "details") + val stageInfo = new StageInfo(a, 0, "greetings", b, rddInfos, Seq(100, 200, 300), "details") val (acc1, acc2) = (makeAccumulableInfo(1), makeAccumulableInfo(2)) stageInfo.accumulables(acc1.id) = acc1 stageInfo.accumulables(acc2.id) = acc2 @@ -747,6 +767,7 @@ class JsonProtocolSuite extends FunSuite { | "Stage Name": "greetings", | "Number of Tasks": 200, | "RDD Info": [], + | "ParentIDs" : [100, 200, 300], | "Details": "details", | "Accumulables": [ | { @@ -785,6 +806,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 101, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -799,6 +821,7 @@ class JsonProtocolSuite extends FunSuite { | "Disk Size": 501 | } | ], + | "ParentIDs" : [100, 200, 300], | "Details": "details", | "Accumulables": [ | { @@ -1168,6 +1191,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 1, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1182,6 +1206,7 @@ class JsonProtocolSuite extends FunSuite { | "Disk Size": 500 | } | ], + | "Parent IDs" : [100, 200, 300], | "Details": "details", | "Accumulables": [ | { @@ -1207,6 +1232,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 2, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1223,6 +1249,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 3, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": 
true, | "Use Memory": true, @@ -1237,6 +1264,7 @@ class JsonProtocolSuite extends FunSuite { | "Disk Size": 1001 | } | ], + | "ParentIDs" : [100, 200, 300], | "Details": "details", | "Accumulables": [ | { @@ -1262,6 +1290,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 3, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1278,6 +1307,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 4, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1294,6 +1324,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 5, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1308,6 +1339,7 @@ class JsonProtocolSuite extends FunSuite { | "Disk Size": 1502 | } | ], + | "ParentIDs" : [100, 200, 300], | "Details": "details", | "Accumulables": [ | { @@ -1333,6 +1365,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 4, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1349,6 +1382,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 5, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1365,6 +1399,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 6, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1381,6 +1416,7 @@ class JsonProtocolSuite extends FunSuite { | { | "RDD ID": 7, | "Name": "mayor", + | "Parent IDs": [1, 4, 7], | "Storage Level": { | "Use Disk": true, | "Use Memory": true, @@ -1395,6 +1431,7 @@ class JsonProtocolSuite extends FunSuite { | "Disk Size": 2003 | } | ], + | "ParentIDs" : [100, 200, 300], | "Details": "details", | "Accumulables": [ | { diff --git a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala index a3aa3e953fbec..751d3df9cc8f7 100644 --- a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala @@ -20,6 +20,9 @@ package org.apache.spark.util import java.util.concurrent.{CountDownLatch, TimeUnit} +import scala.concurrent.{Await, Future} +import scala.concurrent.duration._ + import org.scalatest.FunSuite class ThreadUtilsSuite extends FunSuite { @@ -54,4 +57,13 @@ class ThreadUtilsSuite extends FunSuite { executor.shutdownNow() } } + + test("sameThread") { + val callerThreadName = Thread.currentThread().getName() + val f = Future { + Thread.currentThread().getName() + }(ThreadUtils.sameThread) + val futureThreadName = Await.result(f, 10.seconds) + assert(futureThreadName === callerThreadName) + } } diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 651ead6ff1de2..afa5cdc819746 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -367,51 +367,58 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { } test("resolveURI") { - def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = { + def assertResolves(before: String, after: String): Unit = { // This should test only single paths assume(before.split(",").length === 1) // Repeated invocations of 
resolveURI should yield the same result - def resolve(uri: String): String = Utils.resolveURI(uri, testWindows).toString + def resolve(uri: String): String = Utils.resolveURI(uri).toString assert(resolve(after) === after) assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) // Also test resolveURIs with single paths - assert(new URI(Utils.resolveURIs(before, testWindows)) === new URI(after)) - assert(new URI(Utils.resolveURIs(after, testWindows)) === new URI(after)) + assert(new URI(Utils.resolveURIs(before)) === new URI(after)) + assert(new URI(Utils.resolveURIs(after)) === new URI(after)) } - val cwd = System.getProperty("user.dir") + val rawCwd = System.getProperty("user.dir") + val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("hdfs:/root/spark.jar", "hdfs:/root/spark.jar") assertResolves("hdfs:///root/spark.jar#app.jar", "hdfs:/root/spark.jar#app.jar") assertResolves("spark.jar", s"file:$cwd/spark.jar") - assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar#app.jar") - assertResolves("C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt", testWindows = true) - assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt", testWindows = true) - intercept[IllegalArgumentException] { Utils.resolveURI("file:foo") } - intercept[IllegalArgumentException] { Utils.resolveURI("file:foo:baby") } + assertResolves("spark.jar#app.jar", s"file:$cwd/spark.jar%23app.jar") + assertResolves("path to/file.txt", s"file:$cwd/path%20to/file.txt") + if (Utils.isWindows) { + assertResolves("C:\\path\\to\\file.txt", "file:/C:/path/to/file.txt") + assertResolves("C:\\path to\\file.txt", "file:/C:/path%20to/file.txt") + } + assertResolves("file:/C:/path/to/file.txt", "file:/C:/path/to/file.txt") + assertResolves("file:///C:/path/to/file.txt", "file:/C:/path/to/file.txt") + assertResolves("file:/C:/file.txt#alias.txt", "file:/C:/file.txt#alias.txt") + assertResolves("file:foo", s"file:foo") + assertResolves("file:foo:baby", s"file:foo:baby") } test("resolveURIs with multiple paths") { - def assertResolves(before: String, after: String, testWindows: Boolean = false): Unit = { + def assertResolves(before: String, after: String): Unit = { assume(before.split(",").length > 1) - assert(Utils.resolveURIs(before, testWindows) === after) - assert(Utils.resolveURIs(after, testWindows) === after) + assert(Utils.resolveURIs(before) === after) + assert(Utils.resolveURIs(after) === after) // Repeated invocations of resolveURIs should yield the same result - def resolve(uri: String): String = Utils.resolveURIs(uri, testWindows) + def resolve(uri: String): String = Utils.resolveURIs(uri) assert(resolve(after) === after) assert(resolve(resolve(after)) === after) assert(resolve(resolve(resolve(after))) === after) } - val cwd = System.getProperty("user.dir") + val rawCwd = System.getProperty("user.dir") + val cwd = if (Utils.isWindows) s"/$rawCwd".replace("\\", "/") else rawCwd assertResolves("jar1,jar2", s"file:$cwd/jar1,file:$cwd/jar2") assertResolves("file:/jar1,file:/jar2", "file:/jar1,file:/jar2") assertResolves("hdfs:/jar1,file:/jar2,jar3", s"hdfs:/jar1,file:/jar2,file:$cwd/jar3") - assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5", - 
s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4#jar5") - assertResolves("hdfs:/jar1,file:/jar2,jar3,C:\\pi.py#py.pi", - s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py#py.pi", testWindows = true) + assertResolves("hdfs:/jar1,file:/jar2,jar3,jar4#jar5,path to/jar6", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:$cwd/jar4%23jar5,file:$cwd/path%20to/jar6") + if (Utils.isWindows) { + assertResolves("""hdfs:/jar1,file:/jar2,jar3,C:\pi.py#py.pi,C:\path to\jar4""", + s"hdfs:/jar1,file:/jar2,file:$cwd/jar3,file:/C:/pi.py%23py.pi,file:/C:/path%20to/jar4") + } } test("nonLocalPaths") { @@ -426,6 +433,8 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { assert(Utils.nonLocalPaths("local:/spark.jar,file:/smart.jar,family.py") === Array.empty) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar") === Array("hdfs:/spark.jar", "s3:/smart.jar")) + assert(Utils.nonLocalPaths("hdfs:/spark.jar,path to/a.jar,s3:/smart.jar") === + Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("hdfs:/spark.jar,s3:/smart.jar,local.py,file:/hello/pi.py") === Array("hdfs:/spark.jar", "s3:/smart.jar")) assert(Utils.nonLocalPaths("local.py,hdfs:/spark.jar,file:/hello/pi.py,s3:/smart.jar") === @@ -542,12 +551,17 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { test("fetch hcfs dir") { val tempDir = Utils.createTempDir() val sourceDir = new File(tempDir, "source-dir") - val innerSourceDir = Utils.createTempDir(root=sourceDir.getPath) + val innerSourceDir = Utils.createTempDir(root = sourceDir.getPath) val sourceFile = File.createTempFile("someprefix", "somesuffix", innerSourceDir) val targetDir = new File(tempDir, "target-dir") Files.write("some text", sourceFile, UTF_8) - val path = new Path("file://" + sourceDir.getAbsolutePath) + val path = + if (Utils.isWindows) { + new Path("file:/" + sourceDir.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceDir.getAbsolutePath) + } val conf = new Configuration() val fs = Utils.getHadoopFileSystem(path.toString, conf) @@ -567,7 +581,12 @@ class UtilsSuite extends FunSuite with ResetSystemProperties with Logging { val destInnerFile = new File(destInnerDir, sourceFile.getName) assert(destInnerFile.isFile()) - val filePath = new Path("file://" + sourceFile.getAbsolutePath) + val filePath = + if (Utils.isWindows) { + new Path("file:/" + sourceFile.getAbsolutePath.replace("\\", "/")) + } else { + new Path("file://" + sourceFile.getAbsolutePath) + } val testFileDir = new File(tempDir, "test-filename") val testFileName = "testFName" val testFilefs = Utils.getHadoopFileSystem(filePath.toString, conf) diff --git a/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala index b85a409a4b2e9..ffc206991906a 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/BitSetSuite.scala @@ -94,7 +94,7 @@ class BitSetSuite extends FunSuite { test( "xor len(bitsetX) > len(bitsetY)" ) { val setBitsX = Seq( 0, 1, 3, 37, 38, 41, 85) - val setBitsY = Seq( 0, 2, 3, 37, 41 ) + val setBitsY = Seq( 0, 2, 3, 37, 41) val bitsetX = new BitSet(100) setBitsX.foreach( i => bitsetX.set(i)) val bitsetY = new BitSet(60) diff --git a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala index 20944b62473c5..322af6d2ac31d 100644 --- 
a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala @@ -76,7 +76,7 @@ class RandomSamplerSuite extends FunSuite with Matchers { } // Returns iterator over gap lengths between samples. - // This function assumes input data is integers sampled from the sequence of + // This function assumes input data is integers sampled from the sequence of // increasing integers: {0, 1, 2, ...}. This works because that is how I generate them, // and the samplers preserve their input order def gaps(data: Iterator[Int]): Iterator[Int] = { diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala index 03f5f2d1b8528..5eba208682bce 100644 --- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala @@ -28,7 +28,7 @@ import scala.language.reflectiveCalls class XORShiftRandomSuite extends FunSuite with Matchers { - def fixture: Object {val seed: Long; val hundMil: Int; val xorRand: XORShiftRandom} = new { + private def fixture = new { val seed = 1L val xorRand = new XORShiftRandom(seed) val hundMil = 1e8.toInt diff --git a/data/mllib/sample_multiclass_classification_data.txt b/data/mllib/sample_multiclass_classification_data.txt new file mode 100644 index 0000000000000..a0d7f90113919 --- /dev/null +++ b/data/mllib/sample_multiclass_classification_data.txt @@ -0,0 +1,150 @@ +1 1:-0.222222 2:0.5 3:-0.762712 4:-0.833333 +1 1:-0.555556 2:0.25 3:-0.864407 4:-0.916667 +1 1:-0.722222 2:-0.166667 3:-0.864407 4:-0.833333 +1 1:-0.722222 2:0.166667 3:-0.694915 4:-0.916667 +0 1:0.166667 2:-0.416667 3:0.457627 4:0.5 +1 1:-0.833333 3:-0.864407 4:-0.916667 +2 1:-1.32455e-07 2:-0.166667 3:0.220339 4:0.0833333 +2 1:-1.32455e-07 2:-0.333333 3:0.0169491 4:-4.03573e-08 +1 1:-0.5 2:0.75 3:-0.830508 4:-1 +0 1:0.611111 3:0.694915 4:0.416667 +0 1:0.222222 2:-0.166667 3:0.423729 4:0.583333 +1 1:-0.722222 2:-0.166667 3:-0.864407 4:-1 +1 1:-0.5 2:0.166667 3:-0.864407 4:-0.916667 +2 1:-0.222222 2:-0.333333 3:0.0508474 4:-4.03573e-08 +2 1:-0.0555556 2:-0.833333 3:0.0169491 4:-0.25 +2 1:-0.166667 2:-0.416667 3:-0.0169491 4:-0.0833333 +1 1:-0.944444 3:-0.898305 4:-0.916667 +2 1:-0.277778 2:-0.583333 3:-0.0169491 4:-0.166667 +0 1:0.111111 2:-0.333333 3:0.38983 4:0.166667 +2 1:-0.222222 2:-0.166667 3:0.0847457 4:-0.0833333 +0 1:0.166667 2:-0.333333 3:0.559322 4:0.666667 +1 1:-0.611111 2:0.0833333 3:-0.864407 4:-0.916667 +2 1:-0.333333 2:-0.583333 3:0.0169491 4:-4.03573e-08 +0 1:0.555555 2:-0.166667 3:0.661017 4:0.666667 +2 1:0.166667 3:0.186441 4:0.166667 +2 1:0.111111 2:-0.75 3:0.152542 4:-4.03573e-08 +2 1:0.166667 2:-0.25 3:0.118644 4:-4.03573e-08 +0 1:-0.0555556 2:-0.833333 3:0.355932 4:0.166667 +0 1:-0.277778 2:-0.333333 3:0.322034 4:0.583333 +2 1:-0.222222 2:-0.5 3:-0.152542 4:-0.25 +2 1:-0.111111 3:0.288136 4:0.416667 +2 1:-0.0555556 2:-0.25 3:0.186441 4:0.166667 +2 1:0.333333 2:-0.166667 3:0.355932 4:0.333333 +1 1:-0.611111 2:0.25 3:-0.898305 4:-0.833333 +0 1:0.166667 2:-0.333333 3:0.559322 4:0.75 +0 1:0.111111 2:-0.25 3:0.559322 4:0.416667 +0 1:0.833333 2:-0.166667 3:0.898305 4:0.666667 +2 1:-0.277778 2:-0.166667 3:0.186441 4:0.166667 +0 1:-0.666667 2:-0.583333 3:0.186441 4:0.333333 +1 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1 +1 1:-0.166667 2:0.666667 3:-0.932203 4:-0.916667 +0 1:0.0555554 2:-0.333333 3:0.288136 4:0.416667 +1 
1:-0.666667 2:-0.0833334 3:-0.830508 4:-1 +1 1:-0.833333 2:0.166667 3:-0.864407 4:-0.833333 +0 1:0.0555554 2:0.166667 3:0.491525 4:0.833333 +0 1:0.722222 2:-0.333333 3:0.728813 4:0.5 +2 1:-0.166667 2:-0.416667 3:0.0508474 4:-0.25 +2 1:0.5 3:0.254237 4:0.0833333 +0 1:0.111111 2:-0.583333 3:0.355932 4:0.5 +1 1:-0.944444 2:-0.166667 3:-0.898305 4:-0.916667 +2 1:0.277778 2:-0.25 3:0.220339 4:-4.03573e-08 +0 1:0.666667 2:-0.25 3:0.79661 4:0.416667 +0 1:0.111111 2:0.0833333 3:0.694915 4:1 +0 1:0.444444 3:0.59322 4:0.833333 +2 1:-0.0555556 2:0.166667 3:0.186441 4:0.25 +1 1:-0.833333 2:0.333333 3:-1 4:-0.916667 +1 1:-0.555556 2:0.416667 3:-0.830508 4:-0.75 +2 1:-0.333333 2:-0.5 3:0.152542 4:-0.0833333 +1 1:-1 2:-0.166667 3:-0.966102 4:-1 +1 1:-0.333333 2:0.25 3:-0.898305 4:-0.916667 +2 1:0.388889 2:-0.333333 3:0.288136 4:0.0833333 +2 1:0.277778 2:-0.166667 3:0.152542 4:0.0833333 +0 1:0.333333 2:0.0833333 3:0.59322 4:0.666667 +1 1:-0.777778 3:-0.79661 4:-0.916667 +1 1:-0.444444 2:0.416667 3:-0.830508 4:-0.916667 +0 1:0.222222 2:-0.166667 3:0.627119 4:0.75 +1 1:-0.555556 2:0.5 3:-0.79661 4:-0.916667 +1 1:-0.555556 2:0.5 3:-0.694915 4:-0.75 +2 1:-1.32455e-07 2:-0.25 3:0.254237 4:0.0833333 +1 1:-0.5 2:0.25 3:-0.830508 4:-0.916667 +0 1:0.166667 3:0.457627 4:0.833333 +2 1:0.444444 2:-0.0833334 3:0.322034 4:0.166667 +0 1:0.111111 2:0.166667 3:0.559322 4:0.916667 +1 1:-0.611111 2:0.25 3:-0.79661 4:-0.583333 +0 1:0.388889 3:0.661017 4:0.833333 +1 1:-0.722222 2:0.166667 3:-0.79661 4:-0.916667 +1 1:-0.722222 2:-0.0833334 3:-0.79661 4:-0.916667 +1 1:-0.555556 2:0.166667 3:-0.830508 4:-0.916667 +2 1:-0.666667 2:-0.666667 3:-0.220339 4:-0.25 +2 1:-0.611111 2:-0.75 3:-0.220339 4:-0.25 +2 1:0.0555554 2:-0.833333 3:0.186441 4:0.166667 +0 1:-0.166667 2:-0.416667 3:0.38983 4:0.5 +0 1:0.611111 2:0.333333 3:0.728813 4:1 +2 1:0.0555554 2:-0.25 3:0.118644 4:-4.03573e-08 +1 1:-0.666667 2:-0.166667 3:-0.864407 4:-0.916667 +1 1:-0.833333 2:-0.0833334 3:-0.830508 4:-0.916667 +0 1:0.611111 2:-0.166667 3:0.627119 4:0.25 +0 1:0.888889 2:0.5 3:0.932203 4:0.75 +2 1:0.222222 2:-0.333333 3:0.220339 4:0.166667 +1 1:-0.555556 2:0.25 3:-0.864407 4:-0.833333 +0 1:-1.32455e-07 2:-0.166667 3:0.322034 4:0.416667 +0 1:-1.32455e-07 2:-0.5 3:0.559322 4:0.0833333 +1 1:-0.611111 3:-0.932203 4:-0.916667 +1 1:-0.333333 2:0.833333 3:-0.864407 4:-0.916667 +0 1:-0.166667 2:-0.333333 3:0.38983 4:0.916667 +2 1:-0.333333 2:-0.666667 3:-0.0847458 4:-0.25 +2 1:-0.0555556 2:-0.416667 3:0.38983 4:0.25 +1 1:-0.388889 2:0.416667 3:-0.830508 4:-0.916667 +0 1:0.444444 2:-0.0833334 3:0.38983 4:0.833333 +1 1:-0.611111 2:0.333333 3:-0.864407 4:-0.916667 +0 1:0.111111 2:-0.416667 3:0.322034 4:0.416667 +0 1:0.166667 2:-0.0833334 3:0.525424 4:0.416667 +2 1:0.333333 2:-0.0833334 3:0.152542 4:0.0833333 +0 1:-0.0555556 2:-0.166667 3:0.288136 4:0.416667 +0 1:-0.166667 2:-0.416667 3:0.38983 4:0.5 +1 1:-0.611111 2:0.166667 3:-0.830508 4:-0.916667 +0 1:0.888889 2:-0.166667 3:0.728813 4:0.833333 +2 1:-0.277778 2:-0.25 3:-0.118644 4:-4.03573e-08 +2 1:-0.222222 2:-0.333333 3:0.186441 4:-4.03573e-08 +0 1:0.333333 2:-0.583333 3:0.627119 4:0.416667 +0 1:0.444444 2:-0.0833334 3:0.491525 4:0.666667 +2 1:-0.222222 2:-0.25 3:0.0847457 4:-4.03573e-08 +1 1:-0.611111 2:0.166667 3:-0.79661 4:-0.75 +2 1:-0.277778 2:-0.166667 3:0.0508474 4:-4.03573e-08 +0 1:1 2:0.5 3:0.830508 4:0.583333 +2 1:-0.333333 2:-0.666667 3:-0.0508475 4:-0.166667 +2 1:-0.277778 2:-0.416667 3:0.0847457 4:-4.03573e-08 +0 1:0.888889 2:-0.333333 3:0.932203 4:0.583333 +2 1:-0.111111 2:-0.166667 3:0.0847457 4:0.166667 
+2 1:0.111111 2:-0.583333 3:0.322034 4:0.166667 +0 1:0.333333 2:0.0833333 3:0.59322 4:1 +0 1:0.222222 2:-0.166667 3:0.525424 4:0.416667 +1 1:-0.555556 2:0.5 3:-0.830508 4:-0.833333 +0 1:-0.111111 2:-0.166667 3:0.38983 4:0.416667 +0 1:0.888889 2:-0.5 3:1 4:0.833333 +1 1:-0.388889 2:0.583333 3:-0.898305 4:-0.75 +2 1:0.111111 2:0.0833333 3:0.254237 4:0.25 +0 1:0.333333 2:-0.166667 3:0.423729 4:0.833333 +1 1:-0.388889 2:0.166667 3:-0.762712 4:-0.916667 +0 1:0.333333 2:-0.0833334 3:0.559322 4:0.916667 +2 1:-0.333333 2:-0.75 3:0.0169491 4:-4.03573e-08 +1 1:-0.222222 2:1 3:-0.830508 4:-0.75 +1 1:-0.388889 2:0.583333 3:-0.762712 4:-0.75 +2 1:-0.611111 2:-1 3:-0.152542 4:-0.25 +2 1:-1.32455e-07 2:-0.333333 3:0.254237 4:-0.0833333 +2 1:-0.5 2:-0.416667 3:-0.0169491 4:0.0833333 +1 1:-0.888889 2:-0.75 3:-0.898305 4:-0.833333 +1 1:-0.666667 2:-0.0833334 3:-0.830508 4:-1 +2 1:-0.555556 2:-0.583333 3:-0.322034 4:-0.166667 +2 1:-0.166667 2:-0.5 3:0.0169491 4:-0.0833333 +1 1:-0.555556 2:0.0833333 3:-0.762712 4:-0.666667 +1 1:-0.777778 3:-0.898305 4:-0.916667 +0 1:0.388889 2:-0.166667 3:0.525424 4:0.666667 +0 1:0.222222 3:0.38983 4:0.583333 +2 1:0.333333 2:-0.0833334 3:0.254237 4:0.166667 +2 1:-0.388889 2:-0.166667 3:0.186441 4:0.166667 +0 1:-0.222222 2:-0.583333 3:0.355932 4:0.583333 +1 1:-0.611111 2:-0.166667 3:-0.79661 4:-0.916667 +1 1:-0.944444 2:-0.25 3:-0.864407 4:-0.916667 +1 1:-0.388889 2:0.166667 3:-0.830508 4:-0.75 diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 3dbb35f7054a2..0b14a618e755c 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -118,14 +118,14 @@ if [[ ! "$@" =~ --skip-publish ]]; then rm -rf $SPARK_REPO - build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \ - -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ + build/mvn -DskipTests -Pyarn -Phive \ + -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.11.sh - build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \ - -Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ + build/mvn -DskipTests -Pyarn -Phive \ + -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.10.sh @@ -228,14 +228,14 @@ if [[ ! "$@" =~ --skip-package ]]; then # We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds # share the same Zinc server. 
- make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" "3030" & - make_binary_release "hadoop1-scala2.11" "-Phive -Dscala-2.11" "3031" & - make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" & - make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" & - make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" & - make_binary_release "mapr3" "-Pmapr3 -Phive -Phive-thriftserver" "3035" & - make_binary_release "mapr4" "-Pmapr4 -Pyarn -Phive -Phive-thriftserver" "3036" & - make_binary_release "hadoop2.4-without-hive" "-Phadoop-2.4 -Pyarn" "3037" & + make_binary_release "hadoop1" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver" "3030" & + make_binary_release "hadoop1-scala2.11" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Dscala-2.11" "3031" & + make_binary_release "cdh4" "-Psparkr -Psparkr-docs -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" & + make_binary_release "hadoop2.3" "-Psparkr -Psparkr-docs -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" & + make_binary_release "hadoop2.4" "-Psparkr -Psparkr-docs -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" & + make_binary_release "mapr3" "-Pmapr3 -Psparkr -Psparkr-docs -Phive -Phive-thriftserver" "3035" & + make_binary_release "mapr4" "-Pmapr4 -Psparkr -Psparkr-docs -Pyarn -Phive -Phive-thriftserver" "3036" & + make_binary_release "hadoop2.4-without-hive" "-Psparkr -Psparkr-docs -Phadoop-2.4 -Pyarn" "3037" & wait rm -rf spark-$RELEASE_VERSION-bin-*/ diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index 2e1a35a629342..77d6ea8bd3815 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -31,8 +31,8 @@ import traceback SPARK_HOME = os.environ["SPARK_HOME"] -NEW_RELEASE_VERSION = "1.0.0" -PREV_RELEASE_GIT_TAG = "v0.9.1" +NEW_RELEASE_VERSION = "1.4.0" +PREV_RELEASE_GIT_TAG = "v1.3.1" CHANGELIST = "CHANGES.txt" OLD_CHANGELIST = "%s.old" % (CHANGELIST) diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py index 8051080117062..ff1e39664ee04 100755 --- a/dev/github_jira_sync.py +++ b/dev/github_jira_sync.py @@ -33,6 +33,7 @@ # User facing configs GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark") +JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK") JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira") JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark") JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX") @@ -68,7 +69,7 @@ def get_jira_prs(): page_json = get_json(page) for pull in page_json: - jiras = re.findall("SPARK-[0-9]{4,5}", pull['title']) + jiras = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull['title']) for jira in jiras: result = result + [(jira, pull)] diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index b69cd15f99f63..f583fdac5da87 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -44,9 +44,9 @@ # Remote name which points to Apache git PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "apache") # ASF JIRA username -JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "pwendell") +JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "") # ASF JIRA password -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "35500") +JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "") GITHUB_BASE = 
"https://github.com/apache/spark/pull" GITHUB_API_BASE = "https://api.github.com/repos/apache/spark" diff --git a/dev/run-tests b/dev/run-tests index 861d1671182c2..ba1eee6366c78 100755 --- a/dev/run-tests +++ b/dev/run-tests @@ -40,11 +40,11 @@ function handle_error () { { if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then - export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4" + export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=1.2.1" elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then - export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1" + export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=2.0.0-mr1-cdh4.1.1" elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then - export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0" + export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2" elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0" fi @@ -82,24 +82,31 @@ export SBT_MAVEN_PROFILES_ARGS="$SBT_MAVEN_PROFILES_ARGS -Pkinesis-asl" if [ -n "$AMPLAB_JENKINS" ]; then git fetch origin master:master - sql_diffs=$( - git diff --name-only master \ - | grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" - ) - - non_sql_diffs=$( - git diff --name-only master \ - | grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" - ) - - if [ -n "$sql_diffs" ]; then - echo "[info] Detected changes in SQL. Will run Hive test suite." - _RUN_SQL_TESTS=true - - if [ -z "$non_sql_diffs" ]; then - echo "[info] Detected no changes except in SQL. Will only run SQL tests." - _SQL_TESTS_ONLY=true + # AMP_JENKINS_PRB indicates if the current build is a pull request build. + if [ -n "$AMP_JENKINS_PRB" ]; then + # It is a pull request build. + sql_diffs=$( + git diff --name-only master \ + | grep -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" + ) + + non_sql_diffs=$( + git diff --name-only master \ + | grep -v -e "^sql/" -e "^bin/spark-sql" -e "^sbin/start-thriftserver.sh" + ) + + if [ -n "$sql_diffs" ]; then + echo "[info] Detected changes in SQL. Will run Hive test suite." + _RUN_SQL_TESTS=true + + if [ -z "$non_sql_diffs" ]; then + echo "[info] Detected no changes except in SQL. Will only run SQL tests." + _SQL_TESTS_ONLY=true + fi fi + else + # It is a regular build. We should run SQL tests. + _RUN_SQL_TESTS=true fi fi @@ -142,29 +149,6 @@ CURRENT_BLOCK=$BLOCK_BUILD { HIVE_BUILD_ARGS="$SBT_MAVEN_PROFILES_ARGS -Phive -Phive-thriftserver" - HIVE_12_BUILD_ARGS="$HIVE_BUILD_ARGS -Phive-0.12.0" - - # First build with Hive 0.12.0 to ensure patches do not break the Hive 0.12.0 build - echo "[info] Compile with Hive 0.12.0" - [ -d "lib_managed" ] && rm -rf lib_managed - echo "[info] Building Spark with these arguments: $HIVE_12_BUILD_ARGS" - - if [ "${AMPLAB_JENKINS_BUILD_TOOL}" == "maven" ]; then - build/mvn $HIVE_12_BUILD_ARGS clean package -DskipTests - else - # NOTE: echo "q" is needed because sbt on encountering a build file with failure - # (either resolution or compilation) prompts the user for input either q, r, etc - # to quit or retry. This echo is there to make it not block. - # NOTE: Do not quote $BUILD_MVN_PROFILE_ARGS or else it will be interpreted as a - # single argument! - # QUESTION: Why doesn't 'yes "q"' work? - # QUESTION: Why doesn't 'grep -v -e "^\[info\] Resolving"' work? 
- echo -e "q\n" \ - | build/sbt $HIVE_12_BUILD_ARGS clean hive/compile hive-thriftserver/compile \ - | grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including" - fi - - # Then build with default Hive version (0.13.1) because tests are based on this version echo "[info] Compile with Hive 0.13.1" [ -d "lib_managed" ] && rm -rf lib_managed echo "[info] Building Spark with these arguments: $HIVE_BUILD_ARGS" diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index f452ab66efcd8..8b2a44fd72ba5 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -185,6 +185,8 @@ done # run tests { + # Marks this build is a pull request build. + export AMP_JENKINS_PRB=true timeout "${TESTS_TIMEOUT}" ./dev/run-tests test_result="$?" diff --git a/dev/scalastyle b/dev/scalastyle index 4e03f89ed5d5d..ad93f7e85b27c 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -17,11 +17,11 @@ # limitations under the License. # -echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt -echo -e "q\n" | build/sbt -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Phive -Phive-thriftserver scalastyle > scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt # Check style with YARN built too -echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle >> scalastyle.txt -echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 test:scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt +echo -e "q\n" | build/sbt -Pkinesis-asl -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}') rm scalastyle.txt diff --git a/docs/_config.yml b/docs/_config.yml index b22b627f09007..71f1c2978efd5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,7 +14,7 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 1.4.0-SNAPSHOT +SPARK_VERSION: 1.4.0 SPARK_VERSION_SHORT: 1.4.0 SCALA_BINARY_VERSION: "2.10" SCALA_VERSION: "2.10.4" diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index b92c75f90b11c..eebb3faf90fc0 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -75,6 +75,7 @@
    • MLlib (Machine Learning)
    • GraphX (Graph Processing)
    • Bagel (Pregel on Spark)
    • +
    • SparkR (R on Spark)
    diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index 0ea3f8eab461b..6073b3626c45b 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -18,50 +18,52 @@ require 'fileutils' include FileUtils -if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1') - # Build Scaladoc for Java/Scala +if not (ENV['SKIP_API'] == '1') + if not (ENV['SKIP_SCALADOC'] == '1') + # Build Scaladoc for Java/Scala - puts "Moving to project root and building API docs." - curr_dir = pwd - cd("..") + puts "Moving to project root and building API docs." + curr_dir = pwd + cd("..") - puts "Running 'build/sbt -Pkinesis-asl compile unidoc' from " + pwd + "; this may take a few minutes..." - puts `build/sbt -Pkinesis-asl compile unidoc` + puts "Running 'build/sbt -Pkinesis-asl compile unidoc' from " + pwd + "; this may take a few minutes..." + puts `build/sbt -Pkinesis-asl compile unidoc` - puts "Moving back into docs dir." - cd("docs") + puts "Moving back into docs dir." + cd("docs") - # Copy over the unified ScalaDoc for all projects to api/scala. - # This directory will be copied over to _site when `jekyll` command is run. - source = "../target/scala-2.10/unidoc" - dest = "api/scala" + # Copy over the unified ScalaDoc for all projects to api/scala. + # This directory will be copied over to _site when `jekyll` command is run. + source = "../target/scala-2.10/unidoc" + dest = "api/scala" - puts "Making directory " + dest - mkdir_p dest + puts "Making directory " + dest + mkdir_p dest - # From the rubydoc: cp_r('src', 'dest') makes src/dest, but this doesn't. - puts "cp -r " + source + "/. " + dest - cp_r(source + "/.", dest) + # From the rubydoc: cp_r('src', 'dest') makes src/dest, but this doesn't. + puts "cp -r " + source + "/. " + dest + cp_r(source + "/.", dest) - # Append custom JavaScript - js = File.readlines("./js/api-docs.js") - js_file = dest + "/lib/template.js" - File.open(js_file, 'a') { |f| f.write("\n" + js.join()) } + # Append custom JavaScript + js = File.readlines("./js/api-docs.js") + js_file = dest + "/lib/template.js" + File.open(js_file, 'a') { |f| f.write("\n" + js.join()) } - # Append custom CSS - css = File.readlines("./css/api-docs.css") - css_file = dest + "/lib/template.css" - File.open(css_file, 'a') { |f| f.write("\n" + css.join()) } + # Append custom CSS + css = File.readlines("./css/api-docs.css") + css_file = dest + "/lib/template.css" + File.open(css_file, 'a') { |f| f.write("\n" + css.join()) } - # Copy over the unified JavaDoc for all projects to api/java. - source = "../target/javaunidoc" - dest = "api/java" + # Copy over the unified JavaDoc for all projects to api/java. + source = "../target/javaunidoc" + dest = "api/java" - puts "Making directory " + dest - mkdir_p dest + puts "Making directory " + dest + mkdir_p dest - puts "cp -r " + source + "/. " + dest - cp_r(source + "/.", dest) + puts "cp -r " + source + "/. " + dest + cp_r(source + "/.", dest) + end # Build Sphinx docs for Python diff --git a/docs/api.md b/docs/api.md index 03460383335e8..45df77ac05f78 100644 --- a/docs/api.md +++ b/docs/api.md @@ -7,4 +7,5 @@ Here you can API docs for Spark and its submodules. 
- [Spark Scala API (Scaladoc)](api/scala/index.html) - [Spark Java API (Javadoc)](api/java/index.html) -- [Spark Python API (Epydoc)](api/python/index.html) +- [Spark Python API (Sphinx)](api/python/index.html) +- [Spark R API (Roxygen2)](api/R/index.html) diff --git a/docs/building-spark.md b/docs/building-spark.md index 287fcd3c4034f..78cb9086f95e8 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -34,6 +34,16 @@ and in `project/SparkBuild.scala` add: to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/pull/2883/files) if you are unsure of where to add these lines. +# Building a Runnable Distribution + +To create a Spark distribution like those distributed by the +[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as +to be runnable, use `make-distribution.sh` in the project root directory. It can be configured +with Maven profile settings and so on like the direct Maven build. Example: + + ./make-distribution.sh --name custom-spark --tgz -Phadoop-2.4 -Pyarn + +For more information on usage, run `./make-distribution.sh --help` # Setting up Maven's Memory Usage @@ -59,17 +69,18 @@ You can fix this by setting the `MAVEN_OPTS` variable as discussed before. # Specifying the Hadoop Version -Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. Note that certain build profiles are required for particular Hadoop versions: +Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 2.2.0 by default. Note that certain build profiles are required for particular Hadoop versions: - + +
    Hadoop versionProfile required
    1.x to 2.1.x(none)
    1.x to 2.1.xhadoop-1
    2.2.xhadoop-2.2
    2.3.xhadoop-2.3
    2.4.xhadoop-2.4
    2.6.x and later 2.xhadoop-2.6
    @@ -77,10 +88,10 @@ For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hado {% highlight bash %} # Apache Hadoop 1.2.1 -mvn -Dhadoop.version=1.2.1 -DskipTests clean package +mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package # Cloudera CDH 4.2.0 with MapReduce v1 -mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package +mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package {% endhighlight %} You can enable the "yarn" profile and optionally set the "yarn.version" property if it is different from "hadoop.version". Spark only supports YARN versions 2.2.0 and later. @@ -88,8 +99,9 @@ You can enable the "yarn" profile and optionally set the "yarn.version" property Examples: {% highlight bash %} + # Apache Hadoop 2.2.X -mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package +mvn -Pyarn -Phadoop-2.2 -DskipTests clean package # Apache Hadoop 2.3.X mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package @@ -107,14 +119,10 @@ mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -Dyarn.version=2.2.0 -DskipTests # Building With Hive and JDBC Support To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 0.13.1 bindings. You can also build for -Hive 0.12.0 using the `-Phive-0.12.0` profile. +By default Spark will build with Hive 0.13.1 bindings. {% highlight bash %} # Apache Hadoop 2.4.X with Hive 13 support mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-thriftserver -DskipTests clean package - -# Apache Hadoop 2.4.X with Hive 12 support -mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-0.12.0 -Phive-thriftserver -DskipTests clean package {% endhighlight %} # Building for Scala 2.11 @@ -123,9 +131,7 @@ To produce a Spark package compiled with Scala 2.11, use the `-Dscala-2.11` prop dev/change-version-to-2.11.sh mvn -Pyarn -Phadoop-2.4 -Dscala-2.11 -DskipTests clean package -Scala 2.11 support in Spark does not support a few features due to dependencies -which are themselves not Scala 2.11 ready. Specifically, Spark's external -Kafka library and JDBC component are not yet supported in Scala 2.11 builds. +Spark does not yet support its JDBC component for Scala 2.11. # Spark Tests in Maven @@ -169,7 +175,7 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m # Building Spark with IntelliJ IDEA or Eclipse For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the -[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup). +[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-IDESetup). # Running Java 8 Test Suites diff --git a/docs/configuration.md b/docs/configuration.md index 64066bc0d70cd..3a48da4592dd9 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1,4 +1,4 @@ --- +--- layout: global displayTitle: Spark Configuration title: Configuration @@ -1194,7 +1194,7 @@ Apart from these, the following properties are also available, and may be useful spark.dynamicAllocation.executorIdleTimeout - 600s + 60s If dynamic allocation is enabled and an executor has been idle for more than this duration, the executor will be removed. 
For more detail, see this @@ -1224,7 +1224,7 @@ Apart from these, the following properties are also available, and may be useful spark.dynamicAllocation.schedulerBacklogTimeout - 5s + 1s If dynamic allocation is enabled and there have been pending tasks backlogged for more than this duration, new executors will be requested. For more detail, see this @@ -1474,6 +1474,13 @@ Apart from these, the following properties are also available, and may be useful the new Kafka direct stream API. + + spark.streaming.ui.retainedBatches + 1000 + + How many batches the Spark Streaming UI and status APIs remember before garbage collecting. + + #### Cluster Managers diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md index 96bd69ca3b33b..795dd82a6be06 100644 --- a/docs/hadoop-third-party-distributions.md +++ b/docs/hadoop-third-party-distributions.md @@ -14,7 +14,7 @@ property. For certain versions, you will need to specify additional profiles. Fo see the guide on [building with maven](building-spark.html#specifying-the-hadoop-version): mvn -Dhadoop.version=1.0.4 -DskipTests clean package - mvn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package + mvn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package The table below lists the corresponding `hadoop.version` code for each CDH/HDP release. Note that some Hadoop releases are binary compatible across client versions. This means the pre-built Spark diff --git a/docs/index.md b/docs/index.md index b5b016e34795e..fac071da81e60 100644 --- a/docs/index.md +++ b/docs/index.md @@ -6,7 +6,7 @@ description: Apache Spark SPARK_VERSION_SHORT documentation homepage --- Apache Spark is a fast and general-purpose cluster computing system. -It provides high-level APIs in Java, Scala and Python, +It provides high-level APIs in Java, Scala, Python and R, and an optimized engine that supports general execution graphs. It also supports a rich set of higher-level tools including [Spark SQL](sql-programming-guide.html) for SQL and structured data processing, [MLlib](mllib-guide.html) for machine learning, [GraphX](graphx-programming-guide.html) for graph processing, and [Spark Streaming](streaming-programming-guide.html). @@ -20,13 +20,13 @@ Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS). It's easy locally on one machine --- all you need is to have `java` installed on your system `PATH`, or the `JAVA_HOME` environment variable pointing to a Java installation. -Spark runs on Java 6+ and Python 2.6+. For the Scala API, Spark {{site.SPARK_VERSION}} uses +Spark runs on Java 6+, Python 2.6+ and R 3.1+. For the Scala API, Spark {{site.SPARK_VERSION}} uses Scala {{site.SCALA_BINARY_VERSION}}. You will need to use a compatible Scala version ({{site.SCALA_BINARY_VERSION}}.x). # Running the Examples and Shell -Spark comes with several sample programs. Scala, Java and Python examples are in the +Spark comes with several sample programs. Scala, Java, Python and R examples are in the `examples/src/main` directory. To run one of the Java or Scala sample programs, use `bin/run-example [params]` in the top-level Spark directory. (Behind the scenes, this invokes the more general @@ -54,6 +54,15 @@ Example applications are also provided in Python. For example, ./bin/spark-submit examples/src/main/python/pi.py 10 +Spark also provides an experimental [R API](sparkr.html) since 1.4 (only DataFrames APIs included). 
+To run Spark interactively in a R interpreter, use `bin/sparkR`: + + ./bin/sparkR --master local[2] + +Example applications are also provided in R. For example, + + ./bin/spark-submit examples/src/main/r/dataframe.R + # Launching on a Cluster The Spark [cluster mode overview](cluster-overview.html) explains the key concepts in running on a cluster. @@ -71,7 +80,7 @@ options for deployment: * [Quick Start](quick-start.html): a quick introduction to the Spark API; start here! * [Spark Programming Guide](programming-guide.html): detailed overview of Spark - in all supported languages (Scala, Java, Python) + in all supported languages (Scala, Java, Python, R) * Modules built on Spark: * [Spark Streaming](streaming-programming-guide.html): processing real-time data streams * [Spark SQL and DataFrames](sql-programming-guide.html): support for structured data and relational queries @@ -83,7 +92,8 @@ options for deployment: * [Spark Scala API (Scaladoc)](api/scala/index.html#org.apache.spark.package) * [Spark Java API (Javadoc)](api/java/index.html) -* [Spark Python API (Epydoc)](api/python/index.html) +* [Spark Python API (Sphinx)](api/python/index.html) +* [Spark R API (Roxygen2)](api/R/index.html) **Deployment Guides:** @@ -124,4 +134,5 @@ options for deployment: available online for free. * [Code Examples](http://spark.apache.org/examples.html): more are also available in the `examples` subfolder of Spark ([Scala]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/scala/org/apache/spark/examples), [Java]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/java/org/apache/spark/examples), - [Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python)) + [Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python), + [R]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/r)) diff --git a/docs/ml-ensembles.md b/docs/ml-ensembles.md new file mode 100644 index 0000000000000..9ff50e95fc479 --- /dev/null +++ b/docs/ml-ensembles.md @@ -0,0 +1,129 @@ +--- +layout: global +title: Ensembles +displayTitle: ML - Ensembles +--- + +**Table of Contents** + +* This will become a table of contents (this text will be scraped). +{:toc} + +An [ensemble method](http://en.wikipedia.org/wiki/Ensemble_learning) +is a learning algorithm which creates a model composed of a set of other base models. +The Pipelines API supports the following ensemble algorithms: [`OneVsRest`](api/scala/index.html#org.apache.spark.ml.classifier.OneVsRest) + +## OneVsRest + +[OneVsRest](http://en.wikipedia.org/wiki/Multiclass_classification#One-vs.-rest) is an example of a machine learning reduction for performing multiclass classification given a base classifier that can perform binary classification efficiently. + +`OneVsRest` is implemented as an `Estimator`. For the base classifier it takes instances of `Classifier` and creates a binary classification problem for each of the k classes. The classifier for class i is trained to predict whether the label is i or not, distinguishing class i from all other classes. + +Predictions are done by evaluating each binary classifier and the index of the most confident classifier is output as label. + +### Example + +The example below demonstrates how to load the +[Iris dataset](http://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass/iris.scale), parse it as a DataFrame and perform multiclass classification using `OneVsRest`. The test error is calculated to measure the algorithm accuracy. + +
    +
    +{% highlight scala %} +import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest} +import org.apache.spark.mllib.evaluation.MulticlassMetrics +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.sql.{Row, SQLContext} + +val sqlContext = new SQLContext(sc) + +// parse data into dataframe +val data = MLUtils.loadLibSVMFile(sc, + "data/mllib/sample_multiclass_classification_data.txt") +val Array(train, test) = data.toDF().randomSplit(Array(0.7, 0.3)) + +// instantiate multiclass learner and train +val ovr = new OneVsRest().setClassifier(new LogisticRegression) + +val ovrModel = ovr.fit(train) + +// score model on test data +val predictions = ovrModel.transform(test).select("prediction", "label") +val predictionsAndLabels = predictions.map {case Row(p: Double, l: Double) => (p, l)} + +// compute confusion matrix +val metrics = new MulticlassMetrics(predictionsAndLabels) +println(metrics.confusionMatrix) + +// the Iris DataSet has three classes +val numClasses = 3 + +println("label\tfpr\n") +(0 until numClasses).foreach { index => + val label = index.toDouble + println(label + "\t" + metrics.falsePositiveRate(label)) +} +{% endhighlight %} +
    +
    +{% highlight java %} + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.ml.classification.OneVsRest; +import org.apache.spark.ml.classification.OneVsRestModel; +import org.apache.spark.mllib.evaluation.MulticlassMetrics; +import org.apache.spark.mllib.linalg.Matrix; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.util.MLUtils; +import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; + +SparkConf conf = new SparkConf().setAppName("JavaOneVsRestExample"); +JavaSparkContext jsc = new JavaSparkContext(conf); +SQLContext jsql = new SQLContext(jsc); + +RDD data = MLUtils.loadLibSVMFile(jsc.sc(), + "data/mllib/sample_multiclass_classification_data.txt"); + +DataFrame dataFrame = jsql.createDataFrame(data, LabeledPoint.class); +DataFrame[] splits = dataFrame.randomSplit(new double[]{0.7, 0.3}, 12345); +DataFrame train = splits[0]; +DataFrame test = splits[1]; + +// instantiate the One Vs Rest Classifier +OneVsRest ovr = new OneVsRest().setClassifier(new LogisticRegression()); + +// train the multiclass model +OneVsRestModel ovrModel = ovr.fit(train.cache()); + +// score the model on test data +DataFrame predictions = ovrModel + .transform(test) + .select("prediction", "label"); + +// obtain metrics +MulticlassMetrics metrics = new MulticlassMetrics(predictions); +Matrix confusionMatrix = metrics.confusionMatrix(); + +// output the Confusion Matrix +System.out.println("Confusion Matrix"); +System.out.println(confusionMatrix); + +// compute the false positive rate per label +System.out.println(); +System.out.println("label\tfpr\n"); + +// the Iris DataSet has three classes +int numClasses = 3; +for (int index = 0; index < numClasses; index++) { + double label = (double) index; + System.out.print(label); + System.out.print("\t"); + System.out.print(metrics.falsePositiveRate(label)); + System.out.println(); +} +{% endhighlight %} +
    +
    diff --git a/docs/ml-features.md b/docs/ml-features.md new file mode 100644 index 0000000000000..f88c0248c1a8a --- /dev/null +++ b/docs/ml-features.md @@ -0,0 +1,1198 @@ +--- +layout: global +title: Feature Extraction, Transformation, and Selection - SparkML +displayTitle: ML - Features +--- + +This section covers algorithms for working with features, roughly divided into these groups: + +* Extraction: Extracting features from "raw" data +* Transformation: Scaling, converting, or modifying features +* Selection: Selecting a subset from a larger set of features + +**Table of Contents** + +* This will become a table of contents (this text will be scraped). +{:toc} + + +# Feature Extractors + +## TF-IDF (HashingTF and IDF) + +[Term Frequency-Inverse Document Frequency (TF-IDF)](http://en.wikipedia.org/wiki/Tf%E2%80%93idf) is a common text pre-processing step. In Spark ML, TF-IDF is separate into two parts: TF (+hashing) and IDF. + +**TF**: `HashingTF` is a `Transformer` which takes sets of terms and converts those sets into fixed-length feature vectors. In text processing, a "set of terms" might be a bag of words. +The algorithm combines Term Frequency (TF) counts with the [hashing trick](http://en.wikipedia.org/wiki/Feature_hashing) for dimensionality reduction. + +**IDF**: `IDF` is an `Estimator` which fits on a dataset and produces an `IDFModel`. The `IDFModel` takes feature vectors (generally created from `HashingTF`) and scales each column. Intuitively, it down-weights columns which appear frequently in a corpus. + +Please refer to the [MLlib user guide on TF-IDF](mllib-feature-extraction.html#tf-idf) for more details on Term Frequency and Inverse Document Frequency. +For API details, refer to the [HashingTF API docs](api/scala/index.html#org.apache.spark.ml.feature.HashingTF) and the [IDF API docs](api/scala/index.html#org.apache.spark.ml.feature.IDF). + +In the following code segment, we start with a set of sentences. We split each sentence into words using `Tokenizer`. For each sentence (bag of words), we use `HashingTF` to hash the sentence into a feature vector. We use `IDF` to rescale the feature vectors; this generally improves performance when using text as features. Our feature vectors could then be passed to a learning algorithm. + +
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.{HashingTF, IDF, Tokenizer} + +val sentenceData = sqlContext.createDataFrame(Seq( + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +)).toDF("label", "sentence") +val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words") +val wordsData = tokenizer.transform(sentenceData) +val hashingTF = new HashingTF().setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(20) +val featurizedData = hashingTF.transform(wordsData) +val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features") +val idfModel = idf.fit(featurizedData) +val rescaledData = idfModel.transform(featurizedData) +rescaledData.select("features", "label").take(3).foreach(println) +{% endhighlight %} +
    + +
    +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.HashingTF; +import org.apache.spark.ml.feature.IDF; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "Hi I heard about Spark"), + RowFactory.create(0, "I wish Java could use case classes"), + RowFactory.create(1, "Logistic regression models are neat") +)); +StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) +}); +DataFrame sentenceData = sqlContext.createDataFrame(jrdd, schema); +Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); +DataFrame wordsData = tokenizer.transform(sentenceData); +int numFeatures = 20; +HashingTF hashingTF = new HashingTF() + .setInputCol("words") + .setOutputCol("rawFeatures") + .setNumFeatures(numFeatures); +DataFrame featurizedData = hashingTF.transform(wordsData); +IDF idf = new IDF().setInputCol("rawFeatures").setOutputCol("features"); +IDFModel idfModel = idf.fit(featurizedData); +DataFrame rescaledData = idfModel.transform(featurizedData); +for (Row r : rescaledData.select("features", "label").take(3)) { + Vector features = r.getAs(0); + Double label = r.getDouble(1); + System.out.println(features); +} +{% endhighlight %} +
    + +
    +{% highlight python %} +from pyspark.ml.feature import HashingTF, IDF, Tokenizer + +sentenceData = sqlContext.createDataFrame([ + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +], ["label", "sentence"]) +tokenizer = Tokenizer(inputCol="sentence", outputCol="words") +wordsData = tokenizer.transform(sentenceData) +hashingTF = HashingTF(inputCol="words", outputCol="rawFeatures", numFeatures=20) +featurizedData = hashingTF.transform(wordsData) +idf = IDF(inputCol="rawFeatures", outputCol="features") +idfModel = idf.fit(featurizedData) +rescaledData = idfModel.transform(featurizedData) +for features_label in rescaledData.select("features", "label").take(3): + print(features_label) +{% endhighlight %} +
    +
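+
+As a minimal sketch (not part of the per-stage examples above; it reuses the `sentenceData`, `tokenizer`, `hashingTF`, and `idf` instances from the Scala example), these three stages can also be chained into a single `Pipeline`, so the whole TF-IDF computation is fit and applied in one step:
+
+{% highlight scala %}
+import org.apache.spark.ml.Pipeline
+
+// Chain Tokenizer -> HashingTF -> IDF; fitting the pipeline fits the IDF stage.
+val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, idf))
+val pipelineModel = pipeline.fit(sentenceData)
+pipelineModel.transform(sentenceData).select("features", "label").take(3).foreach(println)
+{% endhighlight %}
+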
+
+
+## Word2Vec
+
+`Word2Vec` is an `Estimator` which takes sequences of words representing documents and trains a `Word2VecModel`. The model is essentially a `Map(String, Vector)`, mapping each word to a unique fixed-size vector. The `Word2VecModel` transforms each document into a vector by averaging the vectors of all the words in the document; this document vector can then be used for other computations on documents, such as similarity calculations. Please refer to the [MLlib user guide on Word2Vec](mllib-feature-extraction.html#Word2Vec) for more details on Word2Vec.
+
+Word2Vec is implemented in [Word2Vec](api/scala/index.html#org.apache.spark.ml.feature.Word2Vec). In the following code segment, we start with a set of documents, each represented as a sequence of words. For each document, we transform it into a feature vector. This feature vector could then be passed to a learning algorithm.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.Word2Vec + +// Input data: Each row is a bag of words from a sentence or document. +val documentDF = sqlContext.createDataFrame(Seq( + "Hi I heard about Spark".split(" "), + "I wish Java could use case classes".split(" "), + "Logistic regression models are neat".split(" ") +).map(Tuple1.apply)).toDF("text") + +// Learn a mapping from words to Vectors. +val word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0) +val model = word2Vec.fit(documentDF) +val result = model.transform(documentDF) +result.select("result").take(3).foreach(println) +{% endhighlight %} +
    + +
    +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; + +JavaSparkContext jsc = ... +SQLContext sqlContext = ... + +// Input data: Each row is a bag of words from a sentence or document. +JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))), + RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))), + RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" "))) +)); +StructType schema = new StructType(new StructField[]{ + new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) +}); +DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema); + +// Learn a mapping from words to Vectors. +Word2Vec word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0); +Word2VecModel model = word2Vec.fit(documentDF); +DataFrame result = model.transform(documentDF); +for (Row r: result.select("result").take(3)) { + System.out.println(r); +} +{% endhighlight %} +
    + +
    +{% highlight python %} +from pyspark.ml.feature import Word2Vec + +# Input data: Each row is a bag of words from a sentence or document. +documentDF = sqlContext.createDataFrame([ + ("Hi I heard about Spark".split(" "), ), + ("I wish Java could use case classes".split(" "), ), + ("Logistic regression models are neat".split(" "), ) +], ["text"]) +# Learn a mapping from words to Vectors. +word2Vec = Word2Vec(vectorSize=3, minCount=0, inputCol="text", outputCol="result") +model = word2Vec.fit(documentDF) +result = model.transform(documentDF) +for feature in result.select("result").take(3): + print(feature) +{% endhighlight %} +
    +
    + +# Feature Transformers + +## Tokenizer + +[Tokenization](http://en.wikipedia.org/wiki/Lexical_analysis#Tokenization) is the process of taking text (such as a sentence) and breaking it into individual terms (usually words). A simple [Tokenizer](api/scala/index.html#org.apache.spark.ml.feature.Tokenizer) class provides this functionality. The example below shows how to split sentences into sequences of words. + +Note: A more advanced tokenizer is provided via [RegexTokenizer](api/scala/index.html#org.apache.spark.ml.feature.RegexTokenizer). + +
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.Tokenizer + +val sentenceDataFrame = sqlContext.createDataFrame(Seq( + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +)).toDF("label", "sentence") +val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words") +val wordsDataFrame = tokenizer.transform(sentenceDataFrame) +wordsDataFrame.select("words", "label").take(3).foreach(println) +{% endhighlight %} +
    + +
    +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.Tokenizer; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "Hi I heard about Spark"), + RowFactory.create(0, "I wish Java could use case classes"), + RowFactory.create(1, "Logistic regression models are neat") +)); +StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) +}); +DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema); +Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); +DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame); +for (Row r : wordsDataFrame.select("words", "label").take(3)) { + java.util.List words = r.getList(0); + for (String word : words) System.out.print(word + " "); + System.out.println(); +} +{% endhighlight %} +
    + +
    +{% highlight python %} +from pyspark.ml.feature import Tokenizer + +sentenceDataFrame = sqlContext.createDataFrame([ + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +], ["label", "sentence"]) +tokenizer = Tokenizer(inputCol="sentence", outputCol="words") +wordsDataFrame = tokenizer.transform(sentenceDataFrame) +for words_label in wordsDataFrame.select("words", "label").take(3): + print(words_label) +{% endhighlight %} +
    +
+
+
+
+## Binarizer
+
+Binarization is the process of thresholding numerical features to binary features. As some probabilistic estimators make the assumption that the input data follows a [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution), a binarizer is useful for pre-processing input data with continuous numerical features.
+
+A simple [Binarizer](api/scala/index.html#org.apache.spark.ml.feature.Binarizer) class provides this functionality. Besides the common `inputCol` and `outputCol` parameters, `Binarizer` has a `threshold` parameter used for binarizing continuous numerical features. Features greater than the threshold are binarized to 1.0; features equal to or less than the threshold are binarized to 0.0. The example below shows how to binarize numerical features.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.Binarizer +import org.apache.spark.sql.DataFrame + +val data = Array( + (0, 0.1), + (1, 0.8), + (2, 0.2) +) +val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature") + +val binarizer: Binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5) + +val binarizedDataFrame = binarizer.transform(dataFrame) +val binarizedFeatures = binarizedDataFrame.select("binarized_feature") +binarizedFeatures.collect().foreach(println) +{% endhighlight %} +
    + +
    +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.Binarizer; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, 0.1), + RowFactory.create(1, 0.8), + RowFactory.create(2, 0.2) +)); +StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("feature", DataTypes.DoubleType, false, Metadata.empty()) +}); +DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema); +Binarizer binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5); +DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame); +DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature"); +for (Row r : binarizedFeatures.collect()) { + Double binarized_value = r.getDouble(0); + System.out.println(binarized_value); +} +{% endhighlight %} +
    + +
    +{% highlight python %} +from pyspark.ml.feature import Binarizer + +continuousDataFrame = sqlContext.createDataFrame([ + (0, 0.1), + (1, 0.8), + (2, 0.2) +], ["label", "feature"]) +binarizer = Binarizer(threshold=0.5, inputCol="feature", outputCol="binarized_feature") +binarizedDataFrame = binarizer.transform(continuousDataFrame) +binarizedFeatures = binarizedDataFrame.select("binarized_feature") +for binarized_feature, in binarizedFeatures.collect(): + print(binarized_feature) +{% endhighlight %} +
    +
+
+
+## PolynomialExpansion
+
+[Polynomial expansion](http://en.wikipedia.org/wiki/Polynomial_expansion) is the process of expanding your features into a polynomial space, formed by an n-degree combination of the original dimensions. A [PolynomialExpansion](api/scala/index.html#org.apache.spark.ml.feature.PolynomialExpansion) class provides this functionality. The example below shows how to expand your features into a 3-degree polynomial space.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.PolynomialExpansion +import org.apache.spark.mllib.linalg.Vectors + +val data = Array( + Vectors.dense(-2.0, 2.3), + Vectors.dense(0.0, 0.0), + Vectors.dense(0.6, -1.1) +) +val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") +val polynomialExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3) +val polyDF = polynomialExpansion.transform(df) +polyDF.select("polyFeatures").take(3).foreach(println) +{% endhighlight %} +
    + +
    +{% highlight java %} +import com.google.common.collect.Lists; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +JavaSparkContext jsc = ... +SQLContext jsql = ... +PolynomialExpansion polyExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3); +JavaRDD data = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Vectors.dense(-2.0, 2.3)), + RowFactory.create(Vectors.dense(0.0, 0.0)), + RowFactory.create(Vectors.dense(0.6, -1.1)) +)); +StructType schema = new StructType(new StructField[] { + new StructField("features", new VectorUDT(), false, Metadata.empty()), +}); +DataFrame df = jsql.createDataFrame(data, schema); +DataFrame polyDF = polyExpansion.transform(df); +Row[] row = polyDF.select("polyFeatures").take(3); +for (Row r : row) { + System.out.println(r.get(0)); +} +{% endhighlight %} +
    + +
+
+{% highlight python %}
+from pyspark.ml.feature import PolynomialExpansion
+from pyspark.mllib.linalg import Vectors
+
+df = sqlContext.createDataFrame(
+  [(Vectors.dense([-2.0, 2.3]), ),
+   (Vectors.dense([0.0, 0.0]), ),
+   (Vectors.dense([0.6, -1.1]), )],
+  ["features"])
+px = PolynomialExpansion(degree=3, inputCol="features", outputCol="polyFeatures")
+polyDF = px.transform(df)
+for expanded in polyDF.select("polyFeatures").take(3):
+  print(expanded)
+{% endhighlight %}
+
    +
    + +## StringIndexer + +`StringIndexer` encodes a string column of labels to a column of label indices. +The indices are in `[0, numLabels)`, ordered by label frequencies. +So the most frequent label gets index `0`. +If the input column is numeric, we cast it to string and index the string values. + +**Examples** + +Assume that we have the following DataFrame with columns `id` and `category`: + +~~~~ + id | category +----|---------- + 0 | a + 1 | b + 2 | c + 3 | a + 4 | a + 5 | c +~~~~ + +`category` is a string column with three labels: "a", "b", and "c". +Applying `StringIndexer` with `category` as the input column and `categoryIndex` as the output +column, we should get the following: + +~~~~ + id | category | categoryIndex +----|----------|--------------- + 0 | a | 0.0 + 1 | b | 2.0 + 2 | c | 1.0 + 3 | a | 0.0 + 4 | a | 0.0 + 5 | c | 1.0 +~~~~ + +"a" gets index `0` because it is the most frequent, followed by "c" with index `1` and "b" with +index `2`. + +
    + +
    + +[`StringIndexer`](api/scala/index.html#org.apache.spark.ml.feature.StringIndexer) takes an input +column name and an output column name. + +{% highlight scala %} +import org.apache.spark.ml.feature.StringIndexer + +val df = sqlContext.createDataFrame( + Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")) +).toDF("id", "category") +val indexer = new StringIndexer() + .setInputCol("category") + .setOutputCol("categoryIndex") +val indexed = indexer.fit(df).transform(df) +indexed.show() +{% endhighlight %} +
    + +
+
+[`StringIndexer`](api/java/org/apache/spark/ml/feature/StringIndexer.html) takes an input column
+name and an output column name.
+
+{% highlight java %}
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import static org.apache.spark.sql.types.DataTypes.*;
+
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+  RowFactory.create(0, "a"),
+  RowFactory.create(1, "b"),
+  RowFactory.create(2, "c"),
+  RowFactory.create(3, "a"),
+  RowFactory.create(4, "a"),
+  RowFactory.create(5, "c")
+));
+StructType schema = new StructType(new StructField[] {
+  // "id" is created as an integer above, so declare it as IntegerType
+  createStructField("id", IntegerType, false),
+  createStructField("category", StringType, false)
+});
+DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+StringIndexer indexer = new StringIndexer()
+  .setInputCol("category")
+  .setOutputCol("categoryIndex");
+DataFrame indexed = indexer.fit(df).transform(df);
+indexed.show();
+{% endhighlight %}
+
    + +
    + +[`StringIndexer`](api/python/pyspark.ml.html#pyspark.ml.feature.StringIndexer) takes an input +column name and an output column name. + +{% highlight python %} +from pyspark.ml.feature import StringIndexer + +df = sqlContext.createDataFrame( + [(0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")], + ["id", "category"]) +indexer = StringIndexer(inputCol="category", outputCol="categoryIndex") +indexed = indexer.fit(df).transform(df) +indexed.show() +{% endhighlight %} +
    +
+
+
+## OneHotEncoder
+
+[One-hot encoding](http://en.wikipedia.org/wiki/One-hot) maps a column of label indices to a column of binary vectors, with at most a single one-value. This encoding allows algorithms which expect continuous features, such as Logistic Regression, to use categorical features.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer} + +val df = sqlContext.createDataFrame(Seq( + (0, "a"), + (1, "b"), + (2, "c"), + (3, "a"), + (4, "a"), + (5, "c") +)).toDF("id", "category") + +val indexer = new StringIndexer() + .setInputCol("category") + .setOutputCol("categoryIndex") + .fit(df) +val indexed = indexer.transform(df) + +val encoder = new OneHotEncoder().setInputCol("categoryIndex"). + setOutputCol("categoryVec") +val encoded = encoder.transform(indexed) +encoded.select("id", "categoryVec").foreach(println) +{% endhighlight %} +
    + +
+
+{% highlight java %}
+import com.google.common.collect.Lists;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.OneHotEncoder;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.ml.feature.StringIndexerModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+  RowFactory.create(0, "a"),
+  RowFactory.create(1, "b"),
+  RowFactory.create(2, "c"),
+  RowFactory.create(3, "a"),
+  RowFactory.create(4, "a"),
+  RowFactory.create(5, "c")
+));
+StructType schema = new StructType(new StructField[]{
+  new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
+  new StructField("category", DataTypes.StringType, false, Metadata.empty())
+});
+DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+StringIndexerModel indexer = new StringIndexer()
+  .setInputCol("category")
+  .setOutputCol("categoryIndex")
+  .fit(df);
+DataFrame indexed = indexer.transform(df);
+
+OneHotEncoder encoder = new OneHotEncoder()
+  .setInputCol("categoryIndex")
+  .setOutputCol("categoryVec");
+DataFrame encoded = encoder.transform(indexed);
+{% endhighlight %}
+
    + +
    +{% highlight python %} +from pyspark.ml.feature import OneHotEncoder, StringIndexer + +df = sqlContext.createDataFrame([ + (0, "a"), + (1, "b"), + (2, "c"), + (3, "a"), + (4, "a"), + (5, "c") +], ["id", "category"]) + +stringIndexer = StringIndexer(inputCol="category", outputCol="categoryIndex") +model = stringIndexer.fit(df) +indexed = model.transform(df) +encoder = OneHotEncoder(includeFirst=False, inputCol="categoryIndex", outputCol="categoryVec") +encoded = encoder.transform(indexed) +{% endhighlight %} +
    +
+
+
+## VectorIndexer
+
+`VectorIndexer` helps index categorical features in datasets of `Vector`s. It can both automatically decide which features are categorical and convert original values to category indices. Specifically, it does the following:
+
+1. Take an input column of type [Vector](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) and a parameter `maxCategories`.
+2. Decide which features should be categorical based on the number of distinct values, where features with at most `maxCategories` distinct values are declared categorical.
+3. Compute 0-based category indices for each categorical feature.
+4. Index categorical features and transform original feature values to indices.
+
+Indexing categorical features allows algorithms such as Decision Trees and Tree Ensembles to treat categorical features appropriately, improving performance.
+
+Please refer to the [VectorIndexer API docs](api/scala/index.html#org.apache.spark.ml.feature.VectorIndexer) for more details.
+
+In the example below, we read in a dataset of labeled points and then use `VectorIndexer` to decide which features should be treated as categorical. We transform the categorical feature values to their indices. This transformed data could then be passed to algorithms such as `DecisionTreeRegressor` that handle categorical features; a brief sketch of that follow-on step appears after the examples below.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.VectorIndexer +import org.apache.spark.mllib.util.MLUtils + +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +val indexer = new VectorIndexer() + .setInputCol("features") + .setOutputCol("indexed") + .setMaxCategories(10) +val indexerModel = indexer.fit(data) +val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet +println(s"Chose ${categoricalFeatures.size} categorical features: " + + categoricalFeatures.mkString(", ")) + +// Create new column "indexed" with categorical values transformed to indices +val indexedData = indexerModel.transform(data) +{% endhighlight %} +
    + +
+
+{% highlight java %}
+import java.util.Map;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.VectorIndexer;
+import org.apache.spark.ml.feature.VectorIndexerModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.sql.DataFrame;
+
+JavaRDD<LabeledPoint> rdd = MLUtils.loadLibSVMFile(sc.sc(),
+  "data/mllib/sample_libsvm_data.txt").toJavaRDD();
+DataFrame data = sqlContext.createDataFrame(rdd, LabeledPoint.class);
+VectorIndexer indexer = new VectorIndexer()
+  .setInputCol("features")
+  .setOutputCol("indexed")
+  .setMaxCategories(10);
+VectorIndexerModel indexerModel = indexer.fit(data);
+Map<Integer, Map<Double, Integer>> categoryMaps = indexerModel.javaCategoryMaps();
+System.out.print("Chose " + categoryMaps.size() + " categorical features:");
+for (Integer feature : categoryMaps.keySet()) {
+  System.out.print(" " + feature);
+}
+System.out.println();
+
+// Create new column "indexed" with categorical values transformed to indices
+DataFrame indexedData = indexerModel.transform(data);
+{% endhighlight %}
+
    + +
    +{% highlight python %} +from pyspark.ml.feature import VectorIndexer +from pyspark.mllib.util import MLUtils + +data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +indexer = VectorIndexer(inputCol="features", outputCol="indexed", maxCategories=10) +indexerModel = indexer.fit(data) + +# Create new column "indexed" with categorical values transformed to indices +indexedData = indexerModel.transform(data) +{% endhighlight %} +
    +
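+
+As a minimal sketch (reusing `data` and `indexer` from the Scala example above), the indexed features can be fed to a tree-based learner such as `DecisionTreeRegressor` by chaining the two stages in a `Pipeline`:
+
+{% highlight scala %}
+import org.apache.spark.ml.Pipeline
+import org.apache.spark.ml.regression.DecisionTreeRegressor
+
+// The tree reads the indexed column, so categorical features are treated as categories.
+val dt = new DecisionTreeRegressor()
+  .setFeaturesCol("indexed")
+  .setLabelCol("label")
+val pipeline = new Pipeline().setStages(Array(indexer, dt))
+val pipelineModel = pipeline.fit(data)
+pipelineModel.transform(data).select("prediction", "label").show(5)
+{% endhighlight %}
+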
    + + +## Normalizer + +`Normalizer` is a `Transformer` which transforms a dataset of `Vector` rows, normalizing each `Vector` to have unit norm. It takes parameter `p`, which specifies the [p-norm](http://en.wikipedia.org/wiki/Norm_%28mathematics%29#p-norm) used for normalization. ($p = 2$ by default.) This normalization can help standardize your input data and improve the behavior of learning algorithms. + +The following example demonstrates how to load a dataset in libsvm format and then normalize each row to have unit $L^2$ norm and unit $L^\infty$ norm. + +
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.Normalizer +import org.apache.spark.mllib.util.MLUtils + +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +val dataFrame = sqlContext.createDataFrame(data) + +// Normalize each Vector using $L^1$ norm. +val normalizer = new Normalizer() + .setInputCol("features") + .setOutputCol("normFeatures") + .setP(1.0) +val l1NormData = normalizer.transform(dataFrame) + +// Normalize each Vector using $L^\infty$ norm. +val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity) +{% endhighlight %} +
    + +
    +{% highlight java %} +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.ml.feature.Normalizer; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.util.MLUtils; +import org.apache.spark.sql.DataFrame; + +JavaRDD data = + MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt").toJavaRDD(); +DataFrame dataFrame = jsql.createDataFrame(data, LabeledPoint.class); + +// Normalize each Vector using $L^1$ norm. +Normalizer normalizer = new Normalizer() + .setInputCol("features") + .setOutputCol("normFeatures") + .setP(1.0); +DataFrame l1NormData = normalizer.transform(dataFrame); + +// Normalize each Vector using $L^\infty$ norm. +DataFrame lInfNormData = + normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY)); +{% endhighlight %} +
    + +
    +{% highlight python %} +from pyspark.mllib.util import MLUtils +from pyspark.ml.feature import Normalizer + +data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +dataFrame = sqlContext.createDataFrame(data) + +# Normalize each Vector using $L^1$ norm. +normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0) +l1NormData = normalizer.transform(dataFrame) + +# Normalize each Vector using $L^\infty$ norm. +lInfNormData = normalizer.transform(dataFrame, {normalizer.p: float("inf")}) +{% endhighlight %} +
    +
+
+
+
+## StandardScaler
+
+`StandardScaler` transforms a dataset of `Vector` rows, normalizing each feature to have unit standard deviation and/or zero mean. It takes parameters:
+
+* `withStd`: True by default. Scales the data to unit standard deviation.
+* `withMean`: False by default. Centers the data with the mean before scaling. It will build a dense output, so this does not work on sparse input and will raise an exception.
+
+`StandardScaler` is an `Estimator` which can be `fit` on a dataset to produce a `StandardScalerModel`; this amounts to computing summary statistics. The model can then transform a `Vector` column in a dataset to have unit standard deviation and/or zero mean features.
+
+Note that if the standard deviation of a feature is zero, it will return a default value of `0.0` in the `Vector` for that feature.
+
+More details can be found in the API docs for
+[StandardScaler](api/scala/index.html#org.apache.spark.ml.feature.StandardScaler) and
+[StandardScalerModel](api/scala/index.html#org.apache.spark.ml.feature.StandardScalerModel).
+
+The following example demonstrates how to load a dataset in libsvm format and then normalize each feature to have unit standard deviation.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.StandardScaler +import org.apache.spark.mllib.util.MLUtils + +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +val dataFrame = sqlContext.createDataFrame(data) +val scaler = new StandardScaler() + .setInputCol("features") + .setOutputCol("scaledFeatures") + .setWithStd(true) + .setWithMean(false) + +// Compute summary statistics by fitting the StandardScaler +val scalerModel = scaler.fit(dataFrame) + +// Normalize each feature to have unit standard deviation. +val scaledData = scalerModel.transform(dataFrame) +{% endhighlight %} +
    + +
+
+{% highlight java %}
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.StandardScaler;
+import org.apache.spark.ml.feature.StandardScalerModel;
+import org.apache.spark.mllib.regression.LabeledPoint;
+import org.apache.spark.mllib.util.MLUtils;
+import org.apache.spark.sql.DataFrame;
+
+JavaRDD<LabeledPoint> data =
+  MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt").toJavaRDD();
+DataFrame dataFrame = jsql.createDataFrame(data, LabeledPoint.class);
+StandardScaler scaler = new StandardScaler()
+  .setInputCol("features")
+  .setOutputCol("scaledFeatures")
+  .setWithStd(true)
+  .setWithMean(false);
+
+// Compute summary statistics by fitting the StandardScaler
+StandardScalerModel scalerModel = scaler.fit(dataFrame);
+
+// Normalize each feature to have unit standard deviation.
+DataFrame scaledData = scalerModel.transform(dataFrame);
+{% endhighlight %}
+
    + +
    +{% highlight python %} +from pyspark.mllib.util import MLUtils +from pyspark.ml.feature import StandardScaler + +data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") +dataFrame = sqlContext.createDataFrame(data) +scaler = StandardScaler(inputCol="features", outputCol="scaledFeatures", + withStd=True, withMean=False) + +# Compute summary statistics by fitting the StandardScaler +scalerModel = scaler.fit(dataFrame) + +# Normalize each feature to have unit standard deviation. +scaledData = scalerModel.transform(dataFrame) +{% endhighlight %} +
    +
+
+
+## Bucketizer
+
+`Bucketizer` transforms a column of continuous features to a column of feature buckets, where the buckets are specified by users. It takes a parameter:
+
+* `splits`: Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. A bucket defined by splits `x,y` holds values in the range `[x,y)`, except the last bucket, which also includes `y`. Splits should be strictly increasing. Values at `-inf`, `inf` must be explicitly provided to cover all `Double` values; otherwise, values outside the specified splits will be treated as errors. Two examples of `splits` are `Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)` and `Array(0.0, 1.0, 2.0)`.
+
+Note that if you do not know the upper and lower bounds of the targeted column, you should add `Double.NegativeInfinity` and `Double.PositiveInfinity` as the bounds of your splits to prevent a potential out-of-bounds exception.
+
+Note also that the splits you provide have to be in strictly increasing order, i.e. `s0 < s1 < s2 < ... < sn`.
+
+More details can be found in the API docs for [Bucketizer](api/scala/index.html#org.apache.spark.ml.feature.Bucketizer).
+
+The following example demonstrates how to bucketize a column of `Double`s into a column of bucket indices.
+
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.Bucketizer +import org.apache.spark.sql.DataFrame + +val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) + +val data = Array(-0.5, -0.3, 0.0, 0.2) +val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + +val bucketizer = new Bucketizer() + .setInputCol("features") + .setOutputCol("bucketedFeatures") + .setSplits(splits) + +// Transform original data into its bucket index. +val bucketedData = bucketizer.transform(dataFrame) +{% endhighlight %} +
    + +
+
+{% highlight java %}
+import com.google.common.collect.Lists;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.Bucketizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
+
+JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+  RowFactory.create(-0.5),
+  RowFactory.create(-0.3),
+  RowFactory.create(0.0),
+  RowFactory.create(0.2)
+));
+StructType schema = new StructType(new StructField[] {
+  new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
+});
+DataFrame dataFrame = jsql.createDataFrame(data, schema);
+
+Bucketizer bucketizer = new Bucketizer()
+  .setInputCol("features")
+  .setOutputCol("bucketedFeatures")
+  .setSplits(splits);
+
+// Transform original data into its bucket index.
+DataFrame bucketedData = bucketizer.transform(dataFrame);
+{% endhighlight %}
+
    + +
    +{% highlight python %} +from pyspark.ml.feature import Bucketizer + +splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")] + +data = [(-0.5,), (-0.3,), (0.0,), (0.2,)] +dataFrame = sqlContext.createDataFrame(data, ["features"]) + +bucketizer = Bucketizer(splits=splits, inputCol="features", outputCol="bucketedFeatures") + +# Transform original data into its bucket index. +bucketedData = bucketizer.transform(dataFrame) +{% endhighlight %} +
    +
    + +## ElementwiseProduct + +ElementwiseProduct multiplies each input vector by a provided "weight" vector, using element-wise multiplication. In other words, it scales each column of the dataset by a scalar multiplier. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and transforming vector, `w`, to yield a result vector. + +`\[ \begin{pmatrix} +v_1 \\ +\vdots \\ +v_N +\end{pmatrix} \circ \begin{pmatrix} + w_1 \\ + \vdots \\ + w_N + \end{pmatrix} += \begin{pmatrix} + v_1 w_1 \\ + \vdots \\ + v_N w_N + \end{pmatrix} +\]` + +[`ElementwiseProduct`](api/scala/index.html#org.apache.spark.ml.feature.ElementwiseProduct) takes the following parameter: + +* `scalingVec`: the transforming vector. + +This example below demonstrates how to transform vectors using a transforming vector value. + +
    +
    +{% highlight scala %} +import org.apache.spark.ml.feature.ElementwiseProduct +import org.apache.spark.mllib.linalg.Vectors + +// Create some vector data; also works for sparse vectors +val dataFrame = sqlContext.createDataFrame(Seq( + ("a", Vectors.dense(1.0, 2.0, 3.0)), + ("b", Vectors.dense(4.0, 5.0, 6.0)))).toDF("id", "vector") + +val transformingVector = Vectors.dense(0.0, 1.0, 2.0) +val transformer = new ElementwiseProduct() + .setScalingVec(transformingVector) + .setInputCol("vector") + .setOutputCol("transformedVector") + +// Batch transform the vectors to create new column: +val transformedData = transformer.transform(dataFrame) + +{% endhighlight %} +
    + +
+
+{% highlight java %}
+import java.util.ArrayList;
+import java.util.List;
+
+import com.google.common.collect.Lists;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.ElementwiseProduct;
+import org.apache.spark.mllib.linalg.Vector;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+// Create some vector data; also works for sparse vectors
+JavaRDD<Row> jrdd = jsc.parallelize(Lists.newArrayList(
+  RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
+  RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
+));
+List<StructField> fields = new ArrayList<StructField>(2);
+fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
+// the "vector" column holds Vector values, so declare it with VectorUDT
+fields.add(DataTypes.createStructField("vector", new VectorUDT(), false));
+StructType schema = DataTypes.createStructType(fields);
+DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema);
+Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
+ElementwiseProduct transformer = new ElementwiseProduct()
+  .setScalingVec(transformingVector)
+  .setInputCol("vector")
+  .setOutputCol("transformedVector");
+// Batch transform the vectors to create new column:
+DataFrame transformedData = transformer.transform(dataFrame);
+
+{% endhighlight %}
+
    +
    + +## VectorAssembler + +`VectorAssembler` is a transformer that combines a given list of columns into a single vector +column. +It is useful for combining raw features and features generated by different feature transformers +into a single feature vector, in order to train ML models like logistic regression and decision +trees. +`VectorAssembler` accepts the following input column types: all numeric types, boolean type, +and vector type. +In each row, the values of the input columns will be concatenated into a vector in the specified +order. + +**Examples** + +Assume that we have a DataFrame with the columns `id`, `hour`, `mobile`, `userFeatures`, +and `clicked`: + +~~~ + id | hour | mobile | userFeatures | clicked +----|------|--------|------------------|--------- + 0 | 18 | 1.0 | [0.0, 10.0, 0.5] | 1.0 +~~~ + +`userFeatures` is a vector column that contains three user features. +We want to combine `hour`, `mobile`, and `userFeatures` into a single feature vector +called `features` and use it to predict `clicked` or not. +If we set `VectorAssembler`'s input columns to `hour`, `mobile`, and `userFeatures` and +output column to `features`, after transformation we should get the following DataFrame: + +~~~ + id | hour | mobile | userFeatures | clicked | features +----|------|--------|------------------|---------|----------------------------- + 0 | 18 | 1.0 | [0.0, 10.0, 0.5] | 1.0 | [18.0, 1.0, 0.0, 10.0, 0.5] +~~~ + +
    +
    + +[`VectorAssembler`](api/scala/index.html#org.apache.spark.ml.feature.VectorAssembler) takes an array +of input column names and an output column name. + +{% highlight scala %} +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.ml.feature.VectorAssembler + +val dataset = sqlContext.createDataFrame( + Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0)) +).toDF("id", "hour", "mobile", "userFeatures", "clicked") +val assembler = new VectorAssembler() + .setInputCols(Array("hour", "mobile", "userFeatures")) + .setOutputCol("features") +val output = assembler.transform(dataset) +println(output.select("features", "clicked").first()) +{% endhighlight %} +
    + +
    + +[`VectorAssembler`](api/java/org/apache/spark/ml/feature/VectorAssembler.html) takes an array +of input column names and an output column name. + +{% highlight java %} +import java.util.Arrays; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.types.*; +import static org.apache.spark.sql.types.DataTypes.*; + +StructType schema = createStructType(new StructField[] { + createStructField("id", IntegerType, false), + createStructField("hour", IntegerType, false), + createStructField("mobile", DoubleType, false), + createStructField("userFeatures", new VectorUDT(), false), + createStructField("clicked", DoubleType, false) +}); +Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0); +JavaRDD rdd = jsc.parallelize(Arrays.asList(row)); +DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + +VectorAssembler assembler = new VectorAssembler() + .setInputCols(new String[] {"hour", "mobile", "userFeatures"}) + .setOutputCol("features"); + +DataFrame output = assembler.transform(dataset); +System.out.println(output.select("features", "clicked").first()); +{% endhighlight %} +
    + +
    + +[`VectorAssembler`](api/python/pyspark.ml.html#pyspark.ml.feature.VectorAssembler) takes a list +of input column names and an output column name. + +{% highlight python %} +from pyspark.mllib.linalg import Vectors +from pyspark.ml.feature import VectorAssembler + +dataset = sqlContext.createDataFrame( + [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)], + ["id", "hour", "mobile", "userFeatures", "clicked"]) +assembler = VectorAssembler( + inputCols=["hour", "mobile", "userFeatures"], + outputCol="features") +output = assembler.transform(dataset) +print(output.select("features", "clicked").first()) +{% endhighlight %} +
    +
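+
+As a minimal sketch (reusing the assembled `output` DataFrame from the Scala example above; a realistically sized training set would stand in for the single illustrative row), the assembled `features` column can be pointed at directly when configuring a learner such as logistic regression:
+
+{% highlight scala %}
+import org.apache.spark.ml.classification.LogisticRegression
+
+// Point the learner at the assembled vector column and the label column.
+val lr = new LogisticRegression()
+  .setFeaturesCol("features")
+  .setLabelCol("clicked")
+  .setMaxIter(10)
+// With a full training DataFrame assembled the same way (hypothetical name):
+// val lrModel = lr.fit(assembledTrainingData)
+{% endhighlight %}
+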
    + +# Feature Selectors + diff --git a/docs/ml-guide.md b/docs/ml-guide.md index 771a07183e26f..4eb622d4b95e8 100644 --- a/docs/ml-guide.md +++ b/docs/ml-guide.md @@ -148,6 +148,16 @@ Parameters belong to specific instances of `Estimator`s and `Transformer`s. For example, if we have two `LogisticRegression` instances `lr1` and `lr2`, then we can build a `ParamMap` with both `maxIter` parameters specified: `ParamMap(lr1.maxIter -> 10, lr2.maxIter -> 20)`. This is useful if there are two algorithms with the `maxIter` parameter in a `Pipeline`. +# Algorithm Guides + +There are now several algorithms in the Pipelines API which are not in the lower-level MLlib API, so we link to documentation for them here. These algorithms are mostly feature transformers, which fit naturally into the `Transformer` abstraction in Pipelines, and ensembles, which fit naturally into the `Estimator` abstraction in the Pipelines. + +**Pipelines API Algorithm Guides** + +* [Feature Extraction, Transformation, and Selection](ml-features.html) +* [Ensembles](ml-ensembles.html) + + # Code Examples This section gives code examples illustrating the functionality discussed above. @@ -197,7 +207,7 @@ val model1 = lr.fit(training.toDF) // we can view the parameters it used during fit(). // This prints the parameter (name: value) pairs, where names are unique IDs for this // LogisticRegression instance. -println("Model 1 was fit using parameters: " + model1.fittingParamMap) +println("Model 1 was fit using parameters: " + model1.parent.extractParamMap) // We may alternatively specify parameters using a ParamMap, // which supports several methods for specifying parameters. @@ -212,7 +222,7 @@ val paramMapCombined = paramMap ++ paramMap2 // Now learn a new model using the paramMapCombined parameters. // paramMapCombined overrides all parameters set earlier via lr.set* methods. val model2 = lr.fit(training.toDF, paramMapCombined) -println("Model 2 was fit using parameters: " + model2.fittingParamMap) +println("Model 2 was fit using parameters: " + model2.parent.extractParamMap) // Prepare test data. val test = sc.parallelize(Seq( @@ -228,7 +238,7 @@ model2.transform(test.toDF) .select("features", "label", "myProbability", "prediction") .collect() .foreach { case Row(features: Vector, label: Double, prob: Vector, prediction: Double) => - println("($features, $label) -> prob=$prob, prediction=$prediction") + println(s"($features, $label) -> prob=$prob, prediction=$prediction") } sc.stop() @@ -279,7 +289,7 @@ LogisticRegressionModel model1 = lr.fit(training); // we can view the parameters it used during fit(). // This prints the parameter (name: value) pairs, where names are unique IDs for this // LogisticRegression instance. -System.out.println("Model 1 was fit using parameters: " + model1.fittingParamMap()); +System.out.println("Model 1 was fit using parameters: " + model1.parent().extractParamMap()); // We may alternatively specify parameters using a ParamMap. ParamMap paramMap = new ParamMap(); @@ -295,7 +305,7 @@ ParamMap paramMapCombined = paramMap.$plus$plus(paramMap2); // Now learn a new model using the paramMapCombined parameters. // paramMapCombined overrides all parameters set earlier via lr.set* methods. LogisticRegressionModel model2 = lr.fit(training, paramMapCombined); -System.out.println("Model 2 was fit using parameters: " + model2.fittingParamMap()); +System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap()); // Prepare test documents. 
List localTest = Lists.newArrayList( @@ -382,7 +392,7 @@ model.transform(test.toDF) .select("id", "text", "probability", "prediction") .collect() .foreach { case Row(id: Long, text: String, prob: Vector, prediction: Double) => - println("($id, $text) --> prob=$prob, prediction=$prediction") + println(s"($id, $text) --> prob=$prob, prediction=$prediction") } sc.stop() diff --git a/docs/mllib-classification-regression.md b/docs/mllib-classification-regression.md index 8e91d62f4a907..0210950b89906 100644 --- a/docs/mllib-classification-regression.md +++ b/docs/mllib-classification-regression.md @@ -20,7 +20,7 @@ the supported algorithms for each type of problem. Binary Classificationlinear SVMs, logistic regression, decision trees, random forests, gradient-boosted trees, naive Bayes - Multiclass Classificationdecision trees, random forests, naive Bayes + Multiclass Classificationlogistic regression, decision trees, random forests, naive Bayes Regressionlinear least squares, Lasso, ridge regression, decision trees, random forests, gradient-boosted trees, isotonic regression @@ -31,7 +31,7 @@ the supported algorithms for each type of problem. More details for these methods can be found here: * [Linear models](mllib-linear-methods.html) - * [binary classification (SVMs, logistic regression)](mllib-linear-methods.html#binary-classification) + * [classification (SVMs, logistic regression)](mllib-linear-methods.html#classification) * [linear regression (least squares, Lasso, ridge)](mllib-linear-methods.html#linear-least-squares-lasso-and-ridge-regression) * [Decision trees](mllib-decision-tree.html) * [Ensembles of decision trees](mllib-ensembles.html) diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index f5aa15b7d9b79..dac22f736e8cb 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -47,7 +47,7 @@ Set Sum of Squared Error (WSSSE). You can reduce this error measure by increasin optimal *k* is usually one where there is an "elbow" in the WSSSE graph. {% highlight scala %} -import org.apache.spark.mllib.clustering.KMeans +import org.apache.spark.mllib.clustering.{KMeans, KMeansModel} import org.apache.spark.mllib.linalg.Vectors // Load and parse the data @@ -62,6 +62,10 @@ val clusters = KMeans.train(parsedData, numClusters, numIterations) // Evaluate clustering by computing Within Set Sum of Squared Errors val WSSSE = clusters.computeCost(parsedData) println("Within Set Sum of Squared Errors = " + WSSSE) + +// Save and load model +clusters.save(sc, "myModelPath") +val sameModel = KMeansModel.load(sc, "myModelPath") {% endhighlight %}
    @@ -110,6 +114,10 @@ public class KMeansExample { // Evaluate clustering by computing Within Set Sum of Squared Errors double WSSSE = clusters.computeCost(parsedData.rdd()); System.out.println("Within Set Sum of Squared Errors = " + WSSSE); + + // Save and load model + clusters.save(sc.sc(), "myModelPath"); + KMeansModel sameModel = KMeansModel.load(sc.sc(), "myModelPath"); } } {% endhighlight %} @@ -124,7 +132,7 @@ Within Set Sum of Squared Error (WSSSE). You can reduce this error measure by in fact the optimal *k* is usually one where there is an "elbow" in the WSSSE graph. {% highlight python %} -from pyspark.mllib.clustering import KMeans +from pyspark.mllib.clustering import KMeans, KMeansModel from numpy import array from math import sqrt @@ -143,6 +151,10 @@ def error(point): WSSSE = parsedData.map(lambda point: error(point)).reduce(lambda x, y: x + y) print("Within Set Sum of Squared Error = " + str(WSSSE)) + +# Save and load model +clusters.save(sc, "myModelPath") +sameModel = KMeansModel.load(sc, "myModelPath") {% endhighlight %}
    @@ -312,12 +324,12 @@ Calling `PowerIterationClustering.run` returns a which contains the computed clustering assignments. {% highlight scala %} -import org.apache.spark.mllib.clustering.PowerIterationClustering +import org.apache.spark.mllib.clustering.{PowerIterationClustering, PowerIterationClusteringModel} import org.apache.spark.mllib.linalg.Vectors val similarities: RDD[(Long, Long, Double)] = ... -val pic = new PowerIteartionClustering() +val pic = new PowerIterationClustering() .setK(3) .setMaxIterations(20) val model = pic.run(similarities) @@ -325,6 +337,10 @@ val model = pic.run(similarities) model.assignments.foreach { a => println(s"${a.id} -> ${a.cluster}") } + +// Save and load model +model.save(sc, "myModelPath") +val sameModel = PowerIterationClusteringModel.load(sc, "myModelPath") {% endhighlight %} A full example that produces the experiment described in the PIC paper can be found under @@ -360,6 +376,10 @@ PowerIterationClusteringModel model = pic.run(similarities); for (PowerIterationClustering.Assignment a: model.assignments().toJavaRDD().collect()) { System.out.println(a.id() + " -> " + a.cluster()); } + +// Save and load model +model.save(sc.sc(), "myModelPath"); +PowerIterationClusteringModel sameModel = PowerIterationClusteringModel.load(sc.sc(), "myModelPath"); {% endhighlight %}
@@ -377,11 +397,11 @@ LDA can be thought of as a clustering algorithm as follows: on a statistical model of how text documents are generated. LDA takes in a collection of documents as vectors of word counts. -It learns clustering using [expectation-maximization](http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) -on the likelihood function. After fitting on the documents, LDA provides: +It supports different inference algorithms via `setOptimizer` function. EMLDAOptimizer learns clustering using [expectation-maximization](http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm) +on the likelihood function and yields comprehensive results, while OnlineLDAOptimizer uses iterative mini-batch sampling for [online variational inference](https://www.cs.princeton.edu/~blei/papers/HoffmanBleiBach2010b.pdf) and is generally memory friendly. After fitting on the documents, LDA provides: * Topics: Inferred topics, each of which is a probability distribution over terms (words). -* Topic distributions for documents: For each document in the training set, LDA gives a probability distribution over topics. +* Topic distributions for documents: For each document in the training set, LDA gives a probability distribution over topics. (EM only) LDA takes the following parameters: diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md index 7b397e30b2d90..dfdf6216b270c 100644 --- a/docs/mllib-collaborative-filtering.md +++ b/docs/mllib-collaborative-filtering.md @@ -107,7 +107,8 @@ other signals), you can use the `trainImplicit` method to get better results. {% highlight scala %} val alpha = 0.01 -val model = ALS.trainImplicit(ratings, rank, numIterations, alpha) +val lambda = 0.01 +val model = ALS.trainImplicit(ratings, rank, numIterations, lambda, alpha) {% endhighlight %} diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index 4f2a2f71048f7..d824dab1d7f7b 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -31,7 +31,7 @@ The base class of local vectors is implementations: [`DenseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.DenseVector) and [`SparseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.SparseVector). We recommend using the factory methods implemented in -[`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) to create local vectors. +[`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) to create local vectors. {% highlight scala %} import org.apache.spark.mllib.linalg.{Vector, Vectors} @@ -57,7 +57,7 @@ The base class of local vectors is implementations: [`DenseVector`](api/java/org/apache/spark/mllib/linalg/DenseVector.html) and [`SparseVector`](api/java/org/apache/spark/mllib/linalg/SparseVector.html). We recommend using the factory methods implemented in -[`Vectors`](api/java/org/apache/spark/mllib/linalg/Vector.html) to create local vectors. +[`Vectors`](api/java/org/apache/spark/mllib/linalg/Vectors.html) to create local vectors. {% highlight java %} import org.apache.spark.mllib.linalg.Vector; @@ -84,7 +84,7 @@ and the following as sparse vectors: with a single column We recommend using NumPy arrays over lists for efficiency, and using the factory methods implemented -in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vector) to create sparse vectors. +in [`Vectors`](api/python/pyspark.mllib.html#pyspark.mllib.linalg.Vectors) to create sparse vectors. 
{% highlight python %} import numpy as np @@ -241,7 +241,7 @@ The base class of local matrices is [`Matrix`](api/scala/index.html#org.apache.spark.mllib.linalg.Matrix), and we provide one implementation: [`DenseMatrix`](api/scala/index.html#org.apache.spark.mllib.linalg.DenseMatrix). We recommend using the factory methods implemented -in [`Matrices`](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices) to create local +in [`Matrices`](api/scala/index.html#org.apache.spark.mllib.linalg.Matrices$) to create local matrices. {% highlight scala %} @@ -296,70 +296,6 @@ backed by an RDD of its entries. The underlying RDDs of a distributed matrix must be deterministic, because we cache the matrix size. In general the use of non-deterministic RDDs can lead to errors. -### BlockMatrix - -A `BlockMatrix` is a distributed matrix backed by an RDD of `MatrixBlock`s, where a `MatrixBlock` is -a tuple of `((Int, Int), Matrix)`, where the `(Int, Int)` is the index of the block, and `Matrix` is -the sub-matrix at the given index with size `rowsPerBlock` x `colsPerBlock`. -`BlockMatrix` supports methods such as `add` and `multiply` with another `BlockMatrix`. -`BlockMatrix` also has a helper function `validate` which can be used to check whether the -`BlockMatrix` is set up properly. - -
-
- -A [`BlockMatrix`](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) can be -most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. -`toBlockMatrix` creates blocks of size 1024 x 1024 by default. -Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. - -{% highlight scala %} -import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry} - -val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries -// Create a CoordinateMatrix from an RDD[MatrixEntry]. -val coordMat: CoordinateMatrix = new CoordinateMatrix(entries) -// Transform the CoordinateMatrix to a BlockMatrix -val matA: BlockMatrix = coordMat.toBlockMatrix().cache() - -// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. -// Nothing happens if it is valid. -matA.validate() - -// Calculate A^T A. -val ata = matA.transpose.multiply(matA) -{% endhighlight %} -
- -
- -A [`BlockMatrix`](api/java/org/apache/spark/mllib/linalg/distributed/BlockMatrix.html) can be -most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. -`toBlockMatrix` creates blocks of size 1024 x 1024 by default. -Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. - -{% highlight java %} -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.distributed.BlockMatrix; -import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix; -import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix; - -JavaRDD entries = ... // a JavaRDD of (i, j, v) Matrix Entries -// Create a CoordinateMatrix from a JavaRDD. -CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd()); -// Transform the CoordinateMatrix to a BlockMatrix -BlockMatrix matA = coordMat.toBlockMatrix().cache(); - -// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. -// Nothing happens if it is valid. -matA.validate(); - -// Calculate A^T A. -BlockMatrix ata = matA.transpose().multiply(matA); -{% endhighlight %} -
-
- ### RowMatrix A `RowMatrix` is a row-oriented distributed matrix without meaningful row indices, backed by an RDD @@ -530,3 +466,67 @@ IndexedRowMatrix indexedRowMatrix = mat.toIndexedRowMatrix(); {% endhighlight %} + +### BlockMatrix + +A `BlockMatrix` is a distributed matrix backed by an RDD of `MatrixBlock`s, where a `MatrixBlock` is +a tuple of `((Int, Int), Matrix)`, where the `(Int, Int)` is the index of the block, and `Matrix` is +the sub-matrix at the given index with size `rowsPerBlock` x `colsPerBlock`. +`BlockMatrix` supports methods such as `add` and `multiply` with another `BlockMatrix`. +`BlockMatrix` also has a helper function `validate` which can be used to check whether the +`BlockMatrix` is set up properly. + +
+
+ +A [`BlockMatrix`](api/scala/index.html#org.apache.spark.mllib.linalg.distributed.BlockMatrix) can be +most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. +`toBlockMatrix` creates blocks of size 1024 x 1024 by default. +Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. + +{% highlight scala %} +import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry} + +val entries: RDD[MatrixEntry] = ... // an RDD of (i, j, v) matrix entries +// Create a CoordinateMatrix from an RDD[MatrixEntry]. +val coordMat: CoordinateMatrix = new CoordinateMatrix(entries) +// Transform the CoordinateMatrix to a BlockMatrix +val matA: BlockMatrix = coordMat.toBlockMatrix().cache() + +// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. +// Nothing happens if it is valid. +matA.validate() + +// Calculate A^T A. +val ata = matA.transpose.multiply(matA) +{% endhighlight %} +
+ +
+ +A [`BlockMatrix`](api/java/org/apache/spark/mllib/linalg/distributed/BlockMatrix.html) can be +most easily created from an `IndexedRowMatrix` or `CoordinateMatrix` by calling `toBlockMatrix`. +`toBlockMatrix` creates blocks of size 1024 x 1024 by default. +Users may change the block size by supplying the values through `toBlockMatrix(rowsPerBlock, colsPerBlock)`. + +{% highlight java %} +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.mllib.linalg.distributed.BlockMatrix; +import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix; +import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix; + +JavaRDD entries = ... // a JavaRDD of (i, j, v) Matrix Entries +// Create a CoordinateMatrix from a JavaRDD. +CoordinateMatrix coordMat = new CoordinateMatrix(entries.rdd()); +// Transform the CoordinateMatrix to a BlockMatrix +BlockMatrix matA = coordMat.toBlockMatrix().cache(); + +// Validate whether the BlockMatrix is set up properly. Throws an Exception when it is not valid. +// Nothing happens if it is valid. +matA.validate(); + +// Calculate A^T A. +BlockMatrix ata = matA.transpose().multiply(matA); +{% endhighlight %} +
+
diff --git a/docs/mllib-dimensionality-reduction.md b/docs/mllib-dimensionality-reduction.md index 870fed6cc5024..05f51168d837c 100644 --- a/docs/mllib-dimensionality-reduction.md +++ b/docs/mllib-dimensionality-reduction.md @@ -137,7 +137,7 @@ statistical method to find a rotation such that the first coordinate has the lar possible, and each succeeding coordinate in turn has the largest variance possible. The columns of the rotation matrix are called principal components. PCA is used widely in dimensionality reduction. -MLlib supports PCA for tall-and-skinny matrices stored in row-oriented format. +MLlib supports PCA for tall-and-skinny matrices stored in row-oriented format and any Vectors.
@@ -157,6 +157,23 @@ val pc: Matrix = mat.computePrincipalComponents(10) // Principal components are // Project the rows to the linear space spanned by the top 10 principal components. val projected: RowMatrix = mat.multiply(pc) {% endhighlight %} + +The following code demonstrates how to compute principal components on source vectors +and use them to project the vectors into a low-dimensional space while keeping associated labels: + +{% highlight scala %} +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.feature.PCA + +val data: RDD[LabeledPoint] = ... + +// Compute the top 10 principal components. +val pca = new PCA(10).fit(data.map(_.features)) + +// Project vectors to the linear space spanned by the top 10 principal components, keeping the label +val projected = data.map(p => p.copy(features = pca.transform(p.features))) +{% endhighlight %} +
diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md index 80842b27effd8..4fe470a8de810 100644 --- a/docs/mllib-feature-extraction.md +++ b/docs/mllib-feature-extraction.md @@ -188,7 +188,7 @@ Here we assume the extracted file is `text8` and in same directory as you run th import org.apache.spark._ import org.apache.spark.rdd._ import org.apache.spark.SparkContext._ -import org.apache.spark.mllib.feature.Word2Vec +import org.apache.spark.mllib.feature.{Word2Vec, Word2VecModel} val input = sc.textFile("text8").map(line => line.split(" ").toSeq) @@ -201,6 +201,10 @@ val synonyms = model.findSynonyms("china", 40) for((synonym, cosineSimilarity) <- synonyms) { println(s"$synonym $cosineSimilarity") } + +// Save and load model +model.save(sc, "myModelPath") +val sameModel = Word2VecModel.load(sc, "myModelPath") {% endhighlight %}
@@ -410,6 +414,7 @@ import org.apache.spark.SparkContext._ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.mllib.feature.ChiSqSelector // Load some data in libsvm format val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") @@ -477,3 +482,138 @@ sc.stop();
+## ElementwiseProduct + +ElementwiseProduct multiplies each input vector by a provided "weight" vector, using element-wise multiplication. In other words, it scales each column of the dataset by a scalar multiplier. This represents the [Hadamard product](https://en.wikipedia.org/wiki/Hadamard_product_%28matrices%29) between the input vector, `v` and transforming vector, `w`, to yield a result vector. + +`\[ \begin{pmatrix} +v_1 \\ +\vdots \\ +v_N +\end{pmatrix} \circ \begin{pmatrix} + w_1 \\ + \vdots \\ + w_N + \end{pmatrix} += \begin{pmatrix} + v_1 w_1 \\ + \vdots \\ + v_N w_N + \end{pmatrix} +\]` + +[`ElementwiseProduct`](api/scala/index.html#org.apache.spark.mllib.feature.ElementwiseProduct) has the following parameter in the constructor: + +* `w`: the transforming vector. + +`ElementwiseProduct` implements [`VectorTransformer`](api/scala/index.html#org.apache.spark.mllib.feature.VectorTransformer) which can apply the weighting on a `Vector` to produce a transformed `Vector` or on an `RDD[Vector]` to produce a transformed `RDD[Vector]`. + +### Example + +This example below demonstrates how to transform vectors using a transforming vector value. + +
+
+{% highlight scala %} +import org.apache.spark.SparkContext._ +import org.apache.spark.mllib.feature.ElementwiseProduct +import org.apache.spark.mllib.linalg.Vectors + +// Create some vector data; also works for sparse vectors +val data = sc.parallelize(Array(Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0))) + +val transformingVector = Vectors.dense(0.0, 1.0, 2.0) +val transformer = new ElementwiseProduct(transformingVector) + +// Batch transform and per-row transform give the same results: +val transformedData = transformer.transform(data) +val transformedData2 = data.map(x => transformer.transform(x)) + +{% endhighlight %} +
+ +
+{% highlight java %} +import java.util.Arrays; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.feature.ElementwiseProduct; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.Vectors; + +// Create some vector data; also works for sparse vectors +JavaRDD data = sc.parallelize(Arrays.asList( + Vectors.dense(1.0, 2.0, 3.0), Vectors.dense(4.0, 5.0, 6.0))); +Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0); +ElementwiseProduct transformer = new ElementwiseProduct(transformingVector); + +// Batch transform and per-row transform give the same results: +JavaRDD transformedData = transformer.transform(data); +JavaRDD transformedData2 = data.map( + new Function() { + @Override + public Vector call(Vector v) { + return transformer.transform(v); + } + } +); + +{% endhighlight %} +
+
+ + +## PCA + +A feature transformer that projects vectors to a low-dimensional space using PCA. +Details can be found in the [dimensionality reduction](mllib-dimensionality-reduction.html) guide. + +### Example + +The following code demonstrates how to compute principal components on a `Vector` +and use them to project the vectors into a low-dimensional space while keeping the associated labels, +for use in a [Linear Regression](mllib-linear-methods.html). + +
+
+{% highlight scala %} +import org.apache.spark.mllib.regression.LinearRegressionWithSGD +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.feature.PCA + +val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line => + val parts = line.split(',') + LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) +}.cache() + +val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L) +val training = splits(0).cache() +val test = splits(1) + +val pca = new PCA(training.first().features.size/2).fit(data.map(_.features)) +val training_pca = training.map(p => p.copy(features = pca.transform(p.features))) +val test_pca = test.map(p => p.copy(features = pca.transform(p.features))) + +val numIterations = 100 +val model = LinearRegressionWithSGD.train(training, numIterations) +val model_pca = LinearRegressionWithSGD.train(training_pca, numIterations) + +val valuesAndPreds = test.map { point => + val score = model.predict(point.features) + (score, point.label) +} + +val valuesAndPreds_pca = test_pca.map { point => + val score = model_pca.predict(point.features) + (score, point.label) +} + +val MSE = valuesAndPreds.map{case(v, p) => math.pow((v - p), 2)}.mean() +val MSE_pca = valuesAndPreds_pca.map{case(v, p) => math.pow((v - p), 2)}.mean() + +println("Mean Squared Error = " + MSE) +println("PCA Mean Squared Error = " + MSE_pca) +{% endhighlight %} +
+
diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index f8e879496c135..de7d66fb2dedf 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -39,6 +39,7 @@ filtering, dimensionality reduction, as well as underlying optimization primitiv * [Optimization (developer)](mllib-optimization.html) * stochastic gradient descent * limited-memory BFGS (L-BFGS) +* [PMML model export](mllib-pmml-model-export.html) MLlib is under active development. The APIs marked `Experimental`/`DeveloperApi` may change in future releases, diff --git a/docs/mllib-isotonic-regression.md b/docs/mllib-isotonic-regression.md index b521c2f27cd6e..5732bc4c7e79e 100644 --- a/docs/mllib-isotonic-regression.md +++ b/docs/mllib-isotonic-regression.md @@ -60,7 +60,7 @@ Model is created using the training set and a mean squared error is calculated f labels and real labels in the test set. {% highlight scala %} -import org.apache.spark.mllib.regression.IsotonicRegression +import org.apache.spark.mllib.regression.{IsotonicRegression, IsotonicRegressionModel} val data = sc.textFile("data/mllib/sample_isotonic_regression_data.txt") @@ -88,6 +88,10 @@ val predictionAndLabel = test.map { point => // Calculate mean squared error between predicted and real labels. val meanSquaredError = predictionAndLabel.map{case(p, l) => math.pow((p - l), 2)}.mean() println("Mean Squared Error = " + meanSquaredError) + +// Save and load model +model.save(sc, "myModelPath") +val sameModel = IsotonicRegressionModel.load(sc, "myModelPath") {% endhighlight %} @@ -150,6 +154,10 @@ Double meanSquaredError = new JavaDoubleRDD(predictionAndLabel.map( ).rdd()).mean(); System.out.println("Mean Squared Error = " + meanSquaredError); + +// Save and load model +model.save(sc.sc(), "myModelPath"); +IsotonicRegressionModel sameModel = IsotonicRegressionModel.load(sc.sc(), "myModelPath"); {% endhighlight %} diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index 2b2be4d9d0273..8029edca16002 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -785,8 +785,7 @@ gradient descent (`stepSize`, `numIterations`, `miniBatchFraction`). For each o all three possible regularizations (none, L1 or L2). For Logistic Regression, [L-BFGS](api/scala/index.html#org.apache.spark.mllib.optimization.LBFGS) -version is implemented under [LogisticRegressionWithLBFGS] -(api/scala/index.html#org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS), and this +version is implemented under [LogisticRegressionWithLBFGS](api/scala/index.html#org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS), and this version supports both binary and multinomial Logistic Regression while SGD version only supports binary Logistic Regression. However, L-BFGS version doesn't support L1 regularization but SGD one supports L1 regularization. When L1 regularization is not required, L-BFGS version is strongly diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 9780ea52c4994..acdcc371487f8 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -14,14 +14,13 @@ and use it for prediction. MLlib supports [multinomial naive Bayes](http://en.wikipedia.org/wiki/Naive_Bayes_classifier#Multinomial_naive_Bayes) -and [Bernoulli naive Bayes] (http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html). -These models are typically used for [document classification] -(http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html). 
+and [Bernoulli naive Bayes](http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html). +These models are typically used for [document classification](http://nlp.stanford.edu/IR-book/html/htmledition/naive-bayes-text-classification-1.html). Within that context, each observation is a document and each feature represents a term whose value is the frequency of the term (in multinomial naive Bayes) or a zero or one indicating whether the term was found in the document (in Bernoulli naive Bayes). Feature values must be nonnegative. The model type is selected with an optional parameter -"Multinomial" or "Bernoulli" with "Multinomial" as the default. +"multinomial" or "bernoulli" with "multinomial" as the default. [Additive smoothing](http://en.wikipedia.org/wiki/Lidstone_smoothing) can be used by setting the parameter $\lambda$ (default to $1.0$). For document classification, the input feature vectors are usually sparse, and sparse vectors should be supplied as input to take advantage of @@ -35,7 +34,7 @@ sparsity. Since the training data is only used once, it is not necessary to cach [NaiveBayes](api/scala/index.html#org.apache.spark.mllib.classification.NaiveBayes$) implements multinomial naive Bayes. It takes an RDD of [LabeledPoint](api/scala/index.html#org.apache.spark.mllib.regression.LabeledPoint) and an optional -smoothing parameter `lambda` as input, an optional model type parameter (default is Multinomial), and outputs a +smoothing parameter `lambda` as input, an optional model type parameter (default is "multinomial"), and outputs a [NaiveBayesModel](api/scala/index.html#org.apache.spark.mllib.classification.NaiveBayesModel), which can be used for evaluation and prediction. @@ -54,7 +53,7 @@ val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L) val training = splits(0) val test = splits(1) -val model = NaiveBayes.train(training, lambda = 1.0, model = "Multinomial") +val model = NaiveBayes.train(training, lambda = 1.0, model = "multinomial") val predictionAndLabel = test.map(p => (model.predict(p.features), p.label)) val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count() @@ -75,6 +74,8 @@ optionally smoothing parameter `lambda` as input, and output a can be used for evaluation and prediction. {% highlight java %} +import scala.Tuple2; + import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.Function; @@ -82,7 +83,6 @@ import org.apache.spark.api.java.function.PairFunction; import org.apache.spark.mllib.classification.NaiveBayes; import org.apache.spark.mllib.classification.NaiveBayesModel; import org.apache.spark.mllib.regression.LabeledPoint; -import scala.Tuple2; JavaRDD training = ... // training set JavaRDD test = ... // test set diff --git a/docs/mllib-pmml-model-export.md b/docs/mllib-pmml-model-export.md new file mode 100644 index 0000000000000..42ea2ca81f80d --- /dev/null +++ b/docs/mllib-pmml-model-export.md @@ -0,0 +1,86 @@ +--- +layout: global +title: PMML model export - MLlib +displayTitle: MLlib - PMML model export +--- + +* Table of contents +{:toc} + +## MLlib supported models + +MLlib supports model export to Predictive Model Markup Language ([PMML](http://en.wikipedia.org/wiki/Predictive_Model_Markup_Language)). + +The table below outlines the MLlib models that can be exported to PMML and their equivalent PMML model. + + + + + + + + + + + + + + + + + + + + + + + + + +
<table class="table">
  <thead>
    <tr><th>MLlib model</th><th>PMML model</th></tr>
  </thead>
  <tbody>
    <tr><td>KMeansModel</td><td>ClusteringModel</td></tr>
    <tr><td>LinearRegressionModel</td><td>RegressionModel (functionName="regression")</td></tr>
    <tr><td>RidgeRegressionModel</td><td>RegressionModel (functionName="regression")</td></tr>
    <tr><td>LassoModel</td><td>RegressionModel (functionName="regression")</td></tr>
    <tr><td>SVMModel</td><td>RegressionModel (functionName="classification" normalizationMethod="none")</td></tr>
    <tr><td>Binary LogisticRegressionModel</td><td>RegressionModel (functionName="classification" normalizationMethod="logit")</td></tr>
  </tbody>
</table>
+ +## Examples +
+ +
+ To export a supported `model` (see table above) to PMML, simply call `model.toPMML`. + +Here is a complete example of building a KMeansModel and printing it out in PMML format: +{% highlight scala %} +import org.apache.spark.mllib.clustering.KMeans +import org.apache.spark.mllib.linalg.Vectors + +// Load and parse the data +val data = sc.textFile("data/mllib/kmeans_data.txt") +val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache() + +// Cluster the data into two classes using KMeans +val numClusters = 2 +val numIterations = 20 +val clusters = KMeans.train(parsedData, numClusters, numIterations) + +// Export to PMML +println("PMML Model:\n" + clusters.toPMML) +{% endhighlight %} + +In addition to exporting the PMML model to a String (`model.toPMML` as in the example above), you can export the PMML model to other formats: + +{% highlight scala %} +// Export the model to a String in PMML format +clusters.toPMML + +// Export the model to a local file in PMML format +clusters.toPMML("/tmp/kmeans.xml") + +// Export the model to a directory on a distributed file system in PMML format +clusters.toPMML(sc, "/tmp/kmeans") + +// Export the model to an OutputStream in PMML format +clusters.toPMML(System.out) +{% endhighlight %} + +For unsupported models, either you will not find a `.toPMML` method or an `IllegalArgumentException` will be thrown. +
+ +
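The same `toPMML` calls apply to the other supported models listed in the table above. As a hedged sketch (the dataset path and output file below are illustrative, not part of the guide's example), exporting a trained `LinearRegressionModel` could look like this:

{% highlight scala %}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}

// Load and parse labeled data (illustrative dataset shipped with Spark).
val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line =>
  val parts = line.split(',')
  LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
}.cache()

// Train a LinearRegressionModel, listed above as exportable to a PMML RegressionModel.
val numIterations = 100
val model = LinearRegressionWithSGD.train(data, numIterations)

// Export to a local file in PMML format (illustrative path).
model.toPMML("/tmp/linear-regression.xml")
{% endhighlight %}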
diff --git a/docs/monitoring.md b/docs/monitoring.md index 8a85928d6d44d..e75018499003a 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -174,6 +174,80 @@ making it easy to identify slow tasks, data skew, etc. Note that the history server only displays completed Spark jobs. One way to signal the completion of a Spark job is to stop the Spark Context explicitly (`sc.stop()`), or in Python using the `with SparkContext() as sc:` to handle the Spark Context setup and tear down, and still show the job history on the UI. +## REST API + +In addition to viewing the metrics in the UI, they are also available as JSON. This gives developers +an easy way to create new visualizations and monitoring tools for Spark. The JSON is available for +both running applications, and in the history server. The endpoints are mounted at `/api/v1`. Eg., +for the history server, they would typically be accessible at `http://:18080/api/v1`, and +for a running application, at `http://localhost:4040/api/v1`. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<table class="table">
  <thead>
    <tr><th>Endpoint</th><th>Meaning</th></tr>
  </thead>
  <tbody>
    <tr><td><code>/applications</code></td><td>A list of all applications</td></tr>
    <tr><td><code>/applications/[app-id]/jobs</code></td><td>A list of all jobs for a given application</td></tr>
    <tr><td><code>/applications/[app-id]/jobs/[job-id]</code></td><td>Details for the given job</td></tr>
    <tr><td><code>/applications/[app-id]/stages</code></td><td>A list of all stages for a given application</td></tr>
    <tr><td><code>/applications/[app-id]/stages/[stage-id]</code></td><td>A list of all attempts for the given stage</td></tr>
    <tr><td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]</code></td><td>Details for the given stage attempt</td></tr>
    <tr><td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]/taskSummary</code></td><td>Summary metrics of all tasks in the given stage attempt</td></tr>
    <tr><td><code>/applications/[app-id]/stages/[stage-id]/[stage-attempt-id]/taskList</code></td><td>A list of all tasks for the given stage attempt</td></tr>
    <tr><td><code>/applications/[app-id]/executors</code></td><td>A list of all executors for the given application</td></tr>
    <tr><td><code>/applications/[app-id]/storage/rdd</code></td><td>A list of stored RDDs for the given application</td></tr>
    <tr><td><code>/applications/[app-id]/storage/rdd/[rdd-id]</code></td><td>Details for the storage status of a given RDD</td></tr>
  </tbody>
</table>
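As a brief, hedged illustration of consuming these endpoints (the port and application ID below are placeholders; real IDs can be listed via the `/applications` endpoint), the JSON can be fetched with any HTTP client, for example from the Scala REPL:

{% highlight scala %}
import scala.io.Source

// Placeholder application ID; query /api/v1/applications to discover real ones.
val appId = "app-00000000000000-0000"

// Fetch the job list for a running application on the default UI port (4040).
val jobsJson = Source.fromURL(
  s"http://localhost:4040/api/v1/applications/$appId/jobs").mkString

println(jobsJson)
{% endhighlight %}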
+ +When running on Yarn, each application has multiple attempts, so `[app-id]` is actually +`[app-id]/[attempt-id]` in all cases. + +These endpoints have been strongly versioned to make it easier to develop applications on top. + In particular, Spark guarantees: + +* Endpoints will never be removed from one version +* Individual fields will never be removed for any given endpoint +* New endpoints may be added +* New fields may be added to existing endpoints +* New versions of the api may be added in the future at a separate endpoint (eg., `api/v2`). New versions are *not* required to be backwards compatible. +* Api versions may be dropped, but only after at least one minor release of co-existing with a new api version + +Note that even when examining the UI of a running applications, the `applications/[app-id]` portion is +still required, though there is only one application available. Eg. to see the list of jobs for the +running app, you would go to `http://localhost:4040/api/v1/applications/[app-id]/jobs`. This is to +keep the paths consistent in both modes. + # Metrics Spark has a configurable metrics system based on the diff --git a/docs/programming-guide.md b/docs/programming-guide.md index 27816515c5de2..10f474f237bfa 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -41,14 +41,15 @@ In addition, if you wish to access an HDFS cluster, you need to add a dependency artifactId = hadoop-client version = -Finally, you need to import some Spark classes and implicit conversions into your program. Add the following lines: +Finally, you need to import some Spark classes into your program. Add the following lines: {% highlight scala %} import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ import org.apache.spark.SparkConf {% endhighlight %} +(Before Spark 1.3.0, you need to explicitly `import org.apache.spark.SparkContext._` to enable essential implicit conversions.) +
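As a small, hedged sketch of how these imports are typically put to use (the application name and master below are placeholders, not values from this guide), initialization looks roughly like:

{% highlight scala %}
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

// Placeholder app name and master; these are often supplied via spark-submit instead.
val conf = new SparkConf().setAppName("SimpleApp").setMaster("local[2]")
val sc = new SparkContext(conf)
{% endhighlight %}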
@@ -97,9 +98,9 @@ to your version of HDFS. Some common HDFS version tags are listed on the [Prebuilt packages](http://spark.apache.org/downloads.html) are also available on the Spark homepage for common HDFS versions. -Finally, you need to import some Spark classes into your program. Add the following lines: +Finally, you need to import some Spark classes into your program. Add the following line: -{% highlight scala %} +{% highlight python %} from pyspark import SparkContext, SparkConf {% endhighlight %} @@ -477,7 +478,6 @@ the [Converter examples]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main for examples of using Cassandra / HBase ```InputFormat``` and ```OutputFormat``` with custom converters.
- ## RDD Operations @@ -821,11 +821,9 @@ by a key. In Scala, these operations are automatically available on RDDs containing [Tuple2](http://www.scala-lang.org/api/{{site.SCALA_VERSION}}/index.html#scala.Tuple2) objects -(the built-in tuples in the language, created by simply writing `(a, b)`), as long as you -import `org.apache.spark.SparkContext._` in your program to enable Spark's implicit -conversions. The key-value pair operations are available in the +(the built-in tuples in the language, created by simply writing `(a, b)`). The key-value pair operations are available in the [PairRDDFunctions](api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions) class, -which automatically wraps around an RDD of tuples if you import the conversions. +which automatically wraps around an RDD of tuples. For example, the following code uses the `reduceByKey` operation on key-value pairs to count how many times each line of text occurs in a file: @@ -916,7 +914,8 @@ The following table lists some of the common transformations supported by Spark. RDD API doc ([Scala](api/scala/index.html#org.apache.spark.rdd.RDD), [Java](api/java/index.html?org/apache/spark/api/java/JavaRDD.html), - [Python](api/python/pyspark.html#pyspark.RDD)) + [Python](api/python/pyspark.html#pyspark.RDD), + [R](api/R/index.html)) and pair RDD functions doc ([Scala](api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions), [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) @@ -1029,7 +1028,9 @@ The following table lists some of the common actions supported by Spark. Refer t RDD API doc ([Scala](api/scala/index.html#org.apache.spark.rdd.RDD), [Java](api/java/index.html?org/apache/spark/api/java/JavaRDD.html), - [Python](api/python/pyspark.html#pyspark.RDD)) + [Python](api/python/pyspark.html#pyspark.RDD), + [R](api/R/index.html)) + and pair RDD functions doc ([Scala](api/scala/index.html#org.apache.spark.rdd.PairRDDFunctions), [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) @@ -1071,7 +1072,7 @@ for details. saveAsSequenceFile(path)
(Java and Scala) - Write the elements of the dataset as a Hadoop SequenceFile in a given path in the local filesystem, HDFS or any other Hadoop-supported file system. This is available on RDDs of key-value pairs that either implement Hadoop's Writable interface. In Scala, it is also + Write the elements of the dataset as a Hadoop SequenceFile in a given path in the local filesystem, HDFS or any other Hadoop-supported file system. This is available on RDDs of key-value pairs that implement Hadoop's Writable interface. In Scala, it is also available on types that are implicitly convertible to Writable (Spark includes conversions for basic types like Int, Double, String, etc). @@ -1122,7 +1123,7 @@ ordered data following shuffle then it's possible to use: * `sortBy` to make a globally ordered RDD Operations which can cause a shuffle include **repartition** operations like -[`repartition`](#RepartitionLink), and [`coalesce`](#CoalesceLink), **'ByKey** operations +[`repartition`](#RepartitionLink) and [`coalesce`](#CoalesceLink), **'ByKey** operations (except for counting) like [`groupByKey`](#GroupByLink) and [`reduceByKey`](#ReduceByLink), and **join** operations like [`cogroup`](#CogroupLink) and [`join`](#JoinLink). @@ -1138,7 +1139,7 @@ read the relevant sorted blocks. Certain shuffle operations can consume significant amounts of heap memory since they employ in-memory data structures to organize records before or after transferring them. Specifically, -`reduceByKey` and `aggregateByKey` create these structures on the map side and `'ByKey` operations +`reduceByKey` and `aggregateByKey` create these structures on the map side, and `'ByKey` operations generate these on the reduce side. When data does not fit in memory Spark will spill these tables to disk, incurring the additional overhead of disk I/O and increased garbage collection. @@ -1213,9 +1214,11 @@ storage levels is: Compared to MEMORY_ONLY_SER, OFF_HEAP reduces garbage collection overhead and allows executors to be smaller and to share a pool of memory, making it attractive in environments with large heaps or multiple concurrent applications. Furthermore, as the RDDs reside in Tachyon, - the crash of an executor does not lead to losing the in-memory cache. In this mode, the memory + the crash of an executor does not lead to losing the in-memory cache. In this mode, the memory in Tachyon is discardable. Thus, Tachyon does not attempt to reconstruct a block that it evicts - from memory. + from memory. If you plan to use Tachyon as the off heap store, Spark is compatible with Tachyon + out-of-the-box. Please refer to this page + for the suggested version pairings. @@ -1566,7 +1569,8 @@ You can see some [example Spark programs](http://spark.apache.org/examples.html) In addition, Spark includes several samples in the `examples` directory ([Scala]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/scala/org/apache/spark/examples), [Java]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/java/org/apache/spark/examples), - [Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python)). + [Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python), + [R]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/r)). 
You can run Java and Scala examples by passing the class name to Spark's `bin/run-example` script; for instance: ./bin/run-example SparkPi @@ -1575,6 +1579,10 @@ For Python examples, use `spark-submit` instead: ./bin/spark-submit examples/src/main/python/pi.py +For R examples, use `spark-submit` instead: + + ./bin/spark-submit examples/src/main/r/dataframe.R + For help on optimizing your programs, the [configuration](configuration.html) and [tuning](tuning.html) guides provide information on best practices. They are especially important for making sure that your data is stored in memory in an efficient format. @@ -1582,4 +1590,4 @@ For help on deploying, the [cluster mode overview](cluster-overview.html) descri in distributed operation and supported cluster managers. Finally, full API documentation is available in -[Scala](api/scala/#org.apache.spark.package), [Java](api/java/) and [Python](api/python/). +[Scala](api/scala/#org.apache.spark.package), [Java](api/java/), [Python](api/python/) and [R](api/R/). diff --git a/docs/quick-start.md b/docs/quick-start.md index 81143da865cf0..bb39e4111f244 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -184,10 +184,10 @@ scala> linesWithSpark.cache() res7: spark.RDD[String] = spark.FilteredRDD@17e51082 scala> linesWithSpark.count() -res8: Long = 15 +res8: Long = 19 scala> linesWithSpark.count() -res9: Long = 15 +res9: Long = 19 {% endhighlight %} It may seem silly to use Spark to explore and cache a 100-line text file. The interesting part is @@ -202,10 +202,10 @@ a cluster, as described in the [programming guide](programming-guide.html#initia >>> linesWithSpark.cache() >>> linesWithSpark.count() -15 +19 >>> linesWithSpark.count() -15 +19 {% endhighlight %} It may seem silly to use Spark to explore and cache a 100-line text file. The interesting part is @@ -423,14 +423,14 @@ dependencies to `spark-submit` through its `--py-files` argument by packaging th We can run this application using the `bin/spark-submit` script: -{% highlight python %} +{% highlight bash %} # Use spark-submit to run your application $ YOUR_SPARK_HOME/bin/spark-submit \ --master local[4] \ SimpleApp.py ... Lines with a: 46, Lines with b: 23 -{% endhighlight python %} +{% endhighlight %} @@ -444,7 +444,8 @@ Congratulations on running your first Spark application! * Finally, Spark includes several samples in the `examples` directory ([Scala]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/scala/org/apache/spark/examples), [Java]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/java/org/apache/spark/examples), - [Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python)). + [Python]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/python), + [R]({{site.SPARK_GITHUB_URL}}/tree/master/examples/src/main/r)). You can run them as follows: {% highlight bash %} @@ -453,4 +454,7 @@ You can run them as follows: # For Python examples, use spark-submit directly: ./bin/spark-submit examples/src/main/python/pi.py + +# For R examples, use spark-submit directly: +./bin/spark-submit examples/src/main/r/dataframe.R {% endhighlight %} diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index b6701b64c2925..4fb4a90307ec8 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -133,6 +133,13 @@ Most of the configs are the same for Spark on YARN as for other deployment modes Same as spark.yarn.driver.memoryOverhead, but for the Application Master in client mode. 
+ + spark.yarn.am.port + (random) + + Port for the YARN Application Master to listen on. In YARN client mode, this is used to communicate between the Spark driver running on a gateway and the Application Master running on YARN. In YARN cluster mode, this is used for the dynamic executor feature, where it handles the kill from the scheduler backend. + + spark.yarn.queue default diff --git a/docs/sparkr.md b/docs/sparkr.md new file mode 100644 index 0000000000000..4d82129921a37 --- /dev/null +++ b/docs/sparkr.md @@ -0,0 +1,223 @@ +--- +layout: global +displayTitle: SparkR (R on Spark) +title: SparkR (R on Spark) +--- + +* This will become a table of contents (this text will be scraped). +{:toc} + +# Overview +SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. +In Spark {{site.SPARK_VERSION}}, SparkR provides a distributed data frame implementation that +supports operations like selection, filtering, aggregation etc. (similar to R data frames, +[dplyr](https://github.com/hadley/dplyr)) but on large datasets. + +# SparkR DataFrames + +A DataFrame is a distributed collection of data organized into named columns. It is conceptually +equivalent to a table in a relational database or a data frame in R, but with richer +optimizations under the hood. DataFrames can be constructed from a wide array of sources such as: +structured data files, tables in Hive, external databases, or existing local R data frames. + +All of the examples on this page use sample data included in R or the Spark distribution and can be run using the `./bin/sparkR` shell. + +## Starting Up: SparkContext, SQLContext + +
+The entry point into SparkR is the `SparkContext` which connects your R program to a Spark cluster. +You can create a `SparkContext` using `sparkR.init` and pass in options such as the application name +etc. Further, to work with DataFrames we will need a `SQLContext`, which can be created from the +SparkContext. If you are working from the SparkR shell, the `SQLContext` and `SparkContext` should +already be created for you. + +{% highlight r %} +sc <- sparkR.init() +sqlContext <- sparkRSQL.init(sc) +{% endhighlight %} + +
+ +## Creating DataFrames +With a `SQLContext`, applications can create `DataFrame`s from a local R data frame, from a [Hive table](sql-programming-guide.html#hive-tables), or from other [data sources](sql-programming-guide.html#data-sources). + +### From local data frames +The simplest way to create a data frame is to convert a local R data frame into a SparkR DataFrame. Specifically, we can use `createDataFrame` and pass in the local R data frame to create a SparkR DataFrame. As an example, the following creates a `DataFrame` based on the `faithful` dataset from R. + +
+{% highlight r %} +df <- createDataFrame(sqlContext, faithful) + +# Displays the content of the DataFrame to stdout +head(df) +## eruptions waiting +##1 3.600 79 +##2 1.800 54 +##3 3.333 74 + +{% endhighlight %} +
+ +### From Data Sources + +SparkR supports operating on a variety of data sources through the `DataFrame` interface. This section describes the general methods for loading and saving data using Data Sources. You can check the Spark SQL programming guide for more [specific options](sql-programming-guide.html#manually-specifying-options) that are available for the built-in data sources. + +The general method for creating DataFrames from data sources is `read.df`. This method takes in the `SQLContext`, the path for the file to load and the type of data source. SparkR supports reading JSON and Parquet files natively and through [Spark Packages](http://spark-packages.org/) you can find data source connectors for popular file formats like [CSV](http://spark-packages.org/package/databricks/spark-csv) and [Avro](http://spark-packages.org/package/databricks/spark-avro). + +We can see how to use data sources using an example JSON input file. Note that the file that is used here is _not_ a typical JSON file. Each line in the file must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail. + +
+ +{% highlight r %} +people <- read.df(sqlContext, "./examples/src/main/resources/people.json", "json") +head(people) +## age name +##1 NA Michael +##2 30 Andy +##3 19 Justin + +# SparkR automatically infers the schema from the JSON file +printSchema(people) +# root +# |-- age: integer (nullable = true) +# |-- name: string (nullable = true) + +{% endhighlight %} +
+ +The data sources API can also be used to save out DataFrames into multiple file formats. For example we can save the DataFrame from the previous example +to a Parquet file using `write.df` + +
+{% highlight r %} +write.df(people, path="people.parquet", source="parquet", mode="overwrite") +{% endhighlight %} +
+ +### From Hive tables + +You can also create SparkR DataFrames from Hive tables. To do this we will need to create a HiveContext which can access tables in the Hive MetaStore. Note that Spark should have been built with [Hive support](building-spark.html#building-with-hive-and-jdbc-support) and more details on the difference between SQLContext and HiveContext can be found in the [SQL programming guide](sql-programming-guide.html#starting-point-sqlcontext). + +
+{% highlight r %} +# sc is an existing SparkContext. +hiveContext <- sparkRHive.init(sc) + +sql(hiveContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)") +sql(hiveContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src") + +# Queries can be expressed in HiveQL. +results <- sql(hiveContext, "FROM src SELECT key, value") + +# results is now a DataFrame +head(results) +## key value +## 1 238 val_238 +## 2 86 val_86 +## 3 311 val_311 + +{% endhighlight %} +
+ +## DataFrame Operations + +SparkR DataFrames support a number of functions to do structured data processing. +Here we include some basic examples and a complete list can be found in the [API](api/R/index.html) docs: + +### Selecting rows, columns + +
+{% highlight r %} +# Create the DataFrame +df <- createDataFrame(sqlContext, faithful) + +# Get basic information about the DataFrame +df +## DataFrame[eruptions:double, waiting:double] + +# Select only the "eruptions" column +head(select(df, df$eruptions)) +## eruptions +##1 3.600 +##2 1.800 +##3 3.333 + +# You can also pass in column name as strings +head(select(df, "eruptions")) + +# Filter the DataFrame to only retain rows with wait times shorter than 50 mins +head(filter(df, df$waiting < 50)) +## eruptions waiting +##1 1.750 47 +##2 1.750 47 +##3 1.867 48 + +{% endhighlight %} + +
+ +### Grouping, Aggregation + +SparkR data frames support a number of commonly used functions to aggregate data after grouping. For example we can compute a histogram of the `waiting` time in the `faithful` dataset as shown below + +
+{% highlight r %} + +# We use the `n` operator to count the number of times each waiting time appears +head(summarize(groupBy(df, df$waiting), count = n(df$waiting))) +## waiting count +##1 81 13 +##2 60 6 +##3 68 1 + +# We can also sort the output from the aggregation to get the most common waiting times +waiting_counts <- summarize(groupBy(df, df$waiting), count = n(df$waiting)) +head(arrange(waiting_counts, desc(waiting_counts$count))) + +## waiting count +##1 78 15 +##2 83 14 +##3 81 13 + +{% endhighlight %} +
+ +### Operating on Columns + +SparkR also provides a number of functions that can be applied directly to columns for data processing and during aggregation. The example below shows the use of basic arithmetic functions. + +
+{% highlight r %} + +# Convert waiting time from hours to seconds. +# Note that we can assign this to a new column in the same DataFrame +df$waiting_secs <- df$waiting * 60 +head(df) +## eruptions waiting waiting_secs +##1 3.600 79 4740 +##2 1.800 54 3240 +##3 3.333 74 4440 + +{% endhighlight %} +
+ +## Running SQL Queries from SparkR +A SparkR DataFrame can also be registered as a temporary table in Spark SQL, and registering a DataFrame as a table allows you to run SQL queries over its data. +The `sql` function enables applications to run SQL queries programmatically and returns the result as a `DataFrame`. + +
+{% highlight r %} +# Load a JSON file +people <- read.df(sqlContext, "./examples/src/main/resources/people.json", "json") + +# Register this DataFrame as a table. +registerTempTable(people, "people") + +# SQL statements can be run by using the sql method +teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19") +head(teenagers) +## name +##1 Justin + +{% endhighlight %} +
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index b8233ae06fdf3..282ea75e1e785 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -11,14 +11,15 @@ title: Spark SQL and DataFrames Spark SQL is a Spark module for structured data processing. It provides a programming abstraction called DataFrames and can also act as distributed SQL query engine. +For how to enable Hive support, please refer to the [Hive Tables](#hive-tables) section. # DataFrames A DataFrame is a distributed collection of data organized into named columns. It is conceptually equivalent to a table in a relational database or a data frame in R/Python, but with richer optimizations under the hood. DataFrames can be constructed from a wide array of sources such as: structured data files, tables in Hive, external databases, or existing RDDs. -The DataFrame API is available in [Scala](api/scala/index.html#org.apache.spark.sql.DataFrame), [Java](api/java/index.html?org/apache/spark/sql/DataFrame.html), and [Python](api/python/pyspark.sql.html#pyspark.sql.DataFrame). +The DataFrame API is available in [Scala](api/scala/index.html#org.apache.spark.sql.DataFrame), [Java](api/java/index.html?org/apache/spark/sql/DataFrame.html), [Python](api/python/pyspark.sql.html#pyspark.sql.DataFrame), and [R](api/R/index.html). -All of the examples on this page use sample data included in the Spark distribution and can be run in the `spark-shell` or the `pyspark` shell. +All of the examples on this page use sample data included in the Spark distribution and can be run in the `spark-shell`, `pyspark` shell, or `sparkR` shell. ## Starting Point: `SQLContext` @@ -64,6 +65,17 @@ from pyspark.sql import SQLContext sqlContext = SQLContext(sc) {% endhighlight %} + + +
+ +The entry point into all relational functionality in Spark is the +`SQLContext` class, or one of its descendants. To create a basic `SQLContext`, all you need is a SparkContext. + +{% highlight r %} +sqlContext <- sparkRSQL.init(sc) +{% endhighlight %} +
@@ -97,7 +109,7 @@ As an example, the following creates a `DataFrame` based on the content of a JSO val sc: SparkContext // An existing SparkContext. val sqlContext = new org.apache.spark.sql.SQLContext(sc) -val df = sqlContext.jsonFile("examples/src/main/resources/people.json") +val df = sqlContext.read.json("examples/src/main/resources/people.json") // Displays the content of the DataFrame to stdout df.show() @@ -110,7 +122,7 @@ df.show() JavaSparkContext sc = ...; // An existing JavaSparkContext. SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc); -DataFrame df = sqlContext.jsonFile("examples/src/main/resources/people.json"); +DataFrame df = sqlContext.read().json("examples/src/main/resources/people.json"); // Displays the content of the DataFrame to stdout df.show(); @@ -123,13 +135,26 @@ df.show(); from pyspark.sql import SQLContext sqlContext = SQLContext(sc) -df = sqlContext.jsonFile("examples/src/main/resources/people.json") +df = sqlContext.read.json("examples/src/main/resources/people.json") # Displays the content of the DataFrame to stdout df.show() {% endhighlight %} + +
+{% highlight r %} +sqlContext <- sparkRSQL.init(sc) + +df <- jsonFile(sqlContext, "examples/src/main/resources/people.json") + +# Displays the content of the DataFrame to stdout +showDF(df) +{% endhighlight %} + +
+ @@ -139,7 +164,6 @@ DataFrames provide a domain-specific language for structured data manipulation i Here we include some basic examples of structured data processing using DataFrames: -
{% highlight scala %} @@ -147,7 +171,7 @@ val sc: SparkContext // An existing SparkContext. val sqlContext = new org.apache.spark.sql.SQLContext(sc) // Create the DataFrame -val df = sqlContext.jsonFile("examples/src/main/resources/people.json") +val df = sqlContext.read.json("examples/src/main/resources/people.json") // Show the content of the DataFrame df.show() @@ -197,7 +221,7 @@ JavaSparkContext sc // An existing SparkContext. SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc) // Create the DataFrame -DataFrame df = sqlContext.jsonFile("examples/src/main/resources/people.json"); +DataFrame df = sqlContext.read().json("examples/src/main/resources/people.json"); // Show the content of the DataFrame df.show(); @@ -242,12 +266,18 @@ df.groupBy("age").count().show();
+In Python it's possible to access a DataFrame's columns either by attribute +(`df.age`) or by indexing (`df['age']`). While the former is convenient for +interactive data exploration, users are highly encouraged to use the +latter form, which is future proof and won't break with column names that +are also attributes on the DataFrame class. + {% highlight python %} from pyspark.sql import SQLContext sqlContext = SQLContext(sc) # Create the DataFrame -df = sqlContext.jsonFile("examples/src/main/resources/people.json") +df = sqlContext.read.json("examples/src/main/resources/people.json") # Show the content of the DataFrame df.show() @@ -270,14 +300,14 @@ df.select("name").show() ## Justin # Select everybody, but increment the age by 1 -df.select(df.name, df.age + 1).show() +df.select(df['name'], df['age'] + 1).show() ## name (age + 1) ## Michael null ## Andy 31 ## Justin 20 # Select people older than 21 -df.filter(df.age > 21).show() +df.filter(df['age'] > 21).show() ## age name ## 30 Andy @@ -291,6 +321,57 @@ df.groupBy("age").count().show() {% endhighlight %}
+ +
+{% highlight r %} +sqlContext <- sparkRSQL.init(sc) + +# Create the DataFrame +df <- jsonFile(sqlContext, "examples/src/main/resources/people.json") + +# Show the content of the DataFrame +showDF(df) +## age name +## null Michael +## 30 Andy +## 19 Justin + +# Print the schema in a tree format +printSchema(df) +## root +## |-- age: long (nullable = true) +## |-- name: string (nullable = true) + +# Select only the "name" column +showDF(select(df, "name")) +## name +## Michael +## Andy +## Justin + +# Select everybody, but increment the age by 1 +showDF(select(df, df$name, df$age + 1)) +## name (age + 1) +## Michael null +## Andy 31 +## Justin 20 + +# Select people older than 21 +showDF(where(df, df$age > 21)) +## age name +## 30 Andy + +# Count people by age +showDF(count(groupBy(df, "age"))) +## age count +## null 1 +## 19 1 +## 30 1 + +{% endhighlight %} + +
+
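+
+As with the Python note above, columns of a Scala `DataFrame` can be referenced in several
+equivalent ways. A minimal sketch, assuming `import sqlContext.implicits._` is in scope for the
+`$"..."` syntax:
+
+{% highlight scala %}
+import sqlContext.implicits._
+
+// df("age") and $"age" both produce a Column and can be mixed freely.
+df.select(df("name"), $"age" + 1).show()
+df.filter($"age" > 21).show()
+{% endhighlight %}
+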
@@ -320,6 +401,14 @@ sqlContext = SQLContext(sc) df = sqlContext.sql("SELECT * FROM table") {% endhighlight %} + +
+{% highlight r %} +sqlContext <- sparkRSQL.init(sc) +df <- sql(sqlContext, "SELECT * FROM table") +{% endhighlight %} +
+ @@ -362,11 +451,18 @@ val people = sc.textFile("examples/src/main/resources/people.txt").map(_.split(" people.registerTempTable("people") // SQL statements can be run by using the sql methods provided by sqlContext. -val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19") +val teenagers = sqlContext.sql("SELECT name, age FROM people WHERE age >= 13 AND age <= 19") // The results of SQL queries are DataFrames and support all the normal RDD operations. -// The columns of a row in the result can be accessed by ordinal. +// The columns of a row in the result can be accessed by field index: teenagers.map(t => "Name: " + t(0)).collect().foreach(println) + +// or by field name: +teenagers.map(t => "Name: " + t.getAs[String]("name")).collect().foreach(println) + +// row.getValuesMap[T] retrieves multiple columns at once into a Map[String, T] +teenagers.map(_.getValuesMap[Any](List("name", "age"))).collect().foreach(println) +// Map("name" -> "Justin", "age" -> 19) {% endhighlight %} @@ -465,7 +561,7 @@ parts = lines.map(lambda l: l.split(",")) people = parts.map(lambda p: Row(name=p[0], age=int(p[1]))) # Infer the schema, and register the DataFrame as a table. -schemaPeople = sqlContext.inferSchema(people) +schemaPeople = sqlContext.createDataFrame(people) schemaPeople.registerTempTable("people") # SQL can be run over DataFrames that have been registered as a table. @@ -533,7 +629,7 @@ peopleDataFrame.registerTempTable("people") val results = sqlContext.sql("SELECT name FROM people") // The results of SQL queries are DataFrames and support all the normal RDD operations. -// The columns of a row in the result can be accessed by ordinal. +// The columns of a row in the result can be accessed by field index or by field name. results.map(t => "Name: " + t(0)).collect().foreach(println) {% endhighlight %} @@ -681,8 +777,8 @@ In the simplest form, the default data source (`parquet` unless otherwise config
{% highlight scala %} -val df = sqlContext.load("examples/src/main/resources/users.parquet") -df.select("name", "favorite_color").save("namesAndFavColors.parquet") +val df = sqlContext.read.load("examples/src/main/resources/users.parquet") +df.select("name", "favorite_color").write.save("namesAndFavColors.parquet") {% endhighlight %}
@@ -691,8 +787,8 @@ df.select("name", "favorite_color").save("namesAndFavColors.parquet") {% highlight java %} -DataFrame df = sqlContext.load("examples/src/main/resources/users.parquet"); -df.select("name", "favorite_color").save("namesAndFavColors.parquet"); +DataFrame df = sqlContext.read().load("examples/src/main/resources/users.parquet"); +df.select("name", "favorite_color").write().save("namesAndFavColors.parquet"); {% endhighlight %} @@ -702,11 +798,20 @@ df.select("name", "favorite_color").save("namesAndFavColors.parquet"); {% highlight python %} -df = sqlContext.load("examples/src/main/resources/users.parquet") -df.select("name", "favorite_color").save("namesAndFavColors.parquet") +df = sqlContext.read.load("examples/src/main/resources/users.parquet") +df.select("name", "favorite_color").write.save("namesAndFavColors.parquet") {% endhighlight %} + + +
+ +{% highlight r %} +df <- loadDF(sqlContext, "people.parquet") +saveDF(select(df, "name", "age"), "namesAndAges.parquet") +{% endhighlight %} +
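+
+When writing through the generic interface, the behaviour for existing output can be chosen
+explicitly as well. A minimal Scala sketch; the accepted mode names correspond to the save modes
+listed in the table below:
+
+{% highlight scala %}
+val df = sqlContext.read.load("examples/src/main/resources/users.parquet")
+// "error" (the default), "append", "overwrite" and "ignore" are accepted by mode().
+df.select("name", "favorite_color").write.mode("overwrite").save("namesAndFavColors.parquet")
+{% endhighlight %}
+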
@@ -722,8 +827,8 @@ using this syntax.
 {% highlight scala %}
-val df = sqlContext.load("examples/src/main/resources/people.json", "json")
-df.select("name", "age").save("namesAndAges.parquet", "parquet")
+val df = sqlContext.read.format("json").load("examples/src/main/resources/people.json")
+df.select("name", "age").write.format("parquet").save("namesAndAges.parquet")
 {% endhighlight %}
@@ -732,8 +837,8 @@ df.select("name", "age").save("namesAndAges.parquet", "parquet") {% highlight java %} -DataFrame df = sqlContext.load("examples/src/main/resources/people.json", "json"); -df.select("name", "age").save("namesAndAges.parquet", "parquet"); +DataFrame df = sqlContext.read().format("json").load("examples/src/main/resources/people.json"); +df.select("name", "age").write().format("parquet").save("namesAndAges.parquet"); {% endhighlight %} @@ -743,8 +848,18 @@ df.select("name", "age").save("namesAndAges.parquet", "parquet"); {% highlight python %} -df = sqlContext.load("examples/src/main/resources/people.json", "json") -df.select("name", "age").save("namesAndAges.parquet", "parquet") +df = sqlContext.read.load("examples/src/main/resources/people.json", format="json") +df.select("name", "age").write.save("namesAndAges.parquet", format="parquet") + +{% endhighlight %} + + +
+ +{% highlight r %} + +df <- loadDF(sqlContext, "people.json", "json") +saveDF(select(df, "name", "age"), "namesAndAges.parquet", "parquet") {% endhighlight %} @@ -792,7 +907,7 @@ new data. Ignore mode means that when saving a DataFrame to a data source, if data already exists, the save operation is expected to not save the contents of the DataFrame and to not - change the existing data. This is similar to a `CREATE TABLE IF NOT EXISTS` in SQL. + change the existing data. This is similar to a CREATE TABLE IF NOT EXISTS in SQL. @@ -832,11 +947,11 @@ import sqlContext.implicits._ val people: RDD[Person] = ... // An RDD of case class objects, from the previous example. // The RDD is implicitly converted to a DataFrame by implicits, allowing it to be stored using Parquet. -people.saveAsParquetFile("people.parquet") +people.write.parquet("people.parquet") // Read in the parquet file created above. Parquet files are self-describing so the schema is preserved. // The result of loading a Parquet file is also a DataFrame. -val parquetFile = sqlContext.parquetFile("people.parquet") +val parquetFile = sqlContext.read.parquet("people.parquet") //Parquet files can also be registered as tables and then used in SQL statements. parquetFile.registerTempTable("parquetFile") @@ -854,11 +969,11 @@ teenagers.map(t => "Name: " + t(0)).collect().foreach(println) DataFrame schemaPeople = ... // The DataFrame from the previous example. // DataFrames can be saved as Parquet files, maintaining the schema information. -schemaPeople.saveAsParquetFile("people.parquet"); +schemaPeople.write().parquet("people.parquet"); // Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved. // The result of loading a parquet file is also a DataFrame. -DataFrame parquetFile = sqlContext.parquetFile("people.parquet"); +DataFrame parquetFile = sqlContext.read().parquet("people.parquet"); //Parquet files can also be registered as tables and then used in SQL statements. parquetFile.registerTempTable("parquetFile"); @@ -880,11 +995,11 @@ List teenagerNames = teenagers.javaRDD().map(new Function() schemaPeople # The DataFrame from the previous example. # DataFrames can be saved as Parquet files, maintaining the schema information. -schemaPeople.saveAsParquetFile("people.parquet") +schemaPeople.read.parquet("people.parquet") # Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved. # The result of loading a parquet file is also a DataFrame. -parquetFile = sqlContext.parquetFile("people.parquet") +parquetFile = sqlContext.write.parquet("people.parquet") # Parquet files can also be registered as tables and then used in SQL statements. parquetFile.registerTempTable("parquetFile"); @@ -896,6 +1011,31 @@ for teenName in teenNames.collect():
+
+ +{% highlight r %} +# sqlContext from the previous example is used in this example. + +schemaPeople # The DataFrame from the previous example. + +# DataFrames can be saved as Parquet files, maintaining the schema information. +saveAsParquetFile(schemaPeople, "people.parquet") + +# Read in the Parquet file created above. Parquet files are self-describing so the schema is preserved. +# The result of loading a parquet file is also a DataFrame. +parquetFile <- parquetFile(sqlContext, "people.parquet") + +# Parquet files can also be registered as tables and then used in SQL statements. +registerTempTable(parquetFile, "parquetFile"); +teenagers <- sql(sqlContext, "SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19") +teenNames <- map(teenagers, function(p) { paste("Name:", p$name)}) +for (teenName in collect(teenNames)) { + cat(teenName, "\n") +} +{% endhighlight %} + +
+
{% highlight sql %} @@ -947,9 +1087,9 @@ path {% endhighlight %} -By passing `path/to/table` to either `SQLContext.parquetFile` or `SQLContext.load`, Spark SQL will -automatically extract the partitioning information from the paths. Now the schema of the returned -DataFrame becomes: +By passing `path/to/table` to either `SQLContext.read.parquet` or `SQLContext.read.load`, Spark SQL +will automatically extract the partitioning information from the paths. +Now the schema of the returned DataFrame becomes: {% highlight text %} @@ -982,15 +1122,15 @@ import sqlContext.implicits._ // Create a simple DataFrame, stored into a partition directory val df1 = sparkContext.makeRDD(1 to 5).map(i => (i, i * 2)).toDF("single", "double") -df1.saveAsParquetFile("data/test_table/key=1") +df1.write.parquet("data/test_table/key=1") // Create another DataFrame in a new partition directory, // adding a new column and dropping an existing column val df2 = sparkContext.makeRDD(6 to 10).map(i => (i, i * 3)).toDF("single", "triple") -df2.saveAsParquetFile("data/test_table/key=2") +df2.write.parquet("data/test_table/key=2") // Read the partitioned table -val df3 = sqlContext.parquetFile("data/test_table") +val df3 = sqlContext.read.parquet("data/test_table") df3.printSchema() // The final schema consists of all 3 columns in the Parquet files together @@ -1021,7 +1161,7 @@ df2 = sqlContext.createDataFrame(sc.parallelize(range(6, 11)) df2.save("data/test_table/key=2", "parquet") # Read the partitioned table -df3 = sqlContext.parquetFile("data/test_table") +df3 = sqlContext.load("data/test_table", "parquet") df3.printSchema() # The final schema consists of all 3 columns in the Parquet files together @@ -1035,6 +1175,33 @@ df3.printSchema()
+
+
+{% highlight r %}
+# sqlContext from the previous example is used in this example.
+
+# Create a simple DataFrame, stored into a partition directory
+saveDF(df1, "data/test_table/key=1", "parquet", "overwrite")
+
+# Create another DataFrame in a new partition directory,
+# adding a new column and dropping an existing column
+saveDF(df2, "data/test_table/key=2", "parquet", "overwrite")
+
+# Read the partitioned table
+df3 <- loadDF(sqlContext, "data/test_table", "parquet")
+printSchema(df3)
+
+# The final schema consists of all 3 columns in the Parquet files together
+# with the partitioning column appearing in the partition directory paths.
+# root
+# |-- single: int (nullable = true)
+# |-- double: int (nullable = true)
+# |-- triple: int (nullable = true)
+# |-- key : int (nullable = true)
+{% endhighlight %}
+
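+Since the partitioning column shows up in the inferred schema like any other column, it can be
+used directly in later queries. A minimal Scala sketch of this, assuming the `data/test_table`
+layout created above:
+
+{% highlight scala %}
+// "key" comes from the partition directory names, not from the Parquet files themselves,
+// but it can be selected and filtered on like a regular column.
+val df3 = sqlContext.read.parquet("data/test_table")
+df3.filter(df3("key") === 1).show()
+{% endhighlight %}
+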
+ ### Configuration @@ -1102,12 +1269,10 @@ Configuration of Parquet can be done using the `setConf` method on `SQLContext`
Spark SQL can automatically infer the schema of a JSON dataset and load it as a DataFrame. -This conversion can be done using one of two methods in a `SQLContext`: - -* `jsonFile` - loads data from a directory of JSON files where each line of the files is a JSON object. -* `jsonRDD` - loads data from an existing RDD where each element of the RDD is a string containing a JSON object. +This conversion can be done using `SQLContext.read.json()` on either an RDD of String, +or a JSON file. -Note that the file that is offered as _jsonFile_ is not a typical JSON file. Each +Note that the file that is offered as _a json file_ is not a typical JSON file. Each line must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail. @@ -1118,8 +1283,7 @@ val sqlContext = new org.apache.spark.sql.SQLContext(sc) // A JSON dataset is pointed to by path. // The path can be either a single text file or a directory storing text files. val path = "examples/src/main/resources/people.json" -// Create a DataFrame from the file(s) pointed to by path -val people = sqlContext.jsonFile(path) +val people = sqlContext.read.json(path) // The inferred schema can be visualized using the printSchema() method. people.printSchema() @@ -1137,19 +1301,17 @@ val teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age // an RDD[String] storing one JSON object per string. val anotherPeopleRDD = sc.parallelize( """{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}""" :: Nil) -val anotherPeople = sqlContext.jsonRDD(anotherPeopleRDD) +val anotherPeople = sqlContext.read.json(anotherPeopleRDD) {% endhighlight %}
Spark SQL can automatically infer the schema of a JSON dataset and load it as a DataFrame. -This conversion can be done using one of two methods in a `SQLContext` : +This conversion can be done using `SQLContext.read().json()` on either an RDD of String, +or a JSON file. -* `jsonFile` - loads data from a directory of JSON files where each line of the files is a JSON object. -* `jsonRDD` - loads data from an existing RDD where each element of the RDD is a string containing a JSON object. - -Note that the file that is offered as _jsonFile_ is not a typical JSON file. Each +Note that the file that is offered as _a json file_ is not a typical JSON file. Each line must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail. @@ -1159,9 +1321,7 @@ SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc); // A JSON dataset is pointed to by path. // The path can be either a single text file or a directory storing text files. -String path = "examples/src/main/resources/people.json"; -// Create a DataFrame from the file(s) pointed to by path -DataFrame people = sqlContext.jsonFile(path); +DataFrame people = sqlContext.read().json("examples/src/main/resources/people.json"); // The inferred schema can be visualized using the printSchema() method. people.printSchema(); @@ -1180,18 +1340,15 @@ DataFrame teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AN List jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); JavaRDD anotherPeopleRDD = sc.parallelize(jsonData); -DataFrame anotherPeople = sqlContext.jsonRDD(anotherPeopleRDD); +DataFrame anotherPeople = sqlContext.read().json(anotherPeopleRDD); {% endhighlight %}
Spark SQL can automatically infer the schema of a JSON dataset and load it as a DataFrame. -This conversion can be done using one of two methods in a `SQLContext`: - -* `jsonFile` - loads data from a directory of JSON files where each line of the files is a JSON object. -* `jsonRDD` - loads data from an existing RDD where each element of the RDD is a string containing a JSON object. +This conversion can be done using `SQLContext.read.json` on a JSON file. -Note that the file that is offered as _jsonFile_ is not a typical JSON file. Each +Note that the file that is offered as _a json file_ is not a typical JSON file. Each line must contain a separate, self-contained valid JSON object. As a consequence, a regular multi-line JSON file will most often fail. @@ -1202,9 +1359,7 @@ sqlContext = SQLContext(sc) # A JSON dataset is pointed to by path. # The path can be either a single text file or a directory storing text files. -path = "examples/src/main/resources/people.json" -# Create a DataFrame from the file(s) pointed to by path -people = sqlContext.jsonFile(path) +people = sqlContext.read.json("examples/src/main/resources/people.json") # The inferred schema can be visualized using the printSchema() method. people.printSchema() @@ -1226,6 +1381,39 @@ anotherPeople = sqlContext.jsonRDD(anotherPeopleRDD) {% endhighlight %}
+
+Spark SQL can automatically infer the schema of a JSON dataset and load it as a DataFrame using
+the `jsonFile` function, which loads data from a directory of JSON files where each line of the
+files is a JSON object.
+
+Note that the file that is offered as _a json file_ is not a typical JSON file. Each
+line must contain a separate, self-contained valid JSON object. As a consequence,
+a regular multi-line JSON file will most often fail.
+
+{% highlight r %}
+# sc is an existing SparkContext.
+sqlContext <- sparkRSQL.init(sc)
+
+# A JSON dataset is pointed to by path.
+# The path can be either a single text file or a directory storing text files.
+path <- "examples/src/main/resources/people.json"
+# Create a DataFrame from the file(s) pointed to by path
+people <- jsonFile(sqlContext, path)
+
+# The inferred schema can be visualized using the printSchema() method.
+printSchema(people)
+# root
+# |-- age: integer (nullable = true)
+# |-- name: string (nullable = true)
+
+# Register this DataFrame as a table.
+registerTempTable(people, "people")
+
+# SQL statements can be run by using the sql methods provided by `sqlContext`.
+teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19")
+{% endhighlight %}
+
{% highlight sql %} @@ -1302,10 +1490,7 @@ Row[] results = sqlContext.sql("FROM src SELECT key, value").collect();
When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and -adds support for finding tables in the MetaStore and writing queries using HiveQL. In addition to -the `sql` method a `HiveContext` also provides an `hql` methods, which allows queries to be -expressed in HiveQL. - +adds support for finding tables in the MetaStore and writing queries using HiveQL. {% highlight python %} # sc is an existing SparkContext. from pyspark.sql import HiveContext @@ -1319,9 +1504,91 @@ results = sqlContext.sql("FROM src SELECT key, value").collect() {% endhighlight %} +
+ +
+
+When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and
+adds support for finding tables in the MetaStore and writing queries using HiveQL.
+{% highlight r %}
+# sc is an existing SparkContext.
+sqlContext <- sparkRHive.init(sc)
+
+sql(sqlContext, "CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
+sql(sqlContext, "LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE src")
+
+# Queries can be expressed in HiveQL.
+results <- collect(sql(sqlContext, "FROM src SELECT key, value"))
+
+{% endhighlight %}
+
+### Interacting with Different Versions of Hive Metastore
+
+One of the most important pieces of Spark SQL's Hive support is interaction with the Hive metastore,
+which enables Spark SQL to access metadata of Hive tables. Starting from Spark 1.4.0, a single binary build of Spark SQL can be used to query different versions of Hive metastores, using the configuration described below.
+
+Internally, Spark SQL uses two Hive clients, one for executing native Hive commands like `SET`
+and `DESCRIBE`, and the other dedicated to communicating with the Hive metastore. The former uses Hive
+jars of version 0.13.1, which are bundled with Spark 1.4.0. The latter uses Hive jars of the
+version specified by the user. An isolated classloader is used to avoid dependency conflicts.
+
+
+<table class="table">
+  <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+  <tr>
+    <td><code>spark.sql.hive.metastore.version</code></td>
+    <td><code>0.13.1</code></td>
+    <td>
+      Version of the Hive metastore. Available
+      options are <code>0.12.0</code> and <code>0.13.1</code>. Support for more versions is coming in the future.
+    </td>
+  </tr>
+  <tr>
+    <td><code>spark.sql.hive.metastore.jars</code></td>
+    <td><code>builtin</code></td>
+    <td>
+      Location of the jars that should be used to instantiate the HiveMetastoreClient. This
+      property can be one of three options:
+      <ol>
+        <li><code>builtin</code>: Use Hive 0.13.1, which is bundled with the Spark assembly jar when
+        <code>-Phive</code> is enabled. When this option is chosen,
+        <code>spark.sql.hive.metastore.version</code> must be either <code>0.13.1</code> or not defined.</li>
+        <li><code>maven</code>: Use Hive jars of specified version downloaded from Maven repositories.</li>
+        <li>A classpath in the standard format for both Hive and Hadoop.</li>
+      </ol>
+    </td>
+  </tr>
+  <tr>
+    <td><code>spark.sql.hive.metastore.sharedPrefixes</code></td>
+    <td><code>com.mysql.jdbc,<br/>org.postgresql,<br/>com.microsoft.sqlserver,<br/>oracle.jdbc</code></td>
+    <td>
+      A comma separated list of class prefixes that should be loaded using the classloader that is
+      shared between Spark SQL and a specific version of Hive. An example of classes that should
+      be shared is JDBC drivers that are needed to talk to the metastore. Other classes that need
+      to be shared are those that interact with classes that are already shared. For example,
+      custom appenders that are used by log4j.
+    </td>
+  </tr>
+  <tr>
+    <td><code>spark.sql.hive.metastore.barrierPrefixes</code></td>
+    <td>(empty)</td>
+    <td>
+      A comma separated list of class prefixes that should explicitly be reloaded for each version
+      of Hive that Spark SQL is communicating with. For example, Hive UDFs that are declared in a
+      prefix that typically would be shared (i.e. <code>org.apache.spark.*</code>).
+    </td>
+  </tr>
+</table>
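+
+For example, these properties can be supplied through the application's Spark configuration before
+the `HiveContext` is created. A minimal Scala sketch, using the property keys from the table above
+(the chosen values are illustrative):
+
+{% highlight scala %}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.sql.hive.HiveContext
+
+val conf = new SparkConf()
+  .setAppName("MetastoreVersionExample")
+  // Talk to a 0.13.1 metastore, pulling the matching Hive jars from Maven.
+  .set("spark.sql.hive.metastore.version", "0.13.1")
+  .set("spark.sql.hive.metastore.jars", "maven")
+val sc = new SparkContext(conf)
+val sqlContext = new HiveContext(sc)
+{% endhighlight %}
+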
+ + ## JDBC To Other Databases Spark SQL also includes a data source that can read data from other databases using JDBC. This @@ -1355,7 +1622,7 @@ the Data Sources API. The following options are supported: dbtable - The JDBC table that should be read. Note that anything that is valid in a `FROM` clause of + The JDBC table that should be read. Note that anything that is valid in a FROM clause of a SQL query can be used. For example, instead of a full table you could also use a subquery in parentheses. @@ -1418,6 +1685,16 @@ df = sqlContext.load(source="jdbc", url="jdbc:postgresql:dbserver", dbtable="sch +
+ +{% highlight r %} + +df <- loadDF(sqlContext, source="jdbc", url="jdbc:postgresql:dbserver", dbtable="schema.tablename") + +{% endhighlight %} + +
+
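+
+The same table can be loaded from Scala through the generic reader interface. A minimal sketch,
+reusing the hypothetical PostgreSQL URL and table name from the examples above:
+
+{% highlight scala %}
+val jdbcDF = sqlContext.read.format("jdbc")
+  .options(Map(
+    "url" -> "jdbc:postgresql:dbserver",
+    // Per the options table above, this can also be a subquery in parentheses.
+    "dbtable" -> "schema.tablename"))
+  .load()
+{% endhighlight %}
+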
{% highlight sql %} @@ -1489,7 +1766,7 @@ that these options will be deprecated in future release as more optimizations ar Configures the maximum size in bytes for a table that will be broadcast to all worker nodes when performing a join. By setting this value to -1 broadcasting can be disabled. Note that currently statistics are only supported for Hive Metastore tables where the command - `ANALYZE TABLE <tableName> COMPUTE STATISTICS noscan` has been run. + ANALYZE TABLE <tableName> COMPUTE STATISTICS noscan has been run. @@ -1512,7 +1789,9 @@ that these options will be deprecated in future release as more optimizations ar # Distributed SQL Engine -Spark SQL can also act as a distributed query engine using its JDBC/ODBC or command-line interface. In this mode, end-users or applications can interact with Spark SQL directly to run SQL queries, without the need to write any code. +Spark SQL can also act as a distributed query engine using its JDBC/ODBC or command-line interface. +In this mode, end-users or applications can interact with Spark SQL directly to run SQL queries, +without the need to write any code. ## Running the Thrift JDBC/ODBC server @@ -1589,6 +1868,83 @@ options. # Migration Guide +## Upgrading from Spark SQL 1.3 to 1.4 + +#### DataFrame data reader/writer interface + +Based on user feedback, we created a new, more fluid API for reading data in (`SQLContext.read`) +and writing data out (`DataFrame.write`), +and deprecated the old APIs (e.g. `SQLContext.parquetFile`, `SQLContext.jsonFile`). + +See the API docs for `SQLContext.read` ( + Scala, + Java, + Python +) and `DataFrame.write` ( + Scala, + Java, + Python +) more information. + + +#### DataFrame.groupBy retains grouping columns + +Based on user feedback, we changed the default behavior of `DataFrame.groupBy().agg()` to retain the grouping columns in the resulting `DataFrame`. To keep the behavior in 1.3, set `spark.sql.retainGroupColumns` to `false`. + +
+
+{% highlight scala %} + +// In 1.3.x, in order for the grouping column "department" to show up, +// it must be included explicitly as part of the agg function call. +df.groupBy("department").agg($"department", max("age"), sum("expense")) + +// In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(max("age"), sum("expense")) + +// Revert to 1.3 behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false") + +{% endhighlight %} +
+ +
+{% highlight java %} + +// In 1.3.x, in order for the grouping column "department" to show up, +// it must be included explicitly as part of the agg function call. +df.groupBy("department").agg(col("department"), max("age"), sum("expense")); + +// In 1.4+, grouping column "department" is included automatically. +df.groupBy("department").agg(max("age"), sum("expense")); + +// Revert to 1.3 behavior (not retaining grouping column) by: +sqlContext.setConf("spark.sql.retainGroupColumns", "false"); + +{% endhighlight %} +
+ +
+{% highlight python %}
+
+import pyspark.sql.functions as func
+
+# In 1.3.x, in order for the grouping column "department" to show up,
+# it must be included explicitly as part of the agg function call.
+df.groupBy("department").agg(df["department"], func.max("age"), func.sum("expense"))
+
+# In 1.4+, grouping column "department" is included automatically.
+df.groupBy("department").agg(func.max("age"), func.sum("expense"))
+
+# Revert to 1.3.x behavior (not retaining grouping column) by:
+sqlContext.setConf("spark.sql.retainGroupColumns", "false")
+
+{% endhighlight %}
+
+ +
+ + ## Upgrading from Spark SQL 1.0-1.2 to 1.3 In Spark 1.3 we removed the "Alpha" label from Spark SQL and as part of this did a cleanup of the @@ -1646,7 +2002,7 @@ moved into the udf object in `SQLContext`.
-{% highlight java %} +{% highlight scala %} sqlContext.udf.register("strLen", (s: String) => s.length()) @@ -1656,7 +2012,7 @@ sqlContext.udf.register("strLen", (s: String) => s.length())
{% highlight java %} -sqlContext.udf().register("strLen", (String s) -> { s.length(); }); +sqlContext.udf().register("strLen", (String s) -> s.length(), DataTypes.IntegerType); {% endhighlight %}
@@ -2284,5 +2640,151 @@ from pyspark.sql.types import *
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Data typeValue type in RAPI to access or create a data type
ByteType + integer
+ Note: Numbers will be converted to 1-byte signed integer numbers at runtime. + Please make sure that numbers are within the range of -128 to 127. +
+ "byte" +
ShortType + integer
+ Note: Numbers will be converted to 2-byte signed integer numbers at runtime. + Please make sure that numbers are within the range of -32768 to 32767. +
+ "short" +
IntegerType integer + "integer" +
LongType + integer
+ Note: Numbers will be converted to 8-byte signed integer numbers at runtime. + Please make sure that numbers are within the range of + -9223372036854775808 to 9223372036854775807. + Otherwise, please convert data to decimal.Decimal and use DecimalType. +
+ "long" +
FloatType + numeric
+ Note: Numbers will be converted to 4-byte single-precision floating + point numbers at runtime. +
+ "float" +
DoubleType numeric + "double" +
DecimalType Not supported + Not supported +
StringType character + "string" +
BinaryType raw + "binary" +
BooleanType logical + "bool" +
TimestampType POSIXct + "timestamp" +
DateType Date + "date" +
ArrayType vector or list + list(type="array", elementType=elementType, containsNull=[containsNull])
+ Note: The default value of containsNull is True. +
MapType environment
+ list(type="map", keyType=keyType, valueType=valueType, valueContainsNull=[valueContainsNull])
+ Note: The default value of valueContainsNull is True. +
StructType named list + list(type="struct", fields=fields)
+ Note: fields is a Seq of StructFields. Also, two fields with the same + name are not allowed. +
StructField The value type in R of the data type of this field + (For example, integer for a StructField with the data type IntegerType) + list(name=name, type=dataType, nullable=nullable) +
+ +
+
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index 2f2fea53168a3..42b33947873b0 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -1915,7 +1915,7 @@ In that case, consider [reducing](#reducing-the-processing-time-of-each-batch) the batch processing time. The progress of a Spark Streaming program can also be monitored using the -[StreamingListener](api/scala/index.html#org.apache.spark.scheduler.StreamingListener) interface, +[StreamingListener](api/scala/index.html#org.apache.spark.streaming.scheduler.StreamingListener) interface, which allows you to get receiver status and processing times. Note that this is a developer API and it is likely to be improved upon (i.e., more information reported) in the future. @@ -1946,10 +1946,10 @@ creates a single receiver (running on a worker machine) that receives a single s Receiving multiple data streams can therefore be achieved by creating multiple input DStreams and configuring them to receive different partitions of the data stream from the source(s). For example, a single Kafka input DStream receiving two topics of data can be split into two -Kafka input streams, each receiving only one topic. This would run two receivers on two workers, -thus allowing data to be received in parallel, and increasing overall throughput. These multiple -DStream can be unioned together to create a single DStream. Then the transformations that was -being applied on the single input DStream can applied on the unified stream. This is done as follows. +Kafka input streams, each receiving only one topic. This would run two receivers, +allowing data to be received in parallel, and increasing overall throughput. These multiple +DStreams can be unioned together to create a single DStream. Then the transformations that were +being applied on a single input DStream can be applied on the unified stream. This is done as follows.
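+
+A minimal Scala sketch of this pattern (here `zkQuorum`, `group` and `topicMap` are placeholder
+Kafka connection values, not ones defined in this guide):
+
+{% highlight scala %}
+import org.apache.spark.streaming.kafka.KafkaUtils
+
+// Create one input DStream per receiver and union them into a single DStream.
+val numStreams = 2
+val kafkaStreams = (1 to numStreams).map { _ =>
+  KafkaUtils.createStream(streamingContext, zkQuorum, group, topicMap)
+}
+val unifiedStream = streamingContext.union(kafkaStreams)
+unifiedStream.print()
+{% endhighlight %}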
diff --git a/docs/tuning.md b/docs/tuning.md index 1cb223e74f382..572c7270e4999 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -94,11 +94,13 @@ We will then cover tuning Spark's cache size and the Java garbage collector. ## Determining Memory Consumption -The best way to size the amount of memory consumption your dataset will require is to create an RDD, put it into cache, and look at the SparkContext logs on your driver program. The logs will tell you how much memory each partition is consuming, which you can aggregate to get the total size of the RDD. You will see messages like this: +The best way to size the amount of memory consumption a dataset will require is to create an RDD, put it +into cache, and look at the "Storage" page in the web UI. The page will tell you how much memory the RDD +is occupying. - INFO BlockManagerMasterActor: Added rdd_0_1 in memory on mbk.local:50311 (size: 717.5 KB, free: 332.3 MB) - -This means that partition 1 of RDD 0 consumed 717.5 KB. +To estimate the memory consumption of a particular object, use `SizeEstimator`'s `estimate` method +This is useful for experimenting with different data layouts to trim memory usage, as well as +determining the amount of space a broadcast variable will occupy on each executor heap. ## Tuning Data Structures diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 87c0818279713..2154046a3a73c 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -19,8 +19,9 @@ # limitations under the License. # -from __future__ import with_statement, print_function +from __future__ import division, print_function, with_statement +import codecs import hashlib import itertools import logging @@ -47,8 +48,10 @@ else: from urllib.request import urlopen, Request from urllib.error import HTTPError + raw_input = input + xrange = range -SPARK_EC2_VERSION = "1.2.1" +SPARK_EC2_VERSION = "1.4.0" SPARK_EC2_DIR = os.path.dirname(os.path.realpath(__file__)) VALID_SPARK_VERSIONS = set([ @@ -65,6 +68,9 @@ "1.1.1", "1.2.0", "1.2.1", + "1.3.0", + "1.3.1", + "1.4.0" ]) SPARK_TACHYON_MAP = { @@ -75,6 +81,8 @@ "1.1.1": "0.5.0", "1.2.0": "0.5.0", "1.2.1": "0.5.0", + "1.3.0": "0.5.0", + "1.3.1": "0.5.0", } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION @@ -347,46 +355,57 @@ def get_validate_spark_version(version, repo): # Source: http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ -# Last Updated: 2014-06-20 +# Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. 
EC2_INSTANCE_TYPES = { "c1.medium": "pvm", "c1.xlarge": "pvm", + "c3.large": "pvm", + "c3.xlarge": "pvm", "c3.2xlarge": "pvm", "c3.4xlarge": "pvm", "c3.8xlarge": "pvm", - "c3.large": "pvm", - "c3.xlarge": "pvm", + "c4.large": "hvm", + "c4.xlarge": "hvm", + "c4.2xlarge": "hvm", + "c4.4xlarge": "hvm", + "c4.8xlarge": "hvm", "cc1.4xlarge": "hvm", "cc2.8xlarge": "hvm", "cg1.4xlarge": "hvm", "cr1.8xlarge": "hvm", + "d2.xlarge": "hvm", + "d2.2xlarge": "hvm", + "d2.4xlarge": "hvm", + "d2.8xlarge": "hvm", + "g2.2xlarge": "hvm", + "g2.8xlarge": "hvm", "hi1.4xlarge": "pvm", "hs1.8xlarge": "pvm", + "i2.xlarge": "hvm", "i2.2xlarge": "hvm", "i2.4xlarge": "hvm", "i2.8xlarge": "hvm", - "i2.xlarge": "hvm", - "m1.large": "pvm", - "m1.medium": "pvm", "m1.small": "pvm", + "m1.medium": "pvm", + "m1.large": "pvm", "m1.xlarge": "pvm", + "m2.xlarge": "pvm", "m2.2xlarge": "pvm", "m2.4xlarge": "pvm", - "m2.xlarge": "pvm", - "m3.2xlarge": "hvm", - "m3.large": "hvm", "m3.medium": "hvm", + "m3.large": "hvm", "m3.xlarge": "hvm", + "m3.2xlarge": "hvm", + "r3.large": "hvm", + "r3.xlarge": "hvm", "r3.2xlarge": "hvm", "r3.4xlarge": "hvm", "r3.8xlarge": "hvm", - "r3.large": "hvm", - "r3.xlarge": "hvm", "t1.micro": "pvm", - "t2.medium": "hvm", "t2.micro": "hvm", "t2.small": "hvm", + "t2.medium": "hvm", } @@ -408,13 +427,14 @@ def get_spark_ami(opts): b=opts.spark_ec2_git_branch) ami_path = "%s/%s/%s" % (ami_prefix, opts.region, instance_type) + reader = codecs.getreader("ascii") try: - ami = urlopen(ami_path).read().strip() - print("Spark AMI: " + ami) + ami = reader(urlopen(ami_path)).read().strip() except: print("Could not resolve AMI at: " + ami_path, file=stderr) sys.exit(1) + print("Spark AMI: " + ami) return ami @@ -472,6 +492,8 @@ def launch_cluster(conn, opts, cluster_name): master_group.authorize('udp', 2049, 2049, authorized_address) master_group.authorize('tcp', 4242, 4242, authorized_address) master_group.authorize('udp', 4242, 4242, authorized_address) + # RM in YARN mode uses 8088 + master_group.authorize('tcp', 8088, 8088, authorized_address) if opts.ganglia: master_group.authorize('tcp', 5080, 5080, authorized_address) if slave_group.rules == []: # Group was just now created @@ -735,7 +757,7 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): 'mapreduce', 'spark-standalone', 'tachyon'] if opts.hadoop_major_version == "1": - modules = filter(lambda x: x != "mapreduce", modules) + modules = list(filter(lambda x: x != "mapreduce", modules)) if opts.ganglia: modules.append('ganglia') @@ -878,44 +900,57 @@ def wait_for_cluster_state(conn, opts, cluster_instances, cluster_state): # Get number of local disks available for a given EC2 instance type. def get_num_disks(instance_type): # Source: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html - # Last Updated: 2014-06-20 + # Last Updated: 2015-05-08 # For easy maintainability, please keep this manually-inputted dictionary sorted by key. 
disks_by_instance = { "c1.medium": 1, "c1.xlarge": 4, + "c3.large": 2, + "c3.xlarge": 2, "c3.2xlarge": 2, "c3.4xlarge": 2, "c3.8xlarge": 2, - "c3.large": 2, - "c3.xlarge": 2, + "c4.large": 0, + "c4.xlarge": 0, + "c4.2xlarge": 0, + "c4.4xlarge": 0, + "c4.8xlarge": 0, "cc1.4xlarge": 2, "cc2.8xlarge": 4, "cg1.4xlarge": 2, "cr1.8xlarge": 2, + "d2.xlarge": 3, + "d2.2xlarge": 6, + "d2.4xlarge": 12, + "d2.8xlarge": 24, "g2.2xlarge": 1, + "g2.8xlarge": 2, "hi1.4xlarge": 2, "hs1.8xlarge": 24, + "i2.xlarge": 1, "i2.2xlarge": 2, "i2.4xlarge": 4, "i2.8xlarge": 8, - "i2.xlarge": 1, - "m1.large": 2, - "m1.medium": 1, "m1.small": 1, + "m1.medium": 1, + "m1.large": 2, "m1.xlarge": 4, + "m2.xlarge": 1, "m2.2xlarge": 1, "m2.4xlarge": 2, - "m2.xlarge": 1, - "m3.2xlarge": 2, - "m3.large": 1, "m3.medium": 1, + "m3.large": 1, "m3.xlarge": 2, + "m3.2xlarge": 2, + "r3.large": 1, + "r3.xlarge": 1, "r3.2xlarge": 1, "r3.4xlarge": 1, "r3.8xlarge": 2, - "r3.large": 1, - "r3.xlarge": 1, "t1.micro": 0, + "t2.micro": 0, + "t2.small": 0, + "t2.medium": 0, } if instance_type in disks_by_instance: return disks_by_instance[instance_type] @@ -1128,7 +1163,7 @@ def get_zones(conn, opts): # Gets the number of items in a partition def get_partition(total, num_partitions, current_partitions): - num_slaves_this_zone = total / num_partitions + num_slaves_this_zone = total // num_partitions if (total % num_partitions) - current_partitions > 0: num_slaves_this_zone += 1 return num_slaves_this_zone diff --git a/examples/pom.xml b/examples/pom.xml index 5b04b4f8d6ca0..38ff67d934d2a 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -97,6 +97,11 @@ + + org.apache.spark + spark-streaming-kafka_${scala.binary.version} + ${project.version} + org.apache.hbase hbase-testing-util @@ -392,45 +397,6 @@ - - - scala-2.10 - - !scala-2.11 - - - - org.apache.spark - spark-streaming-kafka_${scala.binary.version} - ${project.version} - - - - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-scala-sources - generate-sources - - add-source - - - - src/main/scala - scala-2.10/src/main/scala - scala-2.10/src/main/java - - - - - - - - diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index 46377a99c4857..ec533d174ebdc 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -28,7 +28,7 @@ import org.apache.spark.ml.classification.ClassificationModel; import org.apache.spark.ml.param.IntParam; import org.apache.spark.ml.param.ParamMap; -import org.apache.spark.ml.param.Params$; +import org.apache.spark.ml.util.Identifiable$; import org.apache.spark.mllib.linalg.BLAS; import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.mllib.linalg.Vectors; @@ -104,7 +104,23 @@ public static void main(String[] args) throws Exception { * However, this should still compile and run successfully. 
*/ class MyJavaLogisticRegression - extends Classifier { + extends Classifier { + + public MyJavaLogisticRegression() { + init(); + } + + public MyJavaLogisticRegression(String uid) { + this.uid_ = uid; + init(); + } + + private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg"); + + @Override + public String uid() { + return uid_; + } /** * Param for max number of iterations @@ -118,7 +134,7 @@ class MyJavaLogisticRegression int getMaxIter() { return (Integer) getOrDefault(maxIter); } - public MyJavaLogisticRegression() { + private void init() { setMaxIter(100); } @@ -129,16 +145,16 @@ MyJavaLogisticRegression setMaxIter(int value) { // This method is used by fit(). // In Java, we have to make it public since Java does not understand Scala's protected modifier. - public MyJavaLogisticRegressionModel train(DataFrame dataset, ParamMap paramMap) { + public MyJavaLogisticRegressionModel train(DataFrame dataset) { // Extract columns from data using helper method. - JavaRDD oldDataset = extractLabeledPoints(dataset, paramMap).toJavaRDD(); + JavaRDD oldDataset = extractLabeledPoints(dataset).toJavaRDD(); // Do learning to estimate the weight vector. int numFeatures = oldDataset.take(1).get(0).features().size(); Vector weights = Vectors.zeros(numFeatures); // Learning would happen here. // Create a model, and return it. - return new MyJavaLogisticRegressionModel(this, paramMap, weights); + return new MyJavaLogisticRegressionModel(uid(), weights).setParent(this); } } @@ -150,24 +166,21 @@ public MyJavaLogisticRegressionModel train(DataFrame dataset, ParamMap paramMap) * However, this should still compile and run successfully. */ class MyJavaLogisticRegressionModel - extends ClassificationModel { - - private MyJavaLogisticRegression parent_; - public MyJavaLogisticRegression parent() { return parent_; } - - private ParamMap fittingParamMap_; - public ParamMap fittingParamMap() { return fittingParamMap_; } + extends ClassificationModel { private Vector weights_; public Vector weights() { return weights_; } - public MyJavaLogisticRegressionModel( - MyJavaLogisticRegression parent_, - ParamMap fittingParamMap_, - Vector weights_) { - this.parent_ = parent_; - this.fittingParamMap_ = fittingParamMap_; - this.weights_ = weights_; + public MyJavaLogisticRegressionModel(String uid, Vector weights) { + this.uid_ = uid; + this.weights_ = weights; + } + + private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg"); + + @Override + public String uid() { + return uid_; } // This uses the default implementation of transform(), which reads column "features" and outputs @@ -210,10 +223,8 @@ public Vector predictRaw(Vector features) { * In Java, we have to make this method public since Java does not understand Scala's protected * modifier. 
*/ - public MyJavaLogisticRegressionModel copy() { - MyJavaLogisticRegressionModel m = - new MyJavaLogisticRegressionModel(parent_, fittingParamMap_, weights_); - Params$.MODULE$.inheritValues(this.extractParamMap(), this, m); - return m; + @Override + public MyJavaLogisticRegressionModel copy(ParamMap extra) { + return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), extra); } } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java new file mode 100644 index 0000000000000..75063dbf800d8 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneVsRestExample.java @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import org.apache.commons.cli.*; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.classification.LogisticRegression; +import org.apache.spark.ml.classification.OneVsRest; +import org.apache.spark.ml.classification.OneVsRestModel; +import org.apache.spark.ml.util.MetadataUtils; +import org.apache.spark.mllib.evaluation.MulticlassMetrics; +import org.apache.spark.mllib.linalg.Matrix; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.mllib.util.MLUtils; +import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.StructField; + +/** + * An example runner for Multiclass to Binary Reduction with One Vs Rest. + * The example uses Logistic Regression as the base classifier. All parameters that + * can be specified on the base classifier can be passed in to the runner options. + * Run with + *
+ * bin/run-example ml.JavaOneVsRestExample [options]
+ * 
+ */ +public class JavaOneVsRestExample { + + private static class Params { + String input; + String testInput = null; + Integer maxIter = 100; + double tol = 1E-6; + boolean fitIntercept = true; + Double regParam = null; + Double elasticNetParam = null; + double fracTest = 0.2; + } + + public static void main(String[] args) { + // parse the arguments + Params params = parse(args); + SparkConf conf = new SparkConf().setAppName("JavaOneVsRestExample"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + // configure the base classifier + LogisticRegression classifier = new LogisticRegression() + .setMaxIter(params.maxIter) + .setTol(params.tol) + .setFitIntercept(params.fitIntercept); + + if (params.regParam != null) { + classifier.setRegParam(params.regParam); + } + if (params.elasticNetParam != null) { + classifier.setElasticNetParam(params.elasticNetParam); + } + + // instantiate the One Vs Rest Classifier + OneVsRest ovr = new OneVsRest().setClassifier(classifier); + + String input = params.input; + RDD inputData = MLUtils.loadLibSVMFile(jsc.sc(), input); + RDD train; + RDD test; + + // compute the train/ test split: if testInput is not provided use part of input + String testInput = params.testInput; + if (testInput != null) { + train = inputData; + // compute the number of features in the training set. + int numFeatures = inputData.first().features().size(); + test = MLUtils.loadLibSVMFile(jsc.sc(), testInput, numFeatures); + } else { + double f = params.fracTest; + RDD[] tmp = inputData.randomSplit(new double[]{1 - f, f}, 12345); + train = tmp[0]; + test = tmp[1]; + } + + // train the multiclass model + DataFrame trainingDataFrame = jsql.createDataFrame(train, LabeledPoint.class); + OneVsRestModel ovrModel = ovr.fit(trainingDataFrame.cache()); + + // score the model on test data + DataFrame testDataFrame = jsql.createDataFrame(test, LabeledPoint.class); + DataFrame predictions = ovrModel.transform(testDataFrame.cache()) + .select("prediction", "label"); + + // obtain metrics + MulticlassMetrics metrics = new MulticlassMetrics(predictions); + StructField predictionColSchema = predictions.schema().apply("prediction"); + Integer numClasses = (Integer) MetadataUtils.getNumClasses(predictionColSchema).get(); + + // compute the false positive rate per label + StringBuilder results = new StringBuilder(); + results.append("label\tfpr\n"); + for (int label = 0; label < numClasses; label++) { + results.append(label); + results.append("\t"); + results.append(metrics.falsePositiveRate((double) label)); + results.append("\n"); + } + + Matrix confusionMatrix = metrics.confusionMatrix(); + // output the Confusion Matrix + System.out.println("Confusion Matrix"); + System.out.println(confusionMatrix); + System.out.println(); + System.out.println(results); + + jsc.stop(); + } + + private static Params parse(String[] args) { + Options options = generateCommandlineOptions(); + CommandLineParser parser = new PosixParser(); + Params params = new Params(); + + try { + CommandLine cmd = parser.parse(options, args); + String value; + if (cmd.hasOption("input")) { + params.input = cmd.getOptionValue("input"); + } + if (cmd.hasOption("maxIter")) { + value = cmd.getOptionValue("maxIter"); + params.maxIter = Integer.parseInt(value); + } + if (cmd.hasOption("tol")) { + value = cmd.getOptionValue("tol"); + params.tol = Double.parseDouble(value); + } + if (cmd.hasOption("fitIntercept")) { + value = cmd.getOptionValue("fitIntercept"); + params.fitIntercept = 
Boolean.parseBoolean(value); + } + if (cmd.hasOption("regParam")) { + value = cmd.getOptionValue("regParam"); + params.regParam = Double.parseDouble(value); + } + if (cmd.hasOption("elasticNetParam")) { + value = cmd.getOptionValue("elasticNetParam"); + params.elasticNetParam = Double.parseDouble(value); + } + if (cmd.hasOption("testInput")) { + value = cmd.getOptionValue("testInput"); + params.testInput = value; + } + if (cmd.hasOption("fracTest")) { + value = cmd.getOptionValue("fracTest"); + params.fracTest = Double.parseDouble(value); + } + + } catch (ParseException e) { + printHelpAndQuit(options); + } + return params; + } + + private static Options generateCommandlineOptions() { + Option input = OptionBuilder.withArgName("input") + .hasArg() + .isRequired() + .withDescription("input path to labeled examples. This path must be specified") + .create("input"); + Option testInput = OptionBuilder.withArgName("testInput") + .hasArg() + .withDescription("input path to test examples") + .create("testInput"); + Option fracTest = OptionBuilder.withArgName("testInput") + .hasArg() + .withDescription("fraction of data to hold out for testing." + + " If given option testInput, this option is ignored. default: 0.2") + .create("fracTest"); + Option maxIter = OptionBuilder.withArgName("maxIter") + .hasArg() + .withDescription("maximum number of iterations for Logistic Regression. default:100") + .create("maxIter"); + Option tol = OptionBuilder.withArgName("tol") + .hasArg() + .withDescription("the convergence tolerance of iterations " + + "for Logistic Regression. default: 1E-6") + .create("tol"); + Option fitIntercept = OptionBuilder.withArgName("fitIntercept") + .hasArg() + .withDescription("fit intercept for logistic regression. default true") + .create("fitIntercept"); + Option regParam = OptionBuilder.withArgName( "regParam" ) + .hasArg() + .withDescription("the regularization parameter for Logistic Regression.") + .create("regParam"); + Option elasticNetParam = OptionBuilder.withArgName("elasticNetParam" ) + .hasArg() + .withDescription("the ElasticNet mixing parameter for Logistic Regression.") + .create("elasticNetParam"); + + Options options = new Options() + .addOption(input) + .addOption(testInput) + .addOption(fracTest) + .addOption(maxIter) + .addOption(tol) + .addOption(fitIntercept) + .addOption(regParam) + .addOption(elasticNetParam); + + return options; + } + + private static void printHelpAndQuit(Options options) { + HelpFormatter formatter = new HelpFormatter(); + formatter.printHelp("JavaOneVsRestExample", options); + System.exit(-1); + } +} diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java index 4e02acce696e6..dac649d1d5ae6 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java @@ -71,7 +71,7 @@ public static void main(String[] args) { // we can view the parameters it used during fit(). // This prints the parameter (name: value) pairs, where names are unique IDs for this // LogisticRegression instance. - System.out.println("Model 1 was fit using parameters: " + model1.fittingParamMap()); + System.out.println("Model 1 was fit using parameters: " + model1.parent().extractParamMap()); // We may alternatively specify parameters using a ParamMap. 
ParamMap paramMap = new ParamMap(); @@ -87,7 +87,7 @@ public static void main(String[] args) { // Now learn a new model using the paramMapCombined parameters. // paramMapCombined overrides all parameters set earlier via lr.set* methods. LogisticRegressionModel model2 = lr.fit(training, paramMapCombined); - System.out.println("Model 2 was fit using parameters: " + model2.fittingParamMap()); + System.out.println("Model 2 was fit using parameters: " + model2.parent().extractParamMap()); // Prepare test documents. List localTest = Lists.newArrayList( @@ -97,7 +97,7 @@ public static void main(String[] args) { DataFrame test = jsql.createDataFrame(jsc.parallelize(localTest), LabeledPoint.class); // Make predictions on test documents using the Transformer.transform() method. - // LogisticRegression.transform will only use the 'features' column. + // LogisticRegressionModel.transform will only use the 'features' column. // Note that model2.transform() outputs a 'myProbability' column instead of the usual // 'probability' column since we renamed the lr.probabilityCol parameter previously. DataFrame results = model2.transform(test); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java index ef1ec103a879f..54738813d0016 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java @@ -66,7 +66,7 @@ public static void main(String[] args) { .setOutputCol("features"); LogisticRegression lr = new LogisticRegression() .setMaxIter(10) - .setRegParam(0.01); + .setRegParam(0.001); Pipeline pipeline = new Pipeline() .setStages(new PipelineStage[] {tokenizer, hashingTF, lr}); @@ -77,7 +77,7 @@ public static void main(String[] args) { List localTest = Lists.newArrayList( new Document(4L, "spark i j k"), new Document(5L, "l m n"), - new Document(6L, "mapreduce spark"), + new Document(6L, "spark hadoop spark"), new Document(7L, "apache hadoop")); DataFrame test = jsql.createDataFrame(jsc.parallelize(localTest), Document.class); diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index 8159ffbe2d269..afee279ec32b1 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -94,12 +94,12 @@ public String call(Row row) { System.out.println("=== Data source: Parquet File ==="); // DataFrames can be saved as parquet files, maintaining the schema information. - schemaPeople.saveAsParquetFile("people.parquet"); + schemaPeople.write().parquet("people.parquet"); // Read in the parquet file created above. // Parquet files are self-describing so the schema is preserved. // The result of loading a parquet file is also a DataFrame. - DataFrame parquetFile = sqlContext.parquetFile("people.parquet"); + DataFrame parquetFile = sqlContext.read().parquet("people.parquet"); //Parquet files can also be registered as tables and then used in SQL statements. parquetFile.registerTempTable("parquetFile"); @@ -120,7 +120,7 @@ public String call(Row row) { // The path can be either a single text file or a directory storing text files. 
String path = "examples/src/main/resources/people.json"; // Create a DataFrame from the file(s) pointed by path - DataFrame peopleFromJsonFile = sqlContext.jsonFile(path); + DataFrame peopleFromJsonFile = sqlContext.read().json(path); // Because the schema of a JSON dataset is automatically inferred, to write queries, // it is better to take a look at what is the schema. @@ -151,7 +151,7 @@ public String call(Row row) { List jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); JavaRDD anotherPeopleRDD = ctx.parallelize(jsonData); - DataFrame peopleFromJsonRDD = sqlContext.jsonRDD(anotherPeopleRDD.rdd()); + DataFrame peopleFromJsonRDD = sqlContext.read().json(anotherPeopleRDD.rdd()); // Take a look at the schema of this new DataFrame. peopleFromJsonRDD.printSchema(); diff --git a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java similarity index 100% rename from examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java rename to examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java diff --git a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java similarity index 100% rename from examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java rename to examples/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java diff --git a/examples/src/main/python/ml/gradient_boosted_trees.py b/examples/src/main/python/ml/gradient_boosted_trees.py new file mode 100644 index 0000000000000..6446f0fe5eeab --- /dev/null +++ b/examples/src/main/python/ml/gradient_boosted_trees.py @@ -0,0 +1,83 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function + +import sys + +from pyspark import SparkContext +from pyspark.ml.classification import GBTClassifier +from pyspark.ml.feature import StringIndexer +from pyspark.ml.regression import GBTRegressor +from pyspark.mllib.evaluation import BinaryClassificationMetrics, RegressionMetrics +from pyspark.mllib.util import MLUtils +from pyspark.sql import Row, SQLContext + +""" +A simple example demonstrating a Gradient Boosted Trees Classification/Regression Pipeline. +Note: GBTClassifier only supports binary classification currently +Run with: + bin/spark-submit examples/src/main/python/ml/gradient_boosted_trees.py +""" + + +def testClassification(train, test): + # Train a GradientBoostedTrees model. 
+ + rf = GBTClassifier(maxIter=30, maxDepth=4, labelCol="indexedLabel") + + model = rf.fit(train) + predictionAndLabels = model.transform(test).select("prediction", "indexedLabel") \ + .map(lambda x: (x.prediction, x.indexedLabel)) + + metrics = BinaryClassificationMetrics(predictionAndLabels) + print("AUC %.3f" % metrics.areaUnderROC) + + +def testRegression(train, test): + # Train a GradientBoostedTrees model. + + rf = GBTRegressor(maxIter=30, maxDepth=4, labelCol="indexedLabel") + + model = rf.fit(train) + predictionAndLabels = model.transform(test).select("prediction", "indexedLabel") \ + .map(lambda x: (x.prediction, x.indexedLabel)) + + metrics = RegressionMetrics(predictionAndLabels) + print("rmse %.3f" % metrics.rootMeanSquaredError) + print("r2 %.3f" % metrics.r2) + print("mae %.3f" % metrics.meanAbsoluteError) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + print("Usage: gradient_boosted_trees", file=sys.stderr) + exit(1) + sc = SparkContext(appName="PythonGBTExample") + sqlContext = SQLContext(sc) + + # Load and parse the data file into a dataframe. + df = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + + # Map labels into an indexed column of labels in [0, numLabels) + stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel") + si_model = stringIndexer.fit(df) + td = si_model.transform(df) + [train, test] = td.randomSplit([0.7, 0.3]) + testClassification(train, test) + testRegression(train, test) + sc.stop() diff --git a/examples/src/main/python/ml/random_forest_example.py b/examples/src/main/python/ml/random_forest_example.py new file mode 100644 index 0000000000000..c7730e1bfacd9 --- /dev/null +++ b/examples/src/main/python/ml/random_forest_example.py @@ -0,0 +1,87 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function + +import sys + +from pyspark import SparkContext +from pyspark.ml.classification import RandomForestClassifier +from pyspark.ml.feature import StringIndexer +from pyspark.ml.regression import RandomForestRegressor +from pyspark.mllib.evaluation import MulticlassMetrics, RegressionMetrics +from pyspark.mllib.util import MLUtils +from pyspark.sql import Row, SQLContext + +""" +A simple example demonstrating a RandomForest Classification/Regression Pipeline. +Run with: + bin/spark-submit examples/src/main/python/ml/random_forest_example.py +""" + + +def testClassification(train, test): + # Train a RandomForest model. + # Setting featureSubsetStrategy="auto" lets the algorithm choose. + # Note: Use larger numTrees in practice. 
+ + rf = RandomForestClassifier(labelCol="indexedLabel", numTrees=3, maxDepth=4) + + model = rf.fit(train) + predictionAndLabels = model.transform(test).select("prediction", "indexedLabel") \ + .map(lambda x: (x.prediction, x.indexedLabel)) + + metrics = MulticlassMetrics(predictionAndLabels) + print("weighted f-measure %.3f" % metrics.weightedFMeasure()) + print("precision %s" % metrics.precision()) + print("recall %s" % metrics.recall()) + + +def testRegression(train, test): + # Train a RandomForest model. + # Note: Use larger numTrees in practice. + + rf = RandomForestRegressor(labelCol="indexedLabel", numTrees=3, maxDepth=4) + + model = rf.fit(train) + predictionAndLabels = model.transform(test).select("prediction", "indexedLabel") \ + .map(lambda x: (x.prediction, x.indexedLabel)) + + metrics = RegressionMetrics(predictionAndLabels) + print("rmse %.3f" % metrics.rootMeanSquaredError) + print("r2 %.3f" % metrics.r2) + print("mae %.3f" % metrics.meanAbsoluteError) + + +if __name__ == "__main__": + if len(sys.argv) > 1: + print("Usage: random_forest_example", file=sys.stderr) + exit(1) + sc = SparkContext(appName="PythonRandomForestExample") + sqlContext = SQLContext(sc) + + # Load and parse the data file into a dataframe. + df = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + + # Map labels into an indexed column of labels in [0, numLabels) + stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel") + si_model = stringIndexer.fit(df) + td = si_model.transform(df) + [train, test] = td.randomSplit([0.7, 0.3]) + testClassification(train, test) + testRegression(train, test) + sc.stop() diff --git a/examples/src/main/python/ml/simple_params_example.py b/examples/src/main/python/ml/simple_params_example.py new file mode 100644 index 0000000000000..3933d59b52cd1 --- /dev/null +++ b/examples/src/main/python/ml/simple_params_example.py @@ -0,0 +1,98 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import print_function + +import pprint +import sys + +from pyspark import SparkContext +from pyspark.ml.classification import LogisticRegression +from pyspark.mllib.linalg import DenseVector +from pyspark.mllib.regression import LabeledPoint +from pyspark.sql import SQLContext + +""" +A simple example demonstrating ways to specify parameters for Estimators and Transformers. +Run with: + bin/spark-submit examples/src/main/python/ml/simple_params_example.py +""" + +if __name__ == "__main__": + if len(sys.argv) > 1: + print("Usage: simple_params_example", file=sys.stderr) + exit(1) + sc = SparkContext(appName="PythonSimpleParamsExample") + sqlContext = SQLContext(sc) + + # prepare training data. + # We create an RDD of LabeledPoints and convert them into a DataFrame. 
+ # Spark DataFrames can automatically infer the schema from named tuples + # and LabeledPoint implements __reduce__ to behave like a named tuple. + training = sc.parallelize([ + LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])), + LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])), + LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])), + LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))]).toDF() + + # Create a LogisticRegression instance with maxIter = 10. + # This instance is an Estimator. + lr = LogisticRegression(maxIter=10) + # Print out the parameters, documentation, and any default values. + print("LogisticRegression parameters:\n" + lr.explainParams() + "\n") + + # We may also set parameters using setter methods. + lr.setRegParam(0.01) + + # Learn a LogisticRegression model. This uses the parameters stored in lr. + model1 = lr.fit(training) + + # Since model1 is a Model (i.e., a Transformer produced by an Estimator), + # we can view the parameters it used during fit(). + # This prints the parameter (name: value) pairs, where names are unique IDs for this + # LogisticRegression instance. + print("Model 1 was fit using parameters:\n") + pprint.pprint(model1.extractParamMap()) + + # We may alternatively specify parameters using a parameter map. + # paramMap overrides all lr parameters set earlier. + paramMap = {lr.maxIter: 20, lr.threshold: 0.55, lr.probabilityCol: "myProbability"} + + # Now learn a new model using the new parameters. + model2 = lr.fit(training, paramMap) + print("Model 2 was fit using parameters:\n") + pprint.pprint(model2.extractParamMap()) + + # prepare test data. + test = sc.parallelize([ + LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])), + LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])), + LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))]).toDF() + + # Make predictions on test data using the Transformer.transform() method. + # LogisticRegressionModel.transform will only use the 'features' column. + # Note that model2.transform() outputs a 'myProbability' column instead of the usual + # 'probability' column since we renamed the lr.probabilityCol parameter previously. + result = model2.transform(test) \ + .select("features", "label", "myProbability", "prediction") \ + .collect() + + for row in result: + print("features=%s,label=%s -> prob=%s, prediction=%s" + % (row.features, row.label, row.myProbability, row.prediction)) + + sc.stop() diff --git a/examples/src/main/python/ml/simple_text_classification_pipeline.py b/examples/src/main/python/ml/simple_text_classification_pipeline.py index fab21f003b233..b4f06bf888746 100644 --- a/examples/src/main/python/ml/simple_text_classification_pipeline.py +++ b/examples/src/main/python/ml/simple_text_classification_pipeline.py @@ -48,7 +48,7 @@ # Configure an ML pipeline, which consists of tree stages: tokenizer, hashingTF, and lr. tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") - lr = LogisticRegression(maxIter=10, regParam=0.01) + lr = LogisticRegression(maxIter=10, regParam=0.001) pipeline = Pipeline(stages=[tokenizer, hashingTF, lr]) # Fit the pipeline to training documents. 
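The same regParam change (0.01 to 0.001) is applied to the Java, Python, and Scala variants of this text-classification example. For comparison with the Python hunk above, a minimal Scala sketch of the three-stage pipeline it configures; column names follow the example, and trainingDF is an assumed DataFrame with "id", "text", and "label" columns:

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}

// Tokenize text, hash the tokens into feature vectors, then fit logistic regression.
val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val hashingTF = new HashingTF().setInputCol(tokenizer.getOutputCol).setOutputCol("features")
val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.001)
val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))
// val model = pipeline.fit(trainingDF)  // trainingDF is assumed to exist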
@@ -58,7 +58,7 @@ Document = Row("id", "text") test = sc.parallelize([(4, "spark i j k"), (5, "l m n"), - (6, "mapreduce spark"), + (6, "spark hadoop spark"), (7, "apache hadoop")]) \ .map(lambda x: Document(*x)).toDF() diff --git a/examples/src/main/python/mllib/kmeans.py b/examples/src/main/python/mllib/kmeans.py index f901a87fa63ac..002fc75799648 100755 --- a/examples/src/main/python/mllib/kmeans.py +++ b/examples/src/main/python/mllib/kmeans.py @@ -43,4 +43,5 @@ def parseVector(line): k = int(sys.argv[2]) model = KMeans.train(data, k) print("Final centers: " + str(model.clusterCenters)) + print("Total Cost: " + str(model.computeCost(data))) sc.stop() diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R new file mode 100644 index 0000000000000..53b817144f6ac --- /dev/null +++ b/examples/src/main/r/dataframe.R @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +library(SparkR) + +# Initialize SparkContext and SQLContext +sc <- sparkR.init(appName="SparkR-DataFrame-example") +sqlContext <- sparkRSQL.init(sc) + +# Create a simple local data.frame +localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18)) + +# Convert local data frame to a SparkR DataFrame +df <- createDataFrame(sqlContext, localDF) + +# Print its schema +printSchema(df) +# root +# |-- name: string (nullable = true) +# |-- age: double (nullable = true) + +# Create a DataFrame from a JSON file +path <- file.path(Sys.getenv("SPARK_HOME"), "examples/src/main/resources/people.json") +peopleDF <- jsonFile(sqlContext, path) +printSchema(peopleDF) + +# Register this DataFrame as a table. +registerTempTable(peopleDF, "people") + +# SQL statements can be run by using the sql methods provided by sqlContext +teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age <= 19") + +# Call collect to get a local data.frame +teenagersLocalDF <- collect(teenagers) + +# Print the teenagers in our dataset +print(teenagersLocalDF) + +# Stop the SparkContext now +sparkR.stop() diff --git a/examples/src/main/r/kmeans.R b/examples/src/main/r/kmeans.R deleted file mode 100644 index 6e6b5cb93789c..0000000000000 --- a/examples/src/main/r/kmeans.R +++ /dev/null @@ -1,93 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -library(SparkR) - -# Logistic regression in Spark. -# Note: unlike the example in Scala, a point here is represented as a vector of -# doubles. - -parseVectors <- function(lines) { - lines <- strsplit(as.character(lines) , " ", fixed = TRUE) - list(matrix(as.numeric(unlist(lines)), ncol = length(lines[[1]]))) -} - -dist.fun <- function(P, C) { - apply( - C, - 1, - function(x) { - colSums((t(P) - x)^2) - } - ) -} - -closestPoint <- function(P, C) { - max.col(-dist.fun(P, C)) -} -# Main program - -args <- commandArgs(trailing = TRUE) - -if (length(args) != 3) { - print("Usage: kmeans ") - q("no") -} - -sc <- sparkR.init(appName = "RKMeans") -K <- as.integer(args[[2]]) -convergeDist <- as.double(args[[3]]) - -lines <- textFile(sc, args[[1]]) -points <- cache(lapplyPartition(lines, parseVectors)) -# kPoints <- take(points, K) -kPoints <- do.call(rbind, takeSample(points, FALSE, K, 16189L)) -tempDist <- 1.0 - -while (tempDist > convergeDist) { - closest <- lapplyPartition( - lapply(points, - function(p) { - cp <- closestPoint(p, kPoints); - mapply(list, unique(cp), split.data.frame(cbind(1, p), cp), SIMPLIFY=FALSE) - }), - function(x) {do.call(c, x) - }) - - pointStats <- reduceByKey(closest, - function(p1, p2) { - t(colSums(rbind(p1, p2))) - }, - 2L) - - newPoints <- do.call( - rbind, - collect(lapply(pointStats, - function(tup) { - point.sum <- tup[[2]][, -1] - point.count <- tup[[2]][, 1] - point.sum/point.count - }))) - - D <- dist.fun(kPoints, newPoints) - tempDist <- sum(D[cbind(1:3, max.col(-D))]) - kPoints <- newPoints - cat("Finished iteration (delta = ", tempDist, ")\n") -} - -cat("Final centers:\n") -writeLines(unlist(lapply(kPoints, paste, collapse = " "))) diff --git a/examples/src/main/r/linear_solver_mnist.R b/examples/src/main/r/linear_solver_mnist.R deleted file mode 100644 index c864a4232d010..0000000000000 --- a/examples/src/main/r/linear_solver_mnist.R +++ /dev/null @@ -1,107 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -# Instructions: https://github.com/amplab-extras/SparkR-pkg/wiki/SparkR-Example:-Digit-Recognition-on-EC2 - -library(SparkR) -library(Matrix) - -args <- commandArgs(trailing = TRUE) - -# number of random features; default to 1100 -D <- ifelse(length(args) > 0, as.integer(args[[1]]), 1100) -# number of partitions for training dataset -trainParts <- 12 -# dimension of digits -d <- 784 -# number of test examples -NTrain <- 60000 -# number of training examples -NTest <- 10000 -# scale of features -gamma <- 4e-4 - -sc <- sparkR.init(appName = "SparkR-LinearSolver") - -# You can also use HDFS path to speed things up: -# hdfs:///train-mnist-dense-with-labels.data -file <- textFile(sc, "/data/train-mnist-dense-with-labels.data", trainParts) - -W <- gamma * matrix(nrow=D, ncol=d, data=rnorm(D*d)) -b <- 2 * pi * matrix(nrow=D, ncol=1, data=runif(D)) -broadcastW <- broadcast(sc, W) -broadcastB <- broadcast(sc, b) - -includePackage(sc, Matrix) -numericLines <- lapplyPartitionsWithIndex(file, - function(split, part) { - matList <- sapply(part, function(line) { - as.numeric(strsplit(line, ",", fixed=TRUE)[[1]]) - }, simplify=FALSE) - mat <- Matrix(ncol=d+1, data=unlist(matList, F, F), - sparse=T, byrow=T) - mat - }) - -featureLabels <- cache(lapplyPartition( - numericLines, - function(part) { - label <- part[,1] - mat <- part[,-1] - ones <- rep(1, nrow(mat)) - features <- cos( - mat %*% t(value(broadcastW)) + (matrix(ncol=1, data=ones) %*% t(value(broadcastB)))) - onesMat <- Matrix(ones) - featuresPlus <- cBind(features, onesMat) - labels <- matrix(nrow=nrow(mat), ncol=10, data=-1) - for (i in 1:nrow(mat)) { - labels[i, label[i]] <- 1 - } - list(label=labels, features=featuresPlus) - })) - -FTF <- Reduce("+", collect(lapplyPartition(featureLabels, - function(part) { - t(part$features) %*% part$features - }), flatten=F)) - -FTY <- Reduce("+", collect(lapplyPartition(featureLabels, - function(part) { - t(part$features) %*% part$label - }), flatten=F)) - -# solve for the coefficient matrix -C <- solve(FTF, FTY) - -test <- Matrix(as.matrix(read.csv("/data/test-mnist-dense-with-labels.data", - header=F), sparse=T)) -testData <- test[,-1] -testLabels <- matrix(ncol=1, test[,1]) - -err <- 0 - -# contstruct the feature maps for all examples from this digit -featuresTest <- cos(testData %*% t(value(broadcastW)) + - (matrix(ncol=1, data=rep(1, NTest)) %*% t(value(broadcastB)))) -featuresTest <- cBind(featuresTest, Matrix(rep(1, NTest))) - -# extract the one vs. all assignment -results <- featuresTest %*% C -labelsGot <- apply(results, 1, which.max) -err <- sum(testLabels != labelsGot) / nrow(testLabels) - -cat("\nFinished running. The error rate is: ", err, ".\n") diff --git a/examples/src/main/r/logistic_regression.R b/examples/src/main/r/logistic_regression.R deleted file mode 100644 index 2a86aa98160d3..0000000000000 --- a/examples/src/main/r/logistic_regression.R +++ /dev/null @@ -1,62 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -library(SparkR) - -args <- commandArgs(trailing = TRUE) - -if (length(args) != 3) { - print("Usage: logistic_regression ") - q("no") -} - -# Initialize Spark context -sc <- sparkR.init(appName = "LogisticRegressionR") -iterations <- as.integer(args[[2]]) -D <- as.integer(args[[3]]) - -readPartition <- function(part){ - part = strsplit(part, " ", fixed = T) - list(matrix(as.numeric(unlist(part)), ncol = length(part[[1]]))) -} - -# Read data points and convert each partition to a matrix -points <- cache(lapplyPartition(textFile(sc, args[[1]]), readPartition)) - -# Initialize w to a random value -w <- runif(n=D, min = -1, max = 1) -cat("Initial w: ", w, "\n") - -# Compute logistic regression gradient for a matrix of data points -gradient <- function(partition) { - partition = partition[[1]] - Y <- partition[, 1] # point labels (first column of input file) - X <- partition[, -1] # point coordinates - - # For each point (x, y), compute gradient function - dot <- X %*% w - logit <- 1 / (1 + exp(-Y * dot)) - grad <- t(X) %*% ((logit - 1) * Y) - list(grad) -} - -for (i in 1:iterations) { - cat("On iteration ", i, "\n") - w <- w - reduce(lapplyPartition(points, gradient), "+") -} - -cat("Final w: ", w, "\n") diff --git a/examples/src/main/r/pi.R b/examples/src/main/r/pi.R deleted file mode 100644 index aa7a833e147a0..0000000000000 --- a/examples/src/main/r/pi.R +++ /dev/null @@ -1,46 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -library(SparkR) - -args <- commandArgs(trailing = TRUE) - -sc <- sparkR.init(appName = "PiR") - -slices <- ifelse(length(args) > 1, as.integer(args[[2]]), 2) - -n <- 100000 * slices - -piFunc <- function(elem) { - rands <- runif(n = 2, min = -1, max = 1) - val <- ifelse((rands[1]^2 + rands[2]^2) < 1, 1.0, 0.0) - val -} - - -piFuncVec <- function(elems) { - message(length(elems)) - rands1 <- runif(n = length(elems), min = -1, max = 1) - rands2 <- runif(n = length(elems), min = -1, max = 1) - val <- ifelse((rands1^2 + rands2^2) < 1, 1.0, 0.0) - sum(val) -} - -rdd <- parallelize(sc, 1:n, slices) -count <- reduce(lapplyPartition(rdd, piFuncVec), sum) -cat("Pi is roughly", 4.0 * count / n, "\n") -cat("Num elements in RDD ", count(rdd), "\n") diff --git a/examples/src/main/r/wordcount.R b/examples/src/main/r/wordcount.R deleted file mode 100644 index b734cb0ecf55b..0000000000000 --- a/examples/src/main/r/wordcount.R +++ /dev/null @@ -1,42 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -library(SparkR) - -args <- commandArgs(trailing = TRUE) - -if (length(args) != 1) { - print("Usage: wordcount ") - q("no") -} - -# Initialize Spark context -sc <- sparkR.init(appName = "RwordCount") -lines <- textFile(sc, args[[1]]) - -words <- flatMap(lines, - function(line) { - strsplit(line, " ")[[1]] - }) -wordCount <- lapply(words, function(word) { list(word, 1L) }) - -counts <- reduceByKey(wordCount, "+", 2L) -output <- collect(counts) - -for (wordcount in output) { - cat(wordcount[[1]], ": ", wordcount[[2]], "\n") -} diff --git a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala index 11d5c92c5952d..023bb3ee2d108 100644 --- a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala @@ -104,8 +104,8 @@ object CassandraCQLTest { val casRdd = sc.newAPIHadoopRDD(job.getConfiguration(), classOf[CqlPagingInputFormat], - classOf[java.util.Map[String,ByteBuffer]], - classOf[java.util.Map[String,ByteBuffer]]) + classOf[java.util.Map[String, ByteBuffer]], + classOf[java.util.Map[String, ByteBuffer]]) println("Count: " + casRdd.count) val productSaleRDD = casRdd.map { @@ -118,7 +118,7 @@ object CassandraCQLTest { case (productId, saleCount) => println(productId + ":" + saleCount) } - val casoutputCF = aggregatedRDD.map { + val casoutputCF = aggregatedRDD.map { case (productId, saleCount) => { val outColFamKey = Map("prod_id" -> ByteBufferUtil.bytes(productId)) val outKey: java.util.Map[String, ByteBuffer] = outColFamKey diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala index 
a55e0dc8d36c2..c3fc74a116c0a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala @@ -39,7 +39,7 @@ object LocalLR { def generateData: Array[DataPoint] = { def generatePoint(i: Int): DataPoint = { - val y = if(i % 2 == 0) -1 else 1 + val y = if (i % 2 == 0) -1 else 1 val x = DenseVector.fill(D){rand.nextGaussian + y * R} DataPoint(x, y) } diff --git a/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala index 32e02eab8b031..75c82117cbad2 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala @@ -22,7 +22,7 @@ import org.apache.spark.SparkContext._ /** * Executes a roll up-style query against Apache logs. - * + * * Usage: LogQuery [logFile] */ object LogQuery { diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index 6c0ac8013ce34..30c4261551837 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -117,7 +117,7 @@ object SparkALS { var us = Array.fill(U)(randomVector(F)) // Iteratively update movies then users - val Rc = sc.broadcast(R) + val Rc = sc.broadcast(R) var msb = sc.broadcast(ms) var usb = sc.broadcast(us) for (iter <- 1 to ITERATIONS) { diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala index 8c01a60844620..1e6b4fb0c7514 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala @@ -44,7 +44,7 @@ object SparkLR { def generateData: Array[DataPoint] = { def generatePoint(i: Int): DataPoint = { - val y = if(i % 2 == 0) -1 else 1 + val y = if (i % 2 == 0) -1 else 1 val x = DenseVector.fill(D){rand.nextGaussian + y * R} DataPoint(x, y) } diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala index 8d092b6506d33..bd7894f184c4c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala @@ -51,7 +51,7 @@ object SparkPageRank { showWarning() val sparkConf = new SparkConf().setAppName("PageRank") - val iters = if (args.length > 0) args(1).toInt else 10 + val iters = if (args.length > 1) args(1).toInt else 10 val ctx = new SparkContext(sparkConf) val lines = ctx.textFile(args(0), 1) val links = lines.map{ s => diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala deleted file mode 100644 index ab6e63deb3c95..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/bagel/PageRankUtils.scala +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.bagel - -import org.apache.spark._ -import org.apache.spark.bagel._ - -class PageRankUtils extends Serializable { - def computeWithCombiner(numVertices: Long, epsilon: Double)( - self: PRVertex, messageSum: Option[Double], superstep: Int - ): (PRVertex, Array[PRMessage]) = { - val newValue = messageSum match { - case Some(msgSum) if msgSum != 0 => - 0.15 / numVertices + 0.85 * msgSum - case _ => self.value - } - - val terminate = superstep >= 10 - - val outbox: Array[PRMessage] = - if (!terminate) { - self.outEdges.map(targetId => new PRMessage(targetId, newValue / self.outEdges.size)) - } else { - Array[PRMessage]() - } - - (new PRVertex(newValue, self.outEdges, !terminate), outbox) - } - - def computeNoCombiner(numVertices: Long, epsilon: Double) - (self: PRVertex, messages: Option[Array[PRMessage]], superstep: Int) - : (PRVertex, Array[PRMessage]) = - computeWithCombiner(numVertices, epsilon)(self, messages match { - case Some(msgs) => Some(msgs.map(_.value).sum) - case None => None - }, superstep) -} - -class PRCombiner extends Combiner[PRMessage, Double] with Serializable { - def createCombiner(msg: PRMessage): Double = - msg.value - def mergeMsg(combiner: Double, msg: PRMessage): Double = - combiner + msg.value - def mergeCombiners(a: Double, b: Double): Double = - a + b -} - -class PRVertex() extends Vertex with Serializable { - var value: Double = _ - var outEdges: Array[String] = _ - var active: Boolean = _ - - def this(value: Double, outEdges: Array[String], active: Boolean = true) { - this() - this.value = value - this.outEdges = outEdges - this.active = active - } - - override def toString(): String = { - "PRVertex(value=%f, outEdges.length=%d, active=%s)" - .format(value, outEdges.length, active.toString) - } -} - -class PRMessage() extends Message[String] with Serializable { - var targetId: String = _ - var value: Double = _ - - def this(targetId: String, value: Double) { - this() - this.targetId = targetId - this.value = value - } -} - -class CustomPartitioner(partitions: Int) extends Partitioner { - def numPartitions: Int = partitions - - def getPartition(key: Any): Int = { - val hash = key match { - case k: Long => (k & 0x00000000FFFFFFFFL).toInt - case _ => key.hashCode - } - - val mod = key.hashCode % partitions - if (mod < 0) mod + partitions else mod - } - - override def equals(other: Any): Boolean = other match { - case c: CustomPartitioner => - c.numPartitions == numPartitions - case _ => false - } - - override def hashCode: Int = numPartitions -} diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala deleted file mode 100644 index 859abedf2a55e..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRank.scala +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.bagel - -import org.apache.spark._ -import org.apache.spark.SparkContext._ - -import org.apache.spark.bagel._ - -import scala.xml.{XML,NodeSeq} - -/** - * Run PageRank on XML Wikipedia dumps from http://wiki.freebase.com/wiki/WEX. Uses the "articles" - * files from there, which contains one line per wiki article in a tab-separated format - * (http://wiki.freebase.com/wiki/WEX/Documentation#articles). - */ -object WikipediaPageRank { - def main(args: Array[String]) { - if (args.length < 4) { - System.err.println( - "Usage: WikipediaPageRank ") - System.exit(-1) - } - val sparkConf = new SparkConf() - sparkConf.setAppName("WikipediaPageRank") - sparkConf.registerKryoClasses(Array(classOf[PRVertex], classOf[PRMessage])) - - val inputFile = args(0) - val threshold = args(1).toDouble - val numPartitions = args(2).toInt - val usePartitioner = args(3).toBoolean - - sparkConf.setAppName("WikipediaPageRank") - val sc = new SparkContext(sparkConf) - - // Parse the Wikipedia page data into a graph - val input = sc.textFile(inputFile) - - println("Counting vertices...") - val numVertices = input.count() - println("Done counting vertices.") - - println("Parsing input file...") - var vertices = input.map(line => { - val fields = line.split("\t") - val (title, body) = (fields(1), fields(3).replace("\\n", "\n")) - val links = - if (body == "\\N") { - NodeSeq.Empty - } else { - try { - XML.loadString(body) \\ "link" \ "target" - } catch { - case e: org.xml.sax.SAXParseException => - System.err.println("Article \"" + title + "\" has malformed XML in body:\n" + body) - NodeSeq.Empty - } - } - val outEdges = links.map(link => new String(link.text)).toArray - val id = new String(title) - (id, new PRVertex(1.0 / numVertices, outEdges)) - }) - if (usePartitioner) { - vertices = vertices.partitionBy(new HashPartitioner(sc.defaultParallelism)).cache() - } else { - vertices = vertices.cache() - } - println("Done parsing input file.") - - // Do the computation - val epsilon = 0.01 / numVertices - val messages = sc.parallelize(Array[(String, PRMessage)]()) - val utils = new PageRankUtils - val result = - Bagel.run( - sc, vertices, messages, combiner = new PRCombiner(), - numPartitions = numPartitions)( - utils.computeWithCombiner(numVertices, epsilon)) - - // Print the result - System.err.println("Articles with PageRank >= " + threshold + ":") - val top = - (result - .filter { case (id, vertex) => vertex.value >= threshold } - .map { case (id, vertex) => "%s\t%s\n".format(id, vertex.value) } - .collect().mkString) - println(top) - - sc.stop() - } -} diff --git a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala b/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala deleted file mode 100644 index 576a3e371b993..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/bagel/WikipediaPageRankStandalone.scala +++ 
/dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.bagel - -import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream} -import java.nio.ByteBuffer - -import scala.collection.mutable.ArrayBuffer -import scala.xml.{XML, NodeSeq} - -import org.apache.spark._ -import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd.RDD - -import scala.reflect.ClassTag - -object WikipediaPageRankStandalone { - def main(args: Array[String]) { - if (args.length < 4) { - System.err.println("Usage: WikipediaPageRankStandalone " + - " ") - System.exit(-1) - } - val sparkConf = new SparkConf() - sparkConf.set("spark.serializer", "spark.bagel.examples.WPRSerializer") - - val inputFile = args(0) - val threshold = args(1).toDouble - val numIterations = args(2).toInt - val usePartitioner = args(3).toBoolean - - sparkConf.setAppName("WikipediaPageRankStandalone") - - val sc = new SparkContext(sparkConf) - - val input = sc.textFile(inputFile) - val partitioner = new HashPartitioner(sc.defaultParallelism) - val links = - if (usePartitioner) { - input.map(parseArticle _).partitionBy(partitioner).cache() - } else { - input.map(parseArticle _).cache() - } - val n = links.count() - val defaultRank = 1.0 / n - val a = 0.15 - - // Do the computation - val startTime = System.currentTimeMillis - val ranks = - pageRank(links, numIterations, defaultRank, a, n, partitioner, usePartitioner, - sc.defaultParallelism) - - // Print the result - System.err.println("Articles with PageRank >= " + threshold + ":") - val top = - (ranks - .filter { case (id, rank) => rank >= threshold } - .map { case (id, rank) => "%s\t%s\n".format(id, rank) } - .collect().mkString) - println(top) - - val time = (System.currentTimeMillis - startTime) / 1000.0 - println("Completed %d iterations in %f seconds: %f seconds per iteration" - .format(numIterations, time, time / numIterations)) - sc.stop() - } - - def parseArticle(line: String): (String, Array[String]) = { - val fields = line.split("\t") - val (title, body) = (fields(1), fields(3).replace("\\n", "\n")) - val id = new String(title) - val links = - if (body == "\\N") { - NodeSeq.Empty - } else { - try { - XML.loadString(body) \\ "link" \ "target" - } catch { - case e: org.xml.sax.SAXParseException => - System.err.println("Article \"" + title + "\" has malformed XML in body:\n" + body) - NodeSeq.Empty - } - } - val outEdges = links.map(link => new String(link.text)).toArray - (id, outEdges) - } - - def pageRank( - links: RDD[(String, Array[String])], - numIterations: Int, - defaultRank: Double, - a: Double, - n: Long, - partitioner: Partitioner, - usePartitioner: 
Boolean, - numPartitions: Int - ): RDD[(String, Double)] = { - var ranks = links.mapValues { edges => defaultRank } - for (i <- 1 to numIterations) { - val contribs = links.groupWith(ranks).flatMap { - case (id, (linksWrapperIterable, rankWrapperIterable)) => - val linksWrapper = linksWrapperIterable.iterator - val rankWrapper = rankWrapperIterable.iterator - if (linksWrapper.hasNext) { - val linksWrapperHead = linksWrapper.next - if (rankWrapper.hasNext) { - val rankWrapperHead = rankWrapper.next - linksWrapperHead.map(dest => (dest, rankWrapperHead / linksWrapperHead.size)) - } else { - linksWrapperHead.map(dest => (dest, defaultRank / linksWrapperHead.size)) - } - } else { - Array[(String, Double)]() - } - } - ranks = (contribs.combineByKey((x: Double) => x, - (x: Double, y: Double) => x + y, - (x: Double, y: Double) => x + y, - partitioner) - .mapValues(sum => a/n + (1-a)*sum)) - } - ranks - } -} - -class WPRSerializer extends org.apache.spark.serializer.Serializer { - def newInstance(): SerializerInstance = new WPRSerializerInstance() -} - -class WPRSerializerInstance extends SerializerInstance { - def serialize[T: ClassTag](t: T): ByteBuffer = { - throw new UnsupportedOperationException() - } - - def deserialize[T: ClassTag](bytes: ByteBuffer): T = { - throw new UnsupportedOperationException() - } - - def deserialize[T: ClassTag](bytes: ByteBuffer, loader: ClassLoader): T = { - throw new UnsupportedOperationException() - } - - def serializeStream(s: OutputStream): SerializationStream = { - new WPRSerializationStream(s) - } - - def deserializeStream(s: InputStream): DeserializationStream = { - new WPRDeserializationStream(s) - } -} - -class WPRSerializationStream(os: OutputStream) extends SerializationStream { - val dos = new DataOutputStream(os) - - def writeObject[T: ClassTag](t: T): SerializationStream = t match { - case (id: String, wrapper: ArrayBuffer[_]) => wrapper(0) match { - case links: Array[String] => { - dos.writeInt(0) // links - dos.writeUTF(id) - dos.writeInt(links.length) - for (link <- links) { - dos.writeUTF(link) - } - this - } - case rank: Double => { - dos.writeInt(1) // rank - dos.writeUTF(id) - dos.writeDouble(rank) - this - } - } - case (id: String, rank: Double) => { - dos.writeInt(2) // rank without wrapper - dos.writeUTF(id) - dos.writeDouble(rank) - this - } - } - - def flush() { dos.flush() } - def close() { dos.close() } -} - -class WPRDeserializationStream(is: InputStream) extends DeserializationStream { - val dis = new DataInputStream(is) - - def readObject[T: ClassTag](): T = { - val typeId = dis.readInt() - typeId match { - case 0 => { - val id = dis.readUTF() - val numLinks = dis.readInt() - val links = new Array[String](numLinks) - for (i <- 0 until numLinks) { - val link = dis.readUTF() - links(i) = link - } - (id, ArrayBuffer(links)).asInstanceOf[T] - } - case 1 => { - val id = dis.readUTF() - val rank = dis.readDouble() - (id, ArrayBuffer(rank)).asInstanceOf[T] - } - case 2 => { - val id = dis.readUTF() - val rank = dis.readDouble() - (id, rank).asInstanceOf[T] - } - } - } - - def close() { dis.close() } -} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index 9002e99d82ad3..54e4073941056 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -112,7 +112,7 @@ object DecisionTreeExample { 
.text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") @@ -276,16 +276,14 @@ object DecisionTreeExample { // Get the trained Decision Tree from the fitted PipelineModel algo match { case "classification" => - val treeModel = pipelineModel.getModel[DecisionTreeClassificationModel]( - dt.asInstanceOf[DecisionTreeClassifier]) + val treeModel = pipelineModel.stages.last.asInstanceOf[DecisionTreeClassificationModel] if (treeModel.numNodes < 20) { println(treeModel.toDebugString) // Print full model. } else { println(treeModel) // Print model summary. } case "regression" => - val treeModel = pipelineModel.getModel[DecisionTreeRegressionModel]( - dt.asInstanceOf[DecisionTreeRegressor]) + val treeModel = pipelineModel.stages.last.asInstanceOf[DecisionTreeRegressionModel] if (treeModel.numNodes < 20) { println(treeModel.toDebugString) // Print full model. } else { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 2245fa429fda3..3ee456edbe01e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -18,13 +18,13 @@ package org.apache.spark.examples.ml import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.ml.classification.{Classifier, ClassifierParams, ClassificationModel} -import org.apache.spark.ml.param.{Params, IntParam, ParamMap} +import org.apache.spark.ml.classification.{ClassificationModel, Classifier, ClassifierParams} +import org.apache.spark.ml.param.{IntParam, ParamMap} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.sql.{DataFrame, Row, SQLContext} - /** * A simple example demonstrating how to write your own learning algorithm using Estimator, * Transformer, and other abstractions. @@ -99,7 +99,7 @@ private trait MyLogisticRegressionParams extends ClassifierParams { * class since the maxIter parameter is only used during training (not in the Model). */ val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations") - def getMaxIter: Int = getOrDefault(maxIter) + def getMaxIter: Int = $(maxIter) } /** @@ -107,28 +107,28 @@ private trait MyLogisticRegressionParams extends ClassifierParams { * * NOTE: This is private since it is an example. In practice, you may not want it to be private. */ -private class MyLogisticRegression +private class MyLogisticRegression(override val uid: String) extends Classifier[Vector, MyLogisticRegression, MyLogisticRegressionModel] with MyLogisticRegressionParams { + def this() = this(Identifiable.randomUID("myLogReg")) + setMaxIter(100) // Initialize // The parameter setter is in this class since it should return type MyLogisticRegression. def setMaxIter(value: Int): this.type = set(maxIter, value) // This method is used by fit() - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): MyLogisticRegressionModel = { + override protected def train(dataset: DataFrame): MyLogisticRegressionModel = { // Extract columns from data using helper method. 
- val oldDataset = extractLabeledPoints(dataset, paramMap) + val oldDataset = extractLabeledPoints(dataset) // Do learning to estimate the weight vector. val numFeatures = oldDataset.take(1)(0).features.size val weights = Vectors.zeros(numFeatures) // Learning would happen here. // Create a model, and return it. - new MyLogisticRegressionModel(this, paramMap, weights) + new MyLogisticRegressionModel(uid, weights).setParent(this) } } @@ -138,8 +138,7 @@ private class MyLogisticRegression * NOTE: This is private since it is an example. In practice, you may not want it to be private. */ private class MyLogisticRegressionModel( - override val parent: MyLogisticRegression, - override val fittingParamMap: ParamMap, + override val uid: String, val weights: Vector) extends ClassificationModel[Vector, MyLogisticRegressionModel] with MyLogisticRegressionParams { @@ -176,9 +175,7 @@ private class MyLogisticRegressionModel( * * This is used for the default implementation of [[transform()]]. */ - override protected def copy(): MyLogisticRegressionModel = { - val m = new MyLogisticRegressionModel(parent, fittingParamMap, weights) - Params.inheritValues(extractParamMap(), this, m) - m + override def copy(extra: ParamMap): MyLogisticRegressionModel = { + copyValues(new MyLogisticRegressionModel(uid, weights), extra) } } diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala index 5fccb142d4c3d..33905277c7341 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala @@ -111,7 +111,7 @@ object GBTExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") @@ -201,14 +201,14 @@ object GBTExample { // Get the trained GBT from the fitted PipelineModel algo match { case "classification" => - val rfModel = pipelineModel.getModel[GBTClassificationModel](dt.asInstanceOf[GBTClassifier]) + val rfModel = pipelineModel.stages.last.asInstanceOf[GBTClassificationModel] if (rfModel.totalNumNodes < 30) { println(rfModel.toDebugString) // Print full model. } else { println(rfModel) // Print model summary. } case "regression" => - val rfModel = pipelineModel.getModel[GBTRegressionModel](dt.asInstanceOf[GBTRegressor]) + val rfModel = pipelineModel.stages.last.asInstanceOf[GBTRegressionModel] if (rfModel.totalNumNodes < 30) { println(rfModel.toDebugString) // Print full model. } else { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala new file mode 100644 index 0000000000000..6927eb8f275cf --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import java.util.concurrent.TimeUnit.{NANOSECONDS => NANO} + +import scopt.OptionParser + +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.examples.mllib.AbstractParams +import org.apache.spark.ml.classification.{OneVsRest, LogisticRegression} +import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.mllib.evaluation.MulticlassMetrics +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SQLContext + +/** + * An example runner for Multiclass to Binary Reduction with One Vs Rest. + * The example uses Logistic Regression as the base classifier. All parameters that + * can be specified on the base classifier can be passed in to the runner options. + * Run with + * {{{ + * ./bin/run-example ml.OneVsRestExample [options] + * }}} + * For local mode, run + * {{{ + * ./bin/spark-submit --class org.apache.spark.examples.ml.OneVsRestExample --driver-memory 1g + * [examples JAR path] [options] + * }}} + * If you use it as a template to create your own app, please use `spark-submit` to submit your app. + */ +object OneVsRestExample { + + case class Params private[ml] ( + input: String = null, + testInput: Option[String] = None, + maxIter: Int = 100, + tol: Double = 1E-6, + fitIntercept: Boolean = true, + regParam: Option[Double] = None, + elasticNetParam: Option[Double] = None, + fracTest: Double = 0.2) extends AbstractParams[Params] + + def main(args: Array[String]) { + val defaultParams = Params() + + val parser = new OptionParser[Params]("OneVsRest Example") { + head("OneVsRest Example: multiclass to binary reduction using OneVsRest") + opt[String]("input") + .text("input path to labeled examples. This path must be specified") + .required() + .action((x, c) => c.copy(input = x)) + opt[Double]("fracTest") + .text(s"fraction of data to hold out for testing. If given option testInput, " + + s"this option is ignored. default: ${defaultParams.fracTest}") + .action((x, c) => c.copy(fracTest = x)) + opt[String]("testInput") + .text("input path to test dataset. If given, option fracTest is ignored") + .action((x, c) => c.copy(testInput = Some(x))) + opt[Int]("maxIter") + .text(s"maximum number of iterations for Logistic Regression." + + s" default: ${defaultParams.maxIter}") + .action((x, c) => c.copy(maxIter = x)) + opt[Double]("tol") + .text(s"the convergence tolerance of iterations for Logistic Regression." + + s" default: ${defaultParams.tol}") + .action((x, c) => c.copy(tol = x)) + opt[Boolean]("fitIntercept") + .text(s"fit intercept for Logistic Regression." 
+ + s" default: ${defaultParams.fitIntercept}") + .action((x, c) => c.copy(fitIntercept = x)) + opt[Double]("regParam") + .text(s"the regularization parameter for Logistic Regression.") + .action((x, c) => c.copy(regParam = Some(x))) + opt[Double]("elasticNetParam") + .text(s"the ElasticNet mixing parameter for Logistic Regression.") + .action((x, c) => c.copy(elasticNetParam = Some(x))) + checkConfig { params => + if (params.fracTest < 0 || params.fracTest >= 1) { + failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).") + } else { + success + } + } + } + parser.parse(args, defaultParams).map { params => + run(params) + }.getOrElse { + sys.exit(1) + } + } + + private def run(params: Params) { + val conf = new SparkConf().setAppName(s"OneVsRestExample with $params") + val sc = new SparkContext(conf) + val inputData = MLUtils.loadLibSVMFile(sc, params.input) + val sqlContext = new SQLContext(sc) + import sqlContext.implicits._ + + // compute the train/test split: if testInput is not provided use part of input. + val data = params.testInput match { + case Some(t) => { + // compute the number of features in the training set. + val numFeatures = inputData.first().features.size + val testData = MLUtils.loadLibSVMFile(sc, t, numFeatures) + Array[RDD[LabeledPoint]](inputData, testData) + } + case None => { + val f = params.fracTest + inputData.randomSplit(Array(1 - f, f), seed = 12345) + } + } + val Array(train, test) = data.map(_.toDF().cache()) + + // instantiate the base classifier + val classifier = new LogisticRegression() + .setMaxIter(params.maxIter) + .setTol(params.tol) + .setFitIntercept(params.fitIntercept) + + // Set regParam, elasticNetParam if specified in params + params.regParam.foreach(classifier.setRegParam) + params.elasticNetParam.foreach(classifier.setElasticNetParam) + + // instantiate the One Vs Rest Classifier. + + val ovr = new OneVsRest() + ovr.setClassifier(classifier) + + // train the multiclass model. + val (trainingDuration, ovrModel) = time(ovr.fit(train)) + + // score the model on test data. 
+ val (predictionDuration, predictions) = time(ovrModel.transform(test)) + + // evaluate the model + val predictionsAndLabels = predictions.select("prediction", "label") + .map(row => (row.getDouble(0), row.getDouble(1))) + + val metrics = new MulticlassMetrics(predictionsAndLabels) + + val confusionMatrix = metrics.confusionMatrix + + // compute the false positive rate per label + val predictionColSchema = predictions.schema("prediction") + val numClasses = MetadataUtils.getNumClasses(predictionColSchema).get + val fprs = Range(0, numClasses).map(p => (p, metrics.falsePositiveRate(p.toDouble))) + + println(s" Training Time ${trainingDuration} sec\n") + + println(s" Prediction Time ${predictionDuration} sec\n") + + println(s" Confusion Matrix\n ${confusionMatrix.toString}\n") + + println("label\tfpr") + + println(fprs.map {case (label, fpr) => label + "\t" + fpr}.mkString("\n")) + + sc.stop() + } + + private def time[R](block: => R): (Long, R) = { + val t0 = System.nanoTime() + val result = block // call-by-name + val t1 = System.nanoTime() + (NANO.toSeconds(t1 - t0), result) + } +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala index 9b909324ec82a..9f7cad68a4594 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala @@ -117,7 +117,7 @@ object RandomForestExample { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") @@ -209,16 +209,14 @@ object RandomForestExample { // Get the trained Random Forest from the fitted PipelineModel algo match { case "classification" => - val rfModel = pipelineModel.getModel[RandomForestClassificationModel]( - dt.asInstanceOf[RandomForestClassifier]) + val rfModel = pipelineModel.stages.last.asInstanceOf[RandomForestClassificationModel] if (rfModel.totalNumNodes < 30) { println(rfModel.toDebugString) // Print full model. } else { println(rfModel) // Print model summary. } case "regression" => - val rfModel = pipelineModel.getModel[RandomForestRegressionModel]( - dt.asInstanceOf[RandomForestRegressor]) + val rfModel = pipelineModel.stages.last.asInstanceOf[RandomForestRegressionModel] if (rfModel.totalNumNodes < 30) { println(rfModel.toDebugString) // Print full model. } else { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala index bf805149d0af6..a0561e2573fc9 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleParamsExample.scala @@ -63,7 +63,7 @@ object SimpleParamsExample { // we can view the parameters it used during fit(). // This prints the parameter (name: value) pairs, where names are unique IDs for this // LogisticRegression instance. - println("Model 1 was fit using parameters: " + model1.fittingParamMap) + println("Model 1 was fit using parameters: " + model1.parent.extractParamMap()) // We may alternatively specify parameters using a ParamMap, // which supports several methods for specifying parameters. 
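As the comment above notes, a ParamMap can be specified in several ways; a short Scala sketch of the pattern the next hunk relies on (lr and training come from the surrounding SimpleParamsExample):

import org.apache.spark.ml.param.ParamMap

// Build a ParamMap incrementally; a later put overwrites an earlier value for the same param.
val paramMap = ParamMap(lr.maxIter -> 20)
  .put(lr.maxIter, 30)                            // overwrite maxIter
  .put(lr.regParam -> 0.1, lr.threshold -> 0.55)  // specify several params at once
// Rename the output probability column and combine both maps.
val paramMap2 = ParamMap(lr.probabilityCol -> "myProbability")
val paramMapCombined = paramMap ++ paramMap2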
@@ -78,7 +78,7 @@ object SimpleParamsExample { // Now learn a new model using the paramMapCombined parameters. // paramMapCombined overrides all parameters set earlier via lr.set* methods. val model2 = lr.fit(training.toDF(), paramMapCombined) - println("Model 2 was fit using parameters: " + model2.fittingParamMap) + println("Model 2 was fit using parameters: " + model2.parent.extractParamMap()) // Prepare test data. val test = sc.parallelize(Seq( @@ -87,7 +87,7 @@ object SimpleParamsExample { LabeledPoint(1.0, Vectors.dense(0.0, 2.2, -1.5)))) // Make predictions on test data using the Transformer.transform() method. - // LogisticRegression.transform will only use the 'features' column. + // LogisticRegressionModel.transform will only use the 'features' column. // Note that model2.transform() outputs a 'myProbability' column instead of the usual // 'probability' column since we renamed the lr.probabilityCol parameter previously. model2.transform(test.toDF()) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala index 6772efd2c581c..1324b066c30c3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SimpleTextClassificationPipeline.scala @@ -64,7 +64,7 @@ object SimpleTextClassificationPipeline { .setOutputCol("features") val lr = new LogisticRegression() .setMaxIter(10) - .setRegParam(0.01) + .setRegParam(0.001) val pipeline = new Pipeline() .setStages(Array(tokenizer, hashingTF, lr)) @@ -75,7 +75,7 @@ object SimpleTextClassificationPipeline { val test = sc.parallelize(Seq( Document(4L, "spark i j k"), Document(5L, "l m n"), - Document(6L, "mapreduce spark"), + Document(6L, "spark hadoop spark"), Document(7L, "apache hadoop"))) // Make predictions on test documents. 
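A short sketch of the tokenizer/hashingTF/LogisticRegression pipeline this example builds, using the new regParam value; `training` and `test` are assumed DataFrames with "id" and "text" columns ("label" as well for training).

    import org.apache.spark.ml.Pipeline
    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.feature.{HashingTF, Tokenizer}

    val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
    val hashingTF = new HashingTF().setInputCol(tokenizer.getOutputCol).setOutputCol("features")
    val lr = new LogisticRegression().setMaxIter(10).setRegParam(0.001)
    val model = new Pipeline().setStages(Array(tokenizer, hashingTF, lr)).fit(training)
    // Score the held-out documents; the model appends probability/prediction columns.
    model.transform(test).select("id", "text", "probability", "prediction").show()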
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala index e943d6c889fab..520893b26d595 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DatasetExample.scala @@ -103,10 +103,10 @@ object DatasetExample { tmpDir.deleteOnExit() val outputDir = new File(tmpDir, "dataset").toString println(s"Saving to $outputDir as Parquet file.") - df.saveAsParquetFile(outputDir) + df.write.parquet(outputDir) println(s"Loading Parquet file with UDT from $outputDir.") - val newDataset = sqlContext.parquetFile(outputDir) + val newDataset = sqlContext.read.parquet(outputDir) println(s"Schema from Parquet: ${newDataset.schema.prettyJson}") val newFeatures = newDataset.select("features").map { case Row(v: Vector) => v } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index 262fd2c9611d0..3381941673db8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -22,7 +22,6 @@ import scala.language.reflectiveCalls import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.SparkContext._ import org.apache.spark.mllib.evaluation.MulticlassMetrics import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint @@ -126,7 +125,7 @@ object DecisionTreeRunner { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") @@ -354,7 +353,11 @@ object DecisionTreeRunner { /** * Calculates the mean squared error for regression. + * + * This is just for demo purpose. In general, don't copy this code because it is NOT efficient + * due to the use of structural types, which leads to one reflection call per record. 
*/ + // scalastyle:off structural.type private[mllib] def meanSquaredError( model: { def predict(features: Vector): Double }, data: RDD[LabeledPoint]): Double = { @@ -363,4 +366,5 @@ object DecisionTreeRunner { err * err }.mean() } + // scalastyle:on structural.type } diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala index df76b45e50810..f8c71ccabc43b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala @@ -40,23 +40,23 @@ object DenseGaussianMixture { private def run(inputFile: String, k: Int, convergenceTol: Double, maxIterations: Int) { val conf = new SparkConf().setAppName("Gaussian Mixture Model EM example") - val ctx = new SparkContext(conf) - + val ctx = new SparkContext(conf) + val data = ctx.textFile(inputFile).map { line => Vectors.dense(line.trim.split(' ').map(_.toDouble)) }.cache() - + val clusters = new GaussianMixture() .setK(k) .setConvergenceTol(convergenceTol) .setMaxIterations(maxIterations) .run(data) - + for (i <- 0 until clusters.k) { - println("weight=%f\nmu=%s\nsigma=\n%s\n" format + println("weight=%f\nmu=%s\nsigma=\n%s\n" format (clusters.weights(i), clusters.gaussians(i).mu, clusters.gaussians(i).sigma)) } - + println("Cluster labels (first <= 100):") val clusterLabels = clusters.predict(data) clusterLabels.take(100).foreach { x => diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index 0763a7736305a..7416fb5a40848 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -69,7 +69,7 @@ object GradientBoostedTreesRunner { .text(s"input path to test dataset. If given, option fracTest is ignored." + s" default: ${defaultParams.testInput}") .action((x, c) => c.copy(testInput = x)) - opt[String]("") + opt[String]("dataFormat") .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") .action((x, c) => c.copy(dataFormat = x)) arg[String]("") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index a1850390c0a86..31d629f853161 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -26,7 +26,7 @@ import scopt.OptionParser import org.apache.log4j.{Level, Logger} import org.apache.spark.{SparkContext, SparkConf} -import org.apache.spark.mllib.clustering.{DistributedLDAModel, LDA} +import org.apache.spark.mllib.clustering.{EMLDAOptimizer, OnlineLDAOptimizer, DistributedLDAModel, LDA} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -48,6 +48,7 @@ object LDAExample { topicConcentration: Double = -1, vocabSize: Int = 10000, stopwordFile: String = "", + algorithm: String = "em", checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] @@ -78,6 +79,10 @@ object LDAExample { .text(s"filepath for a list of stopwords. Note: This must fit on a single machine." 
+ s" default: ${defaultParams.stopwordFile}") .action((x, c) => c.copy(stopwordFile = x)) + opt[String]("algorithm") + .text(s"inference algorithm to use. em and online are supported." + + s" default: ${defaultParams.algorithm}") + .action((x, c) => c.copy(algorithm = x)) opt[String]("checkpointDir") .text(s"Directory for checkpointing intermediate results." + s" Checkpointing helps with recovery and eliminates temporary shuffle files on disk." + @@ -128,7 +133,17 @@ object LDAExample { // Run LDA. val lda = new LDA() - lda.setK(params.k) + + val optimizer = params.algorithm.toLowerCase match { + case "em" => new EMLDAOptimizer + // add (1.0 / actualCorpusSize) to MiniBatchFraction be more robust on tiny datasets. + case "online" => new OnlineLDAOptimizer().setMiniBatchFraction(0.05 + 1.0 / actualCorpusSize) + case _ => throw new IllegalArgumentException( + s"Only em, online are supported but got ${params.algorithm}.") + } + + lda.setOptimizer(optimizer) + .setK(params.k) .setMaxIterations(params.maxIterations) .setDocConcentration(params.docConcentration) .setTopicConcentration(params.topicConcentration) @@ -137,14 +152,18 @@ object LDAExample { sc.setCheckpointDir(params.checkpointDir.get) } val startTime = System.nanoTime() - val ldaModel = lda.run(corpus).asInstanceOf[DistributedLDAModel] + val ldaModel = lda.run(corpus) val elapsed = (System.nanoTime() - startTime) / 1e9 println(s"Finished training LDA model. Summary:") println(s"\t Training time: $elapsed sec") - val avgLogLikelihood = ldaModel.logLikelihood / actualCorpusSize.toDouble - println(s"\t Training data average log likelihood: $avgLogLikelihood") - println() + + if (ldaModel.isInstanceOf[DistributedLDAModel]) { + val distLDAModel = ldaModel.asInstanceOf[DistributedLDAModel] + val avgLogLikelihood = distLDAModel.logLikelihood / actualCorpusSize.toDouble + println(s"\t Training data average log likelihood: $avgLogLikelihood") + println() + } // Print the topics, showing the top-weighted terms for each topic. 
val topicIndices = ldaModel.describeTopics(maxTermsPerTopic = 10) diff --git a/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala b/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala index a11890d6f2b1c..3ebb112fc069e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala +++ b/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala @@ -36,22 +36,21 @@ object AvroConversionUtil extends Serializable { return null } schema.getType match { - case UNION => unpackUnion(obj, schema) - case ARRAY => unpackArray(obj, schema) - case FIXED => unpackFixed(obj, schema) - case MAP => unpackMap(obj, schema) - case BYTES => unpackBytes(obj) - case RECORD => unpackRecord(obj) - case STRING => obj.toString - case ENUM => obj.toString - case NULL => obj + case UNION => unpackUnion(obj, schema) + case ARRAY => unpackArray(obj, schema) + case FIXED => unpackFixed(obj, schema) + case MAP => unpackMap(obj, schema) + case BYTES => unpackBytes(obj) + case RECORD => unpackRecord(obj) + case STRING => obj.toString + case ENUM => obj.toString + case NULL => obj case BOOLEAN => obj - case DOUBLE => obj - case FLOAT => obj - case INT => obj - case LONG => obj - case other => throw new SparkException( - s"Unknown Avro schema type ${other.getName}") + case DOUBLE => obj + case FLOAT => obj + case INT => obj + case LONG => obj + case other => throw new SparkException(s"Unknown Avro schema type ${other.getName}") } } diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index 6331d1c0060f8..b11e32047dc34 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -58,10 +58,10 @@ object RDDRelation { df.where($"key" === 1).orderBy($"value".asc).select($"key").collect().foreach(println) // Write out an RDD as a parquet file. - df.saveAsParquetFile("pair.parquet") + df.write.parquet("pair.parquet") // Read in parquet file. Parquet files are self-describing so the schmema is preserved. - val parquetFile = sqlContext.parquetFile("pair.parquet") + val parquetFile = sqlContext.read.parquet("pair.parquet") // Queries can be run using the DSL on parequet files just like the original RDD. 
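Both DatasetExample and RDDRelation migrate to the DataFrame reader/writer API introduced in 1.4; a minimal before/after sketch, assuming an existing `sqlContext` and DataFrame `df`.

    // Deprecated in 1.4:
    //   df.saveAsParquetFile("pair.parquet")
    //   val loaded = sqlContext.parquetFile("pair.parquet")
    // New API:
    df.write.parquet("pair.parquet")
    val loaded = sqlContext.read.parquet("pair.parquet")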
parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/ActorWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/ActorWordCount.scala index 92867b44be138..016de4c63d1d2 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/ActorWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/ActorWordCount.scala @@ -104,10 +104,8 @@ extends Actor with ActorHelper { object FeederActor { def main(args: Array[String]) { - if(args.length < 2){ - System.err.println( - "Usage: FeederActor \n" - ) + if (args.length < 2){ + System.err.println("Usage: FeederActor \n") System.exit(1) } val Seq(host, port) = args.toSeq diff --git a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala similarity index 97% rename from examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala rename to examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala index 11a8cf09533ce..fbe394de4a179 100644 --- a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala @@ -51,7 +51,7 @@ object DirectKafkaWordCount { // Create context with 2 second batch interval val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount") - val ssc = new StreamingContext(sparkConf, Seconds(2)) + val ssc = new StreamingContext(sparkConf, Seconds(2)) // Create direct kafka stream with brokers and topics val topicsSet = topics.split(",").toSet diff --git a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala similarity index 76% rename from examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala rename to examples/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala index 387c0e421334b..60416ee343544 100644 --- a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala @@ -17,9 +17,9 @@ package org.apache.spark.examples.streaming -import java.util.Properties +import java.util.HashMap -import kafka.producer._ +import org.apache.kafka.clients.producer.{ProducerConfig, KafkaProducer, ProducerRecord} import org.apache.spark.streaming._ import org.apache.spark.streaming.kafka._ @@ -49,10 +49,10 @@ object KafkaWordCount { val Array(zkQuorum, group, topics, numThreads) = args val sparkConf = new SparkConf().setAppName("KafkaWordCount") - val ssc = new StreamingContext(sparkConf, Seconds(2)) + val ssc = new StreamingContext(sparkConf, Seconds(2)) ssc.checkpoint("checkpoint") - val topicMap = topics.split(",").map((_,numThreads.toInt)).toMap + val topicMap = topics.split(",").map((_, numThreads.toInt)).toMap val lines = KafkaUtils.createStream(ssc, zkQuorum, group, topicMap).map(_._2) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1L)) @@ -77,24 +77,26 @@ object KafkaWordCountProducer { val Array(brokers, topic, messagesPerSec, wordsPerMessage) = args // Zookeeper connection properties - val props = new Properties() - 
props.put("metadata.broker.list", brokers) - props.put("serializer.class", "kafka.serializer.StringEncoder") + val props = new HashMap[String, Object]() + props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers) + props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, + "org.apache.kafka.common.serialization.StringSerializer") + props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, + "org.apache.kafka.common.serialization.StringSerializer") - val config = new ProducerConfig(props) - val producer = new Producer[String, String](config) + val producer = new KafkaProducer[String, String](props) // Send some messages while(true) { - val messages = (1 to messagesPerSec.toInt).map { messageNum => + (1 to messagesPerSec.toInt).foreach { messageNum => val str = (1 to wordsPerMessage.toInt).map(x => scala.util.Random.nextInt(10).toString) .mkString(" ") - new KeyedMessage[String, String](topic, str) - }.toArray + val message = new ProducerRecord[String, String](topic, null, str) + producer.send(message) + } - producer.send(messages: _*) - Thread.sleep(100) + Thread.sleep(1000) } } diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/MQTTWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/MQTTWordCount.scala index 85b9a54b40baf..813c8554f5193 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/MQTTWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/MQTTWordCount.scala @@ -40,7 +40,7 @@ object MQTTPublisher { StreamingExamples.setStreamingLogLevels() val Seq(brokerUrl, topic) = args.toSeq - + var client: MqttClient = null try { @@ -49,7 +49,7 @@ object MQTTPublisher { client.connect() - val msgtopic = client.getTopic(topic) + val msgtopic = client.getTopic(topic) val msgContent = "hello mqtt demo for spark streaming" val message = new MqttMessage(msgContent.getBytes("utf-8")) @@ -59,10 +59,10 @@ object MQTTPublisher { println(s"Published data. 
topic: ${msgtopic.getName()}; Message: $message") } catch { case e: MqttException if e.getReasonCode == MqttException.REASON_CODE_MAX_INFLIGHT => - Thread.sleep(10) + Thread.sleep(10) println("Queue is full, wait for to consume data from the message queue") - } - } + } + } } catch { case e: MqttException => println("Exception Caught: " + e) } finally { @@ -107,7 +107,7 @@ object MQTTWordCount { val lines = MQTTUtils.createStream(ssc, brokerUrl, topic, StorageLevel.MEMORY_ONLY_SER_2) val words = lines.flatMap(x => x.split(" ")) val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _) - + wordCounts.print() ssc.start() ssc.awaitTermination() diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala index 54d996b8ac990..889f052c70263 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala @@ -57,8 +57,7 @@ object PageViewGenerator { 404 -> .05) val userZipCode = Map(94709 -> .5, 94117 -> .5) - val userID = Map((1 to 100).map(_ -> .01):_*) - + val userID = Map((1 to 100).map(_ -> .01) : _*) def pickFromDistribution[T](inputMap : Map[T, Double]) : T = { val rand = new Random().nextDouble() diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 1f3e619d97a24..7ecbd83a2a191 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml @@ -42,15 +42,46 @@ org.apache.flume flume-ng-sdk + + + + com.google.guava + guava + + + + org.apache.thrift + libthrift + + org.apache.flume flume-ng-core + + + com.google.guava + guava + + + org.apache.thrift + libthrift + + org.scala-lang scala-library + + + com.google.guava + guava + test + + + + diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkAvroCallbackHandler.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkAvroCallbackHandler.scala index fd01807fc3ac4..dc2a4ab138e18 100644 --- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkAvroCallbackHandler.scala +++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkAvroCallbackHandler.scala @@ -21,7 +21,6 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable -import com.google.common.util.concurrent.ThreadFactoryBuilder import org.apache.flume.Channel import org.apache.commons.lang3.RandomStringUtils @@ -45,8 +44,7 @@ import org.apache.commons.lang3.RandomStringUtils private[flume] class SparkAvroCallbackHandler(val threads: Int, val channel: Channel, val transactionTimeout: Int, val backOffInterval: Int) extends SparkFlumeProtocol with Logging { val transactionExecutorOpt = Option(Executors.newFixedThreadPool(threads, - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("Spark Sink Processor Thread - %d").build())) + new SparkSinkThreadFactory("Spark Sink Processor Thread - %d"))) // Protected by `sequenceNumberToProcessor` private val sequenceNumberToProcessor = mutable.HashMap[CharSequence, TransactionProcessor]() // This sink will not persist sequence numbers and reuses them if it gets restarted. 
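The flume-sink module drops its Guava dependency (see the exclusions above), so the sink's executor now takes the module's own daemon-thread factory, defined in the new file below; a usage sketch with an assumed thread count.

    import java.util.concurrent.Executors

    val threads = 4 // assumed here; the real value comes from the sink configuration
    val transactionExecutor = Executors.newFixedThreadPool(
      threads, new SparkSinkThreadFactory("Spark Sink Processor Thread - %d"))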
diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSinkThreadFactory.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSinkThreadFactory.scala new file mode 100644 index 0000000000000..845fc8debda75 --- /dev/null +++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/SparkSinkThreadFactory.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.streaming.flume.sink + +import java.util.concurrent.ThreadFactory +import java.util.concurrent.atomic.AtomicLong + +/** + * Thread factory that generates daemon threads with a specified name format. + */ +private[sink] class SparkSinkThreadFactory(nameFormat: String) extends ThreadFactory { + + private val threadId = new AtomicLong() + + override def newThread(r: Runnable): Thread = { + val t = new Thread(r, nameFormat.format(threadId.incrementAndGet())) + t.setDaemon(true) + t + } + +} diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala index ea45b14294df9..7ad43b1d7b0a0 100644 --- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala +++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala @@ -143,7 +143,7 @@ private class TransactionProcessor(val channel: Channel, val seqNum: String, eventBatch.setErrorMsg(msg) } else { // At this point, the events are available, so fill them into the event batch - eventBatch = new EventBatch("",seqNum, events) + eventBatch = new EventBatch("", seqNum, events) } }) } catch { diff --git a/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala b/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala index 650b2fbe1c142..6ad187ff63186 100644 --- a/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala +++ b/external/flume-sink/src/test/scala/org/apache/spark/streaming/flume/sink/SparkSinkSuite.scala @@ -24,7 +24,6 @@ import scala.collection.JavaConversions._ import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Success} -import com.google.common.util.concurrent.ThreadFactoryBuilder import org.apache.avro.ipc.NettyTransceiver import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.flume.Context @@ -185,9 +184,8 @@ class SparkSinkSuite extends FunSuite { count: Int): Seq[(NettyTransceiver, SparkFlumeProtocol.Callback)] = { (1 to count).map(_ => { - lazy val channelFactoryExecutor = - Executors.newCachedThreadPool(new 
ThreadFactoryBuilder().setDaemon(true). - setNameFormat("Flume Receiver Channel Thread - %d").build()) + lazy val channelFactoryExecutor = Executors.newCachedThreadPool( + new SparkSinkThreadFactory("Flume Receiver Channel Thread - %d")) lazy val channelFactory = new NioClientSocketChannelFactory(channelFactoryExecutor, channelFactoryExecutor) val transceiver = new NettyTransceiver(address, channelFactory) diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 8df7edbdcad33..1794f3e7d3f64 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala index dc629df4f4ac2..65c49c131518b 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala @@ -60,7 +60,7 @@ private[streaming] object EventTransformer extends Logging { out.write(body) val numHeaders = headers.size() out.writeInt(numHeaders) - for ((k,v) <- headers) { + for ((k, v) <- headers) { val keyBuff = Utils.serialize(k.toString) out.writeInt(keyBuff.length) out.write(keyBuff) diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala index 60e2994431b38..1e32a365a1eee 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala @@ -152,9 +152,9 @@ class FlumeReceiver( val channelFactory = new NioServerSocketChannelFactory(Executors.newCachedThreadPool(), Executors.newCachedThreadPool()) val channelPipelineFactory = new CompressionChannelPipelineFactory() - + new NettyServer( - responder, + responder, new InetSocketAddress(host, port), channelFactory, channelPipelineFactory, @@ -188,12 +188,12 @@ class FlumeReceiver( override def preferredLocation: Option[String] = Option(host) - /** A Netty Pipeline factory that will decompress incoming data from + /** A Netty Pipeline factory that will decompress incoming data from * and the Netty client and compress data going back to the client. * * The compression on the return is required because Flume requires - * a successful response to indicate it can remove the event/batch - * from the configured channel + * a successful response to indicate it can remove the event/batch + * from the configured channel */ private[streaming] class CompressionChannelPipelineFactory extends ChannelPipelineFactory { diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala index 92fa5b41be89e..583e7dca317ad 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala @@ -110,7 +110,7 @@ private[streaming] class FlumePollingReceiver( } /** - * A wrapper around the transceiver and the Avro IPC API. + * A wrapper around the transceiver and the Avro IPC API. 
* @param transceiver The transceiver to use for communication with Flume * @param client The client that the callbacks are received on. */ diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala index 2edea9b5b69ba..93afe50c2134f 100644 --- a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala +++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumePollingStreamSuite.scala @@ -18,15 +18,18 @@ package org.apache.spark.streaming.flume import java.net.InetSocketAddress -import java.util.concurrent.{Callable, ExecutorCompletionService, Executors} +import java.util.concurrent._ import scala.collection.JavaConversions._ import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer} +import scala.concurrent.duration._ +import scala.language.postfixOps import org.apache.flume.Context import org.apache.flume.channel.MemoryChannel import org.apache.flume.conf.Configurables import org.apache.flume.event.EventBuilder +import org.scalatest.concurrent.Eventually._ import org.scalatest.{BeforeAndAfter, FunSuite} @@ -100,18 +103,8 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging Configurables.configure(sink, context) sink.setChannel(channel) sink.start() - // Set up the streaming context and input streams - val ssc = new StreamingContext(conf, batchDuration) - val flumeStream: ReceiverInputDStream[SparkFlumeEvent] = - FlumeUtils.createPollingStream(ssc, Seq(new InetSocketAddress("localhost", sink.getPort())), - StorageLevel.MEMORY_AND_DISK, eventsPerBatch, 1) - val outputBuffer = new ArrayBuffer[Seq[SparkFlumeEvent]] - with SynchronizedBuffer[Seq[SparkFlumeEvent]] - val outputStream = new TestOutputStream(flumeStream, outputBuffer) - outputStream.register() - ssc.start() - writeAndVerify(Seq(channel), ssc, outputBuffer) + writeAndVerify(Seq(sink), Seq(channel)) assertChannelIsEmpty(channel) sink.stop() channel.stop() @@ -142,10 +135,22 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging Configurables.configure(sink2, context) sink2.setChannel(channel2) sink2.start() + try { + writeAndVerify(Seq(sink, sink2), Seq(channel, channel2)) + assertChannelIsEmpty(channel) + assertChannelIsEmpty(channel2) + } finally { + sink.stop() + sink2.stop() + channel.stop() + channel2.stop() + } + } + def writeAndVerify(sinks: Seq[SparkSink], channels: Seq[MemoryChannel]) { // Set up the streaming context and input streams val ssc = new StreamingContext(conf, batchDuration) - val addresses = Seq(sink.getPort(), sink2.getPort()).map(new InetSocketAddress("localhost", _)) + val addresses = sinks.map(sink => new InetSocketAddress("localhost", sink.getPort())) val flumeStream: ReceiverInputDStream[SparkFlumeEvent] = FlumeUtils.createPollingStream(ssc, addresses, StorageLevel.MEMORY_AND_DISK, eventsPerBatch, 5) @@ -155,61 +160,49 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging outputStream.register() ssc.start() - try { - writeAndVerify(Seq(channel, channel2), ssc, outputBuffer) - assertChannelIsEmpty(channel) - assertChannelIsEmpty(channel2) - } finally { - sink.stop() - sink2.stop() - channel.stop() - channel2.stop() - } - } - - def writeAndVerify(channels: Seq[MemoryChannel], ssc: StreamingContext, - outputBuffer: ArrayBuffer[Seq[SparkFlumeEvent]]) { val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] val executor = 
Executors.newCachedThreadPool() val executorCompletion = new ExecutorCompletionService[Void](executor) - channels.map(channel => { + + val latch = new CountDownLatch(batchCount * channels.size) + sinks.foreach(_.countdownWhenBatchReceived(latch)) + + channels.foreach(channel => { executorCompletion.submit(new TxnSubmitter(channel, clock)) }) + for (i <- 0 until channels.size) { executorCompletion.take() } - val startTime = System.currentTimeMillis() - while (outputBuffer.size < batchCount * channels.size && - System.currentTimeMillis() - startTime < 15000) { - logInfo("output.size = " + outputBuffer.size) - Thread.sleep(100) - } - val timeTaken = System.currentTimeMillis() - startTime - assert(timeTaken < 15000, "Operation timed out after " + timeTaken + " ms") - logInfo("Stopping context") - ssc.stop() - val flattenedBuffer = outputBuffer.flatten - assert(flattenedBuffer.size === totalEventsPerChannel * channels.size) - var counter = 0 - for (k <- 0 until channels.size; i <- 0 until totalEventsPerChannel) { - val eventToVerify = EventBuilder.withBody((channels(k).getName + " - " + - String.valueOf(i)).getBytes("utf-8"), - Map[String, String]("test-" + i.toString -> "header")) - var found = false - var j = 0 - while (j < flattenedBuffer.size && !found) { - val strToCompare = new String(flattenedBuffer(j).event.getBody.array(), "utf-8") - if (new String(eventToVerify.getBody, "utf-8") == strToCompare && - eventToVerify.getHeaders.get("test-" + i.toString) - .equals(flattenedBuffer(j).event.getHeaders.get("test-" + i.toString))) { - found = true - counter += 1 + latch.await(15, TimeUnit.SECONDS) // Ensure all data has been received. + clock.advance(batchDuration.milliseconds) + + // The eventually is required to ensure that all data in the batch has been processed. 
+ eventually(timeout(10 seconds), interval(100 milliseconds)) { + val flattenedBuffer = outputBuffer.flatten + assert(flattenedBuffer.size === totalEventsPerChannel * channels.size) + var counter = 0 + for (k <- 0 until channels.size; i <- 0 until totalEventsPerChannel) { + val eventToVerify = EventBuilder.withBody((channels(k).getName + " - " + + String.valueOf(i)).getBytes("utf-8"), + Map[String, String]("test-" + i.toString -> "header")) + var found = false + var j = 0 + while (j < flattenedBuffer.size && !found) { + val strToCompare = new String(flattenedBuffer(j).event.getBody.array(), "utf-8") + if (new String(eventToVerify.getBody, "utf-8") == strToCompare && + eventToVerify.getHeaders.get("test-" + i.toString) + .equals(flattenedBuffer(j).event.getHeaders.get("test-" + i.toString))) { + found = true + counter += 1 + } + j += 1 } - j += 1 } + assert(counter === totalEventsPerChannel * channels.size) } - assert(counter === totalEventsPerChannel * channels.size) + ssc.stop() } def assertChannelIsEmpty(channel: MemoryChannel): Unit = { @@ -234,7 +227,6 @@ class FlumePollingStreamSuite extends FunSuite with BeforeAndAfter with Logging tx.commit() tx.close() Thread.sleep(500) // Allow some time for the events to reach - clock.advance(batchDuration.milliseconds) } null } diff --git a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala index 39e6754c81dbf..4ea0f8841cb92 100644 --- a/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala +++ b/external/flume/src/test/scala/org/apache/spark/streaming/flume/FlumeStreamSuite.scala @@ -138,7 +138,7 @@ class FlumeStreamSuite extends FunSuite with BeforeAndAfter with Matchers with L val status = client.appendBatch(inputEvents.toList) status should be (avro.Status.OK) } - + eventually(timeout(10 seconds), interval(100 milliseconds)) { val outputEvents = outputBuffer.flatten.map { _.event } outputEvents.foreach { diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 0b79f47647f6b..ed95eb1b2997c 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 243ce6eaca658..373947e8d6288 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala index 1b1fc8051d052..060c2f23eded8 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/DirectKafkaInputDStream.scala @@ -17,7 +17,6 @@ package org.apache.spark.streaming.kafka - import scala.annotation.tailrec import scala.collection.mutable import scala.reflect.{classTag, ClassTag} @@ -27,10 +26,10 @@ import kafka.message.MessageAndMetadata import kafka.serializer.Decoder import org.apache.spark.{Logging, SparkException} -import org.apache.spark.rdd.RDD -import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset import org.apache.spark.streaming.{StreamingContext, Time} import org.apache.spark.streaming.dstream._ +import 
org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset +import org.apache.spark.streaming.scheduler.InputInfo /** * A stream of {@link org.apache.spark.streaming.kafka.KafkaRDD} where @@ -66,6 +65,9 @@ class DirectKafkaInputDStream[ val maxRetries = context.sparkContext.getConf.getInt( "spark.streaming.kafka.maxRetries", 1) + // Keep this consistent with how other streams are named (e.g. "Flume polling stream [2]") + private[streaming] override def name: String = s"Kafka direct stream [$id]" + protected[streaming] override val checkpointData = new DirectKafkaInputDStreamCheckpointData @@ -117,6 +119,11 @@ class DirectKafkaInputDStream[ val rdd = KafkaRDD[K, V, U, T, R]( context.sparkContext, kafkaParams, currentOffsets, untilOffsets, messageHandler) + // Report the record number of this batch interval to InputInfoTracker. + val numRecords = rdd.offsetRanges.map(r => r.untilOffset - r.fromOffset).sum + val inputInfo = InputInfo(id, numRecords) + ssc.scheduler.inputInfoTracker.reportInfo(validTime, inputInfo) + currentOffsets = untilOffsets.map(kv => kv._1 -> kv._2.offset) Some(rdd) } diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala index 6cf254a7b69cb..65d51d87f8486 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala @@ -113,7 +113,7 @@ class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable { r.flatMap { tm: TopicMetadata => tm.partitionsMetadata.map { pm: PartitionMetadata => TopicAndPartition(tm.topic, pm.partitionId) - } + } } } } diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala index cca0fac0234e1..04b2dc10d39ea 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala @@ -135,7 +135,7 @@ class KafkaReceiver[ store((msgAndMetadata.key, msgAndMetadata.message)) } } catch { - case e: Throwable => logError("Error handling message; exiting", e) + case e: Throwable => reportError("Error handling message; exiting", e) } } } diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala index d7cf500577c2a..0b8a391a2c569 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala @@ -189,7 +189,7 @@ object KafkaUtils { sc: SparkContext, kafkaParams: Map[String, String], offsetRanges: Array[OffsetRange] - ): RDD[(K, V)] = { + ): RDD[(K, V)] = sc.withScope { val messageHandler = (mmd: MessageAndMetadata[K, V]) => (mmd.key, mmd.message) val leaders = leadersForRanges(kafkaParams, offsetRanges) new KafkaRDD[K, V, KD, VD, (K, V)](sc, kafkaParams, offsetRanges, leaders, messageHandler) @@ -224,7 +224,7 @@ object KafkaUtils { offsetRanges: Array[OffsetRange], leaders: Map[TopicAndPartition, Broker], messageHandler: MessageAndMetadata[K, V] => R - ): RDD[R] = { + ): RDD[R] = sc.withScope { val leaderMap = if (leaders.isEmpty) { leadersForRanges(kafkaParams, offsetRanges) } else { @@ -233,7 +233,8 @@ object KafkaUtils { 
case (tp: TopicAndPartition, Broker(host, port)) => (tp, (host, port)) }.toMap } - new KafkaRDD[K, V, KD, VD, R](sc, kafkaParams, offsetRanges, leaderMap, messageHandler) + val cleanedHandler = sc.clean(messageHandler) + new KafkaRDD[K, V, KD, VD, R](sc, kafkaParams, offsetRanges, leaderMap, cleanedHandler) } /** @@ -256,7 +257,7 @@ object KafkaUtils { valueDecoderClass: Class[VD], kafkaParams: JMap[String, String], offsetRanges: Array[OffsetRange] - ): JavaPairRDD[K, V] = { + ): JavaPairRDD[K, V] = jsc.sc.withScope { implicit val keyCmt: ClassTag[K] = ClassTag(keyClass) implicit val valueCmt: ClassTag[V] = ClassTag(valueClass) implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass) @@ -294,7 +295,7 @@ object KafkaUtils { offsetRanges: Array[OffsetRange], leaders: JMap[TopicAndPartition, Broker], messageHandler: JFunction[MessageAndMetadata[K, V], R] - ): JavaRDD[R] = { + ): JavaRDD[R] = jsc.sc.withScope { implicit val keyCmt: ClassTag[K] = ClassTag(keyClass) implicit val valueCmt: ClassTag[V] = ClassTag(valueClass) implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass) @@ -314,7 +315,7 @@ object KafkaUtils { * Points to note: * - No receivers: This stream does not use any receiver. It directly queries Kafka * - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked - * by the stream itself. For interoperability with Kafka monitoring tools that depend on + * by the stream itself. For interoperability with Kafka monitoring tools that depend on * Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application. * You can access the offsets used in each batch from the generated RDDs (see * [[org.apache.spark.streaming.kafka.HasOffsetRanges]]). @@ -348,8 +349,9 @@ object KafkaUtils { fromOffsets: Map[TopicAndPartition, Long], messageHandler: MessageAndMetadata[K, V] => R ): InputDStream[R] = { + val cleanedHandler = ssc.sc.clean(messageHandler) new DirectKafkaInputDStream[K, V, KD, VD, R]( - ssc, kafkaParams, fromOffsets, messageHandler) + ssc, kafkaParams, fromOffsets, cleanedHandler) } /** @@ -361,7 +363,7 @@ object KafkaUtils { * Points to note: * - No receivers: This stream does not use any receiver. It directly queries Kafka * - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked - * by the stream itself. For interoperability with Kafka monitoring tools that depend on + * by the stream itself. For interoperability with Kafka monitoring tools that depend on * Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application. * You can access the offsets used in each batch from the generated RDDs (see * [[org.apache.spark.streaming.kafka.HasOffsetRanges]]). @@ -425,7 +427,7 @@ object KafkaUtils { * Points to note: * - No receivers: This stream does not use any receiver. It directly queries Kafka * - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked - * by the stream itself. For interoperability with Kafka monitoring tools that depend on + * by the stream itself. For interoperability with Kafka monitoring tools that depend on * Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application. * You can access the offsets used in each batch from the generated RDDs (see * [[org.apache.spark.streaming.kafka.HasOffsetRanges]]). 
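The createDirectStream and createRDD overloads above now run the user-supplied messageHandler through the closure cleaner; a sketch of the handler-based variant, with `ssc`, `kafkaParams`, and `fromOffsets` assumed to exist.

    import kafka.message.MessageAndMetadata
    import kafka.serializer.StringDecoder
    import org.apache.spark.streaming.kafka.KafkaUtils

    // fromOffsets: Map[TopicAndPartition, Long] fixes the starting offset per partition.
    val stream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, Long)](
      ssc, kafkaParams, fromOffsets,
      (mmd: MessageAndMetadata[String, String]) => (mmd.message, mmd.offset))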
@@ -469,11 +471,12 @@ object KafkaUtils { implicit val keyDecoderCmt: ClassTag[KD] = ClassTag(keyDecoderClass) implicit val valueDecoderCmt: ClassTag[VD] = ClassTag(valueDecoderClass) implicit val recordCmt: ClassTag[R] = ClassTag(recordClass) + val cleanedHandler = jssc.sparkContext.clean(messageHandler.call _) createDirectStream[K, V, KD, VD, R]( jssc.ssc, Map(kafkaParams.toSeq: _*), Map(fromOffsets.mapValues { _.longValue() }.toSeq: _*), - messageHandler.call _ + cleanedHandler ) } @@ -486,7 +489,7 @@ object KafkaUtils { * Points to note: * - No receivers: This stream does not use any receiver. It directly queries Kafka * - Offsets: This does not use Zookeeper to store offsets. The consumed offsets are tracked - * by the stream itself. For interoperability with Kafka monitoring tools that depend on + * by the stream itself. For interoperability with Kafka monitoring tools that depend on * Zookeeper, you have to update Kafka/Zookeeper yourself from the streaming application. * You can access the offsets used in each batch from the generated RDDs (see * [[org.apache.spark.streaming.kafka.HasOffsetRanges]]). diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala index ea87e960379f1..75f0dfc22b9dc 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala @@ -267,7 +267,7 @@ class ReliableKafkaReceiver[ } } catch { case e: Exception => - logError("Error handling message", e) + reportError("Error handling message", e) } } } diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala index 415730f5559c5..b6d314dfc7783 100644 --- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala +++ b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/DirectKafkaStreamSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.streaming.kafka import java.io.File +import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -34,6 +35,7 @@ import org.apache.spark.{Logging, SparkConf, SparkContext} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Milliseconds, StreamingContext, Time} import org.apache.spark.streaming.dstream.DStream +import org.apache.spark.streaming.scheduler._ import org.apache.spark.util.Utils class DirectKafkaStreamSuite @@ -290,7 +292,6 @@ class DirectKafkaStreamSuite }, "Recovered ranges are not the same as the ones generated" ) - // Restart context, give more data and verify the total at the end // If the total is write that means each records has been received only once ssc.start() @@ -301,6 +302,44 @@ class DirectKafkaStreamSuite ssc.stop() } + test("Direct Kafka stream report input information") { + val topic = "report-test" + val data = Map("a" -> 7, "b" -> 9) + kafkaTestUtils.createTopic(topic) + kafkaTestUtils.sendMessages(topic, data) + + val totalSent = data.values.sum + val kafkaParams = Map( + "metadata.broker.list" -> kafkaTestUtils.brokerAddress, + "auto.offset.reset" -> "smallest" + ) + + import DirectKafkaStreamSuite._ + ssc = new StreamingContext(sparkConf, Milliseconds(200)) + val collector = new InputInfoCollector + 
ssc.addStreamingListener(collector) + + val stream = withClue("Error creating direct stream") { + KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder]( + ssc, kafkaParams, Set(topic)) + } + + val allReceived = new ArrayBuffer[(String, String)] + + stream.foreachRDD { rdd => allReceived ++= rdd.collect() } + ssc.start() + eventually(timeout(20000.milliseconds), interval(200.milliseconds)) { + assert(allReceived.size === totalSent, + "didn't get expected number of messages, messages:\n" + allReceived.mkString("\n")) + + // Calculate all the record number collected in the StreamingListener. + assert(collector.numRecordsSubmitted.get() === totalSent) + assert(collector.numRecordsStarted.get() === totalSent) + assert(collector.numRecordsCompleted.get() === totalSent) + } + ssc.stop() + } + /** Get the generated offset ranges from the DirectKafkaStream */ private def getOffsetRanges[K, V]( kafkaStream: DStream[(K, V)]): Seq[(Time, Array[OffsetRange])] = { @@ -313,4 +352,22 @@ class DirectKafkaStreamSuite object DirectKafkaStreamSuite { val collectedData = new mutable.ArrayBuffer[String]() var total = -1L + + class InputInfoCollector extends StreamingListener { + val numRecordsSubmitted = new AtomicLong(0L) + val numRecordsStarted = new AtomicLong(0L) + val numRecordsCompleted = new AtomicLong(0L) + + override def onBatchSubmitted(batchSubmitted: StreamingListenerBatchSubmitted): Unit = { + numRecordsSubmitted.addAndGet(batchSubmitted.batchInfo.numRecords) + } + + override def onBatchStarted(batchStarted: StreamingListenerBatchStarted): Unit = { + numRecordsStarted.addAndGet(batchStarted.batchInfo.numRecords) + } + + override def onBatchCompleted(batchCompleted: StreamingListenerBatchCompleted): Unit = { + numRecordsCompleted.addAndGet(batchCompleted.batchInfo.numRecords) + } + } } diff --git a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala index 39c3fb448ff57..3c875cb766513 100644 --- a/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala +++ b/external/kafka/src/test/scala/org/apache/spark/streaming/kafka/KafkaRDDSuite.scala @@ -65,7 +65,7 @@ class KafkaRDDSuite extends FunSuite with BeforeAndAfterAll { val offsetRanges = Array(OffsetRange(topic, 0, 0, messages.size)) - val rdd = KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder]( + val rdd = KafkaUtils.createRDD[String, String, StringDecoder, StringDecoder]( sc, kafkaParams, offsetRanges) val received = rdd.map(_._2).collect.toSet diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 98f95a9a64fa0..ab36dc6f3f942 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala b/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala index 3c0ef94cb0fab..7c2f18cb35bda 100644 --- a/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala +++ b/external/mqtt/src/main/scala/org/apache/spark/streaming/mqtt/MQTTInputDStream.scala @@ -17,25 +17,12 @@ package org.apache.spark.streaming.mqtt -import java.io.IOException -import java.util.concurrent.Executors -import java.util.Properties - -import scala.collection.JavaConversions._ -import scala.collection.Map -import scala.collection.mutable.HashMap -import 
scala.reflect.ClassTag - import org.eclipse.paho.client.mqttv3.IMqttDeliveryToken import org.eclipse.paho.client.mqttv3.MqttCallback import org.eclipse.paho.client.mqttv3.MqttClient -import org.eclipse.paho.client.mqttv3.MqttClientPersistence -import org.eclipse.paho.client.mqttv3.MqttException import org.eclipse.paho.client.mqttv3.MqttMessage -import org.eclipse.paho.client.mqttv3.MqttTopic import org.eclipse.paho.client.mqttv3.persist.MemoryPersistence -import org.apache.spark.Logging import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream._ @@ -57,6 +44,8 @@ class MQTTInputDStream( storageLevel: StorageLevel ) extends ReceiverInputDStream[String](ssc_) { + private[streaming] override def name: String = s"MQTT stream [$id]" + def getReceiver(): Receiver[String] = { new MQTTReceiver(brokerUrl, topic, storageLevel) } @@ -86,7 +75,7 @@ class MQTTReceiver( // Handles Mqtt message override def messageArrived(topic: String, message: MqttMessage) { - store(new String(message.getPayload(),"utf-8")) + store(new String(message.getPayload(), "utf-8")) } override def deliveryComplete(token: IMqttDeliveryToken) { diff --git a/external/twitter/pom.xml b/external/twitter/pom.xml index 8b6a8959ac4cf..c18f3eccf3a8c 100644 --- a/external/twitter/pom.xml +++ b/external/twitter/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/external/zeromq/pom.xml b/external/zeromq/pom.xml index a50d378b34335..c3e242c8a7ed9 100644 --- a/external/zeromq/pom.xml +++ b/external/zeromq/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/extras/java8-tests/pom.xml b/extras/java8-tests/pom.xml index 4351a8a12fe21..b9c52344f58c2 100644 --- a/extras/java8-tests/pom.xml +++ b/extras/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml index 25847a1b33d9c..0ab5058d9faea 100644 --- a/extras/kinesis-asl/pom.xml +++ b/extras/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index b0bff27a61c19..06e0ff28afd95 100644 --- a/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/extras/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.regex.Pattern; +import com.amazonaws.regions.RegionUtils; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FlatMapFunction; @@ -40,140 +41,146 @@ import com.google.common.collect.Lists; /** - * Java-friendly Kinesis Spark Streaming WordCount example + * Consumes messages from a Amazon Kinesis streams and does wordcount. * - * See http://spark.apache.org/docs/latest/streaming-kinesis.html for more details - * on the Kinesis Spark Streaming integration. + * This example spins up 1 Kinesis Receiver per shard for the given stream. + * It then starts pulling from the last checkpointed sequence number of the given stream. 
* - * This example spins up 1 Kinesis Worker (Spark Streaming Receiver) per shard - * for the given stream. - * It then starts pulling from the last checkpointed sequence number of the given - * and . + * Usage: JavaKinesisWordCountASL [app-name] [stream-name] [endpoint-url] [region-name] + * [app-name] is the name of the consumer app, used to track the read data in DynamoDB + * [stream-name] name of the Kinesis stream (ie. mySparkStream) + * [endpoint-url] endpoint of the Kinesis service + * (e.g. https://kinesis.us-east-1.amazonaws.com) * - * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * - * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs - * Instance profile credentials - delivered through the Amazon EC2 metadata service - * - * Usage: JavaKinesisWordCountASL - * is the name of the Kinesis stream (ie. mySparkStream) - * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) * * Example: - * $ export AWS_ACCESS_KEY_ID= + * # export AWS keys if necessary + * $ export AWS_ACCESS_KEY_ID=[your-access-key] * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.JavaKinesisWordCountASL mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com * - * Note that number of workers/threads should be 1 more than the number of receivers. - * This leaves one thread available for actually processing the data. + * # run the example + * $ SPARK_HOME/bin/run-example streaming.JavaKinesisWordCountASL myAppName mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com + * + * There is a companion helper class called KinesisWordProducerASL which puts dummy data + * onto the Kinesis stream. * - * There is a companion helper class called KinesisWordCountProducerASL which puts dummy data - * onto the Kinesis stream. - * Usage instructions for KinesisWordCountProducerASL are provided in the class definition. + * This code uses the DefaultAWSCredentialsProviderChain to find credentials + * in the following order: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * For more information, see + * http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html + * + * See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + * the Kinesis Spark Streaming integration. */ public final class JavaKinesisWordCountASL { // needs to be public for access from run-example - private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); - private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class); - - /* Make the constructor private to enforce singleton */ - private JavaKinesisWordCountASL() { + private static final Pattern WORD_SEPARATOR = Pattern.compile(" "); + private static final Logger logger = Logger.getLogger(JavaKinesisWordCountASL.class); + + public static void main(String[] args) { + // Check that all required args were passed in. 
+ if (args.length != 3) { + System.err.println( + "Usage: JavaKinesisWordCountASL \n\n" + + " is the name of the app, used to track the read data in DynamoDB\n" + + " is the name of the Kinesis stream\n" + + " is the endpoint of the Kinesis service\n" + + " (e.g. https://kinesis.us-east-1.amazonaws.com)\n" + + "Generate data for the Kinesis stream using the example KinesisWordProducerASL.\n" + + "See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more\n" + + "details.\n" + ); + System.exit(1); } - public static void main(String[] args) { - /* Check that all required args were passed in. */ - if (args.length < 2) { - System.err.println( - "Usage: JavaKinesisWordCountASL \n" + - " is the name of the Kinesis stream\n" + - " is the endpoint of the Kinesis service\n" + - " (e.g. https://kinesis.us-east-1.amazonaws.com)\n"); - System.exit(1); - } - - StreamingExamples.setStreamingLogLevels(); - - /* Populate the appropriate variables from the given args */ - String streamName = args[0]; - String endpointUrl = args[1]; - /* Set the batch interval to a fixed 2000 millis (2 seconds) */ - Duration batchInterval = new Duration(2000); - - /* Create a Kinesis client in order to determine the number of shards for the given stream */ - AmazonKinesisClient kinesisClient = new AmazonKinesisClient( - new DefaultAWSCredentialsProviderChain()); - kinesisClient.setEndpoint(endpointUrl); - - /* Determine the number of shards from the stream */ - int numShards = kinesisClient.describeStream(streamName) - .getStreamDescription().getShards().size(); - - /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard */ - int numStreams = numShards; - - /* Setup the Spark config. */ - SparkConf sparkConfig = new SparkConf().setAppName("KinesisWordCount"); - - /* Kinesis checkpoint interval. Same as batchInterval for this example. */ - Duration checkpointInterval = batchInterval; + // Set default log4j logging level to WARN to hide Spark logs + StreamingExamples.setStreamingLogLevels(); + + // Populate the appropriate variables from the given args + String kinesisAppName = args[0]; + String streamName = args[1]; + String endpointUrl = args[2]; + + // Create a Kinesis client in order to determine the number of shards for the given stream + AmazonKinesisClient kinesisClient = + new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()); + kinesisClient.setEndpoint(endpointUrl); + int numShards = + kinesisClient.describeStream(streamName).getStreamDescription().getShards().size(); + + + // In this example, we're going to create 1 Kinesis Receiver/input DStream for each shard. + // This is not a necessity; if there are less receivers/DStreams than the number of shards, + // then the shards will be automatically distributed among the receivers and each receiver + // will receive data from multiple shards. + int numStreams = numShards; + + // Spark Streaming batch interval + Duration batchInterval = new Duration(2000); + + // Kinesis checkpoint interval. Same as batchInterval for this example. 
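The example sizes itself to one receiver per shard. Since each receiver pins a core for the lifetime of the job, the application needs at least one more core (or local thread) than the number of receivers, which is the point the removed "workers/threads should be 1 more than the number of receivers" note was making. A rough local-mode sketch of that sizing; the shard count is a placeholder:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Milliseconds, StreamingContext}

val numShards = 2              // placeholder: normally looked up via describeStream()
val numReceivers = numShards   // the example starts one receiver per shard

// Each receiver occupies one core while the application runs, so reserve at least
// one extra core for actually processing the received batches.
val master = s"local[${numReceivers + 1}]"

val sparkConf = new SparkConf().setAppName("KinesisWordCountSketch").setMaster(master)
val ssc = new StreamingContext(sparkConf, Milliseconds(2000))
```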
+ Duration kinesisCheckpointInterval = batchInterval; + + // Get the region name from the endpoint URL to save Kinesis Client Library metadata in + // DynamoDB of the same region as the Kinesis stream + String regionName = RegionUtils.getRegionByEndpoint(endpointUrl).getName(); + + // Setup the Spark config and StreamingContext + SparkConf sparkConfig = new SparkConf().setAppName("JavaKinesisWordCountASL"); + JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); + + // Create the Kinesis DStreams + List> streamsList = new ArrayList>(numStreams); + for (int i = 0; i < numStreams; i++) { + streamsList.add( + KinesisUtils.createStream(jssc, kinesisAppName, streamName, endpointUrl, regionName, + InitialPositionInStream.LATEST, kinesisCheckpointInterval, StorageLevel.MEMORY_AND_DISK_2()) + ); + } - /* Setup the StreamingContext */ - JavaStreamingContext jssc = new JavaStreamingContext(sparkConfig, batchInterval); + // Union all the streams if there is more than 1 stream + JavaDStream unionStreams; + if (streamsList.size() > 1) { + unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size())); + } else { + // Otherwise, just use the 1 stream + unionStreams = streamsList.get(0); + } - /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ - List> streamsList = new ArrayList>(numStreams); - for (int i = 0; i < numStreams; i++) { - streamsList.add( - KinesisUtils.createStream(jssc, streamName, endpointUrl, checkpointInterval, - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2()) - ); + // Convert each line of Array[Byte] to String, and split into words + JavaDStream words = unionStreams.flatMap(new FlatMapFunction() { + @Override + public Iterable call(byte[] line) { + return Lists.newArrayList(WORD_SEPARATOR.split(new String(line))); + } + }); + + // Map each word to a (word, 1) tuple so we can reduce by key to count the words + JavaPairDStream wordCounts = words.mapToPair( + new PairFunction() { + @Override + public Tuple2 call(String s) { + return new Tuple2(s, 1); + } } - - /* Union all the streams if there is more than 1 stream */ - JavaDStream unionStreams; - if (streamsList.size() > 1) { - unionStreams = jssc.union(streamsList.get(0), streamsList.subList(1, streamsList.size())); - } else { - /* Otherwise, just use the 1 stream */ - unionStreams = streamsList.get(0); + ).reduceByKey( + new Function2() { + @Override + public Integer call(Integer i1, Integer i2) { + return i1 + i2; + } } + ); - /* - * Split each line of the union'd DStreams into multiple words using flatMap to produce the collection. - * Convert lines of byte[] to multiple Strings by first converting to String, then splitting on WORD_SEPARATOR. - */ - JavaDStream words = unionStreams.flatMap(new FlatMapFunction() { - @Override - public Iterable call(byte[] line) { - return Lists.newArrayList(WORD_SEPARATOR.split(new String(line))); - } - }); - - /* Map each word to a (word, 1) tuple, then reduce/aggregate by word. 
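Two things are new in this hunk: the region name is derived from the endpoint URL so the KCL keeps its DynamoDB lease table and CloudWatch metrics next to the stream, and createStream now takes both an application name and that region name. A condensed Scala sketch of the same calls; the app name, stream name and intervals below are placeholders:

```scala
import com.amazonaws.regions.RegionUtils
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Milliseconds, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisUtils

val endpointUrl = "https://kinesis.us-east-1.amazonaws.com"   // placeholder

// The KCL stores its lease table (DynamoDB) and metrics (CloudWatch) in this region,
// so it is derived from the same endpoint the receivers read from.
val regionName = RegionUtils.getRegionByEndpoint(endpointUrl).getName()

val batchInterval = Milliseconds(2000)
val ssc = new StreamingContext(
  new SparkConf().setAppName("KinesisWordCountSketch"), batchInterval)

// New-style createStream: the app name tracks read positions in DynamoDB, the Kinesis
// checkpoint interval matches the batch interval, and records arrive as Array[Byte].
val stream = KinesisUtils.createStream(ssc, "myAppName", "mySparkStream", endpointUrl,
  regionName, InitialPositionInStream.LATEST, batchInterval,
  StorageLevel.MEMORY_AND_DISK_2)
```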
*/ - JavaPairDStream wordCounts = words.mapToPair( - new PairFunction() { - @Override - public Tuple2 call(String s) { - return new Tuple2(s, 1); - } - }).reduceByKey(new Function2() { - @Override - public Integer call(Integer i1, Integer i2) { - return i1 + i2; - } - }); - - /* Print the first 10 wordCounts */ - wordCounts.print(); - - /* Start the streaming context and await termination */ - jssc.start(); - jssc.awaitTermination(); - } + // Print the first 10 wordCounts + wordCounts.print(); + + // Start the streaming context and await termination + jssc.start(); + jssc.awaitTermination(); + } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 32da0858d1a1d..be8b62d3cc6ba 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -18,223 +18,249 @@ package org.apache.spark.examples.streaming import java.nio.ByteBuffer + import scala.util.Random -import org.apache.spark.Logging -import org.apache.spark.SparkConf -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Milliseconds -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.StreamingContext.toPairDStreamFunctions -import org.apache.spark.streaming.kinesis.KinesisUtils -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain + +import com.amazonaws.auth.{DefaultAWSCredentialsProviderChain, BasicAWSCredentials} +import com.amazonaws.regions.RegionUtils import com.amazonaws.services.kinesis.AmazonKinesisClient import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.model.PutRecordRequest -import org.apache.log4j.Logger -import org.apache.log4j.Level +import org.apache.log4j.{Level, Logger} + +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Milliseconds, StreamingContext} +import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions +import org.apache.spark.streaming.kinesis.KinesisUtils + /** - * Kinesis Spark Streaming WordCount example. + * Consumes messages from a Amazon Kinesis streams and does wordcount. * - * See http://spark.apache.org/docs/latest/streaming-kinesis.html for more details on - * the Kinesis Spark Streaming integration. + * This example spins up 1 Kinesis Receiver per shard for the given stream. + * It then starts pulling from the last checkpointed sequence number of the given stream. * - * This example spins up 1 Kinesis Worker (Spark Streaming Receiver) per shard - * for the given stream. - * It then starts pulling from the last checkpointed sequence number of the given - * and . + * Usage: KinesisWordCountASL + * is the name of the consumer app, used to track the read data in DynamoDB + * name of the Kinesis stream (ie. mySparkStream) + * endpoint of the Kinesis service + * (e.g. 
https://kinesis.us-east-1.amazonaws.com) * - * Valid endpoint urls: http://docs.aws.amazon.com/general/latest/gr/rande.html#ak_region - * - * This code uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs - * Instance profile credentials - delivered through the Amazon EC2 metadata service - * - * Usage: KinesisWordCountASL - * is the name of the Kinesis stream (ie. mySparkStream) - * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) * * Example: - * $ export AWS_ACCESS_KEY_ID= - * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.KinesisWordCountASL mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com + * # export AWS keys if necessary + * $ export AWS_ACCESS_KEY_ID= + * $ export AWS_SECRET_KEY= + * + * # run the example + * $ SPARK_HOME/bin/run-example streaming.KinesisWordCountASL myAppName mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com * - * - * Note that number of workers/threads should be 1 more than the number of receivers. - * This leaves one thread available for actually processing the data. + * There is a companion helper class called KinesisWordProducerASL which puts dummy data + * onto the Kinesis stream. * - * There is a companion helper class below called KinesisWordCountProducerASL which puts - * dummy data onto the Kinesis stream. - * Usage instructions for KinesisWordCountProducerASL are provided in that class definition. + * This code uses the DefaultAWSCredentialsProviderChain to find credentials + * in the following order: + * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY + * Java System Properties - aws.accessKeyId and aws.secretKey + * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs + * Instance profile credentials - delivered through the Amazon EC2 metadata service + * For more information, see + * http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html + * + * See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + * the Kinesis Spark Streaming integration. */ -private object KinesisWordCountASL extends Logging { +object KinesisWordCountASL extends Logging { def main(args: Array[String]) { - /* Check that all required args were passed in. */ - if (args.length < 2) { + // Check that all required args were passed in. + if (args.length != 3) { System.err.println( """ - |Usage: KinesisWordCount + |Usage: KinesisWordCountASL + | + | is the name of the consumer app, used to track the read data in DynamoDB | is the name of the Kinesis stream | is the endpoint of the Kinesis service | (e.g. https://kinesis.us-east-1.amazonaws.com) + | + |Generate input data for Kinesis stream using the example KinesisWordProducerASL. + |See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more + |details. 
""".stripMargin) System.exit(1) } StreamingExamples.setStreamingLogLevels() - /* Populate the appropriate variables from the given args */ - val Array(streamName, endpointUrl) = args + // Populate the appropriate variables from the given args + val Array(appName, streamName, endpointUrl) = args - /* Determine the number of shards from the stream */ - val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) + + // Determine the number of shards from the stream using the low-level Kinesis Client + // from the AWS Java SDK. + val credentials = new DefaultAWSCredentialsProviderChain().getCredentials() + require(credentials != null, + "No AWS credentials found. Please specify credentials using one of the methods specified " + + "in http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html") + val kinesisClient = new AmazonKinesisClient(credentials) kinesisClient.setEndpoint(endpointUrl) - val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards() - .size() + val numShards = kinesisClient.describeStream(streamName).getStreamDescription().getShards().size + - /* In this example, we're going to create 1 Kinesis Worker/Receiver/DStream for each shard. */ + // In this example, we're going to create 1 Kinesis Receiver/input DStream for each shard. + // This is not a necessity; if there are less receivers/DStreams than the number of shards, + // then the shards will be automatically distributed among the receivers and each receiver + // will receive data from multiple shards. val numStreams = numShards - /* Setup the and SparkConfig and StreamingContext */ - /* Spark Streaming batch interval */ + // Spark Streaming batch interval val batchInterval = Milliseconds(2000) - val sparkConfig = new SparkConf().setAppName("KinesisWordCount") - val ssc = new StreamingContext(sparkConfig, batchInterval) - /* Kinesis checkpoint interval. Same as batchInterval for this example. */ + // Kinesis checkpoint interval is the interval at which the DynamoDB is updated with information + // on sequence number of records that have been received. Same as batchInterval for this + // example. 
val kinesisCheckpointInterval = batchInterval - /* Create the same number of Kinesis DStreams/Receivers as Kinesis stream's shards */ + // Get the region name from the endpoint URL to save Kinesis Client Library metadata in + // DynamoDB of the same region as the Kinesis stream + val regionName = RegionUtils.getRegionByEndpoint(endpointUrl).getName() + + // Setup the SparkConfig and StreamingContext + val sparkConfig = new SparkConf().setAppName("KinesisWordCountASL") + val ssc = new StreamingContext(sparkConfig, batchInterval) + + // Create the Kinesis DStreams val kinesisStreams = (0 until numStreams).map { i => - KinesisUtils.createStream(ssc, streamName, endpointUrl, kinesisCheckpointInterval, - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + KinesisUtils.createStream(ssc, appName, streamName, endpointUrl, regionName, + InitialPositionInStream.LATEST, kinesisCheckpointInterval, StorageLevel.MEMORY_AND_DISK_2) } - /* Union all the streams */ + // Union all the streams val unionStreams = ssc.union(kinesisStreams) - /* Convert each line of Array[Byte] to String, split into words, and count them */ - val words = unionStreams.flatMap(byteArray => new String(byteArray) - .split(" ")) + // Convert each line of Array[Byte] to String, and split into words + val words = unionStreams.flatMap(byteArray => new String(byteArray).split(" ")) - /* Map each word to a (word, 1) tuple so we can reduce/aggregate by key. */ + // Map each word to a (word, 1) tuple so we can reduce by key to count the words val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _) - /* Print the first 10 wordCounts */ + // Print the first 10 wordCounts wordCounts.print() - /* Start the streaming context and await termination */ + // Start the streaming context and await termination ssc.start() ssc.awaitTermination() } } /** - * Usage: KinesisWordCountProducerASL - * + * Usage: KinesisWordProducerASL \ + * + * * is the name of the Kinesis stream (ie. mySparkStream) - * is the endpoint of the Kinesis service + * is the endpoint of the Kinesis service * (ie. https://kinesis.us-east-1.amazonaws.com) * is the rate of records per second to put onto the stream * is the rate of records per second to put onto the stream * * Example: - * $ export AWS_ACCESS_KEY_ID= - * $ export AWS_SECRET_KEY= - * $ $SPARK_HOME/bin/run-example \ - * org.apache.spark.examples.streaming.KinesisWordCountProducerASL mySparkStream \ - * https://kinesis.us-east-1.amazonaws.com 10 5 + * $ SPARK_HOME/bin/run-example streaming.KinesisWordProducerASL mySparkStream \ + * https://kinesis.us-east-1.amazonaws.com us-east-1 10 5 */ -private object KinesisWordCountProducerASL { +object KinesisWordProducerASL { def main(args: Array[String]) { - if (args.length < 4) { - System.err.println("Usage: KinesisWordCountProducerASL " + - " ") + if (args.length != 4) { + System.err.println( + """ + |Usage: KinesisWordProducerASL + + | + | is the name of the Kinesis stream + | is the endpoint of the Kinesis service + | (e.g. 
https://kinesis.us-east-1.amazonaws.com) + | is the rate of records per second to put onto the stream + | is the rate of records per second to put onto the stream + | + """.stripMargin) + System.exit(1) } + // Set default log4j logging level to WARN to hide Spark logs StreamingExamples.setStreamingLogLevels() - /* Populate the appropriate variables from the given args */ + // Populate the appropriate variables from the given args val Array(stream, endpoint, recordsPerSecond, wordsPerRecord) = args - /* Generate the records and return the totals */ - val totals = generate(stream, endpoint, recordsPerSecond.toInt, wordsPerRecord.toInt) + // Generate the records and return the totals + val totals = generate(stream, endpoint, recordsPerSecond.toInt, + wordsPerRecord.toInt) - /* Print the array of (index, total) tuples */ - println("Totals") - totals.foreach(total => println(total.toString())) + // Print the array of (word, total) tuples + println("Totals for the words sent") + totals.foreach(println(_)) } def generate(stream: String, endpoint: String, recordsPerSecond: Int, - wordsPerRecord: Int): Seq[(Int, Int)] = { + wordsPerRecord: Int): Seq[(String, Int)] = { - val MaxRandomInts = 10 + val randomWords = List("spark", "you", "are", "my", "father") + val totals = scala.collection.mutable.Map[String, Int]() - /* Create the Kinesis client */ + // Create the low-level Kinesis Client from the AWS Java SDK. val kinesisClient = new AmazonKinesisClient(new DefaultAWSCredentialsProviderChain()) kinesisClient.setEndpoint(endpoint) println(s"Putting records onto stream $stream and endpoint $endpoint at a rate of" + - s" $recordsPerSecond records per second and $wordsPerRecord words per record"); - - val totals = new Array[Int](MaxRandomInts) - /* Put String records onto the stream per the given recordPerSec and wordsPerRecord */ - for (i <- 1 to 5) { - - /* Generate recordsPerSec records to put onto the stream */ - val records = (1 to recordsPerSecond.toInt).map { recordNum => - /* - * Randomly generate each wordsPerRec words between 0 (inclusive) - * and MAX_RANDOM_INTS (exclusive) - */ + s" $recordsPerSecond records per second and $wordsPerRecord words per record") + + // Iterate and put records onto the stream per the given recordPerSec and wordsPerRecord + for (i <- 1 to 10) { + // Generate recordsPerSec records to put onto the stream + val records = (1 to recordsPerSecond.toInt).foreach { recordNum => + // Randomly generate wordsPerRecord number of words val data = (1 to wordsPerRecord.toInt).map(x => { - /* Generate the random int */ - val randomInt = Random.nextInt(MaxRandomInts) + // Get a random index to a word + val randomWordIdx = Random.nextInt(randomWords.size) + val randomWord = randomWords(randomWordIdx) - /* Keep track of the totals */ - totals(randomInt) += 1 + // Increment total count to compare to server counts later + totals(randomWord) = totals.getOrElse(randomWord, 0) + 1 - randomInt.toString() + randomWord }).mkString(" ") - /* Create a partitionKey based on recordNum */ + // Create a partitionKey based on recordNum val partitionKey = s"partitionKey-$recordNum" - /* Create a PutRecordRequest with an Array[Byte] version of the data */ + // Create a PutRecordRequest with an Array[Byte] version of the data val putRecordRequest = new PutRecordRequest().withStreamName(stream) .withPartitionKey(partitionKey) - .withData(ByteBuffer.wrap(data.getBytes())); + .withData(ByteBuffer.wrap(data.getBytes())) - /* Put the record onto the stream and capture the PutRecordResult */ - val 
putRecordResult = kinesisClient.putRecord(putRecordRequest); + // Put the record onto the stream and capture the PutRecordResult + val putRecordResult = kinesisClient.putRecord(putRecordRequest) } - /* Sleep for a second */ + // Sleep for a second Thread.sleep(1000) println("Sent " + recordsPerSecond + " records") } - - /* Convert the totals to (index, total) tuple */ - (0 to (MaxRandomInts - 1)).zip(totals) + // Convert the totals to (index, total) tuple + totals.toSeq.sortBy(_._1) } } -/** - * Utility functions for Spark Streaming examples. +/** + * Utility functions for Spark Streaming examples. * This has been lifted from the examples/ project to remove the circular dependency. */ private[streaming] object StreamingExamples extends Logging { - - /** Set reasonable logging levels for streaming if the user has not configured log4j. */ + // Set reasonable logging levels for streaming if the user has not configured log4j. def setStreamingLogLevels() { val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements if (!log4jInitialized) { diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala index 588e86a1887ec..83a4537559512 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointState.scala @@ -23,20 +23,20 @@ import org.apache.spark.util.{Clock, ManualClock, SystemClock} /** * This is a helper class for managing checkpoint clocks. * - * @param checkpointInterval + * @param checkpointInterval * @param currentClock. Default to current SystemClock if none is passed in (mocking purposes) */ private[kinesis] class KinesisCheckpointState( - checkpointInterval: Duration, + checkpointInterval: Duration, currentClock: Clock = new SystemClock()) extends Logging { - + /* Initialize the checkpoint clock using the given currentClock + checkpointInterval millis */ val checkpointClock = new ManualClock() checkpointClock.setTime(currentClock.getTimeMillis() + checkpointInterval.milliseconds) /** - * Check if it's time to checkpoint based on the current time and the derived time + * Check if it's time to checkpoint based on the current time and the derived time * for the next checkpoint * * @return true if it's time to checkpoint @@ -48,7 +48,7 @@ private[kinesis] class KinesisCheckpointState( /** * Advance the checkpoint clock by the checkpoint interval. 
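KinesisCheckpointState, touched above, is the bookkeeping behind "checkpoint every checkpointInterval": a clock is set one interval ahead, shouldCheckpoint compares it with the current time, and advanceCheckpoint pushes it another interval out. A self-contained approximation of that pattern using plain system time; the real class is built on Spark's Clock/ManualClock so it can be driven in tests:

```scala
import org.apache.spark.streaming.Duration

/** Sketch of the checkpoint-interval bookkeeping done by KinesisCheckpointState. */
class CheckpointStateSketch(checkpointInterval: Duration) {
  // Next time (in millis) at which a checkpoint is due.
  private var nextCheckpointTimeMillis =
    System.currentTimeMillis() + checkpointInterval.milliseconds

  /** True once the current time has passed the scheduled checkpoint time. */
  def shouldCheckpoint(): Boolean =
    System.currentTimeMillis() > nextCheckpointTimeMillis

  /** Schedule the next checkpoint one interval further out. */
  def advanceCheckpoint(): Unit = {
    nextCheckpointTimeMillis += checkpointInterval.milliseconds
  }
}
```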
*/ - def advanceCheckpoint() = { + def advanceCheckpoint(): Unit = { checkpointClock.advance(checkpointInterval.milliseconds) } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index a7fe4476cacb8..1a8a4cecc1141 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -16,39 +16,45 @@ */ package org.apache.spark.streaming.kinesis -import java.net.InetAddress import java.util.UUID +import scala.util.control.NonFatal + +import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials, DefaultAWSCredentialsProviderChain} +import com.amazonaws.services.kinesis.clientlibrary.interfaces.{IRecordProcessor, IRecordProcessorFactory} +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.{InitialPositionInStream, KinesisClientLibConfiguration, Worker} + import org.apache.spark.Logging import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.Duration import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.Utils -import com.amazonaws.auth.AWSCredentialsProvider -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain -import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessor -import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker + +private[kinesis] +case class SerializableAWSCredentials(accessKeyId: String, secretKey: String) + extends AWSCredentials { + override def getAWSAccessKeyId: String = accessKeyId + override def getAWSSecretKey: String = secretKey +} /** * Custom AWS Kinesis-specific implementation of Spark Streaming's Receiver. * This implementation relies on the Kinesis Client Library (KCL) Worker as described here: * https://github.com/awslabs/amazon-kinesis-client - * This is a custom receiver used with StreamingContext.receiverStream(Receiver) - * as described here: - * http://spark.apache.org/docs/latest/streaming-custom-receivers.html - * Instances of this class will get shipped to the Spark Streaming Workers - * to run within a Spark Executor. + * This is a custom receiver used with StreamingContext.receiverStream(Receiver) as described here: + * http://spark.apache.org/docs/latest/streaming-custom-receivers.html + * Instances of this class will get shipped to the Spark Streaming Workers to run within a + * Spark Executor. * * @param appName Kinesis application name. Kinesis Apps are mapped to Kinesis Streams * by the Kinesis Client Library. If you change the App name or Stream name, - * the KCL will throw errors. This usually requires deleting the backing + * the KCL will throw errors. This usually requires deleting the backing * DynamoDB table with the same name this Kinesis application. 
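SerializableAWSCredentials exists because the receiver is serialized and shipped to executors, and explicitly supplied credentials can also end up in DStream checkpoints, while the SDK's own credential classes are not guaranteed to be Serializable. A quick round-trip sketch using plain Java serialization (the test suite later in this patch does the same thing with Spark's Utils.serialize):

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.amazonaws.auth.AWSCredentials

// Mirror of the case class added above: a Serializable carrier for an AWS key pair.
case class SerializableAWSCredentials(accessKeyId: String, secretKey: String)
  extends AWSCredentials {
  override def getAWSAccessKeyId: String = accessKeyId
  override def getAWSSecretKey: String = secretKey
}

// Round-trip through Java serialization, as would happen when the receiver is shipped
// to an executor or written into a DStream checkpoint.
val bytes = new ByteArrayOutputStream()
val out = new ObjectOutputStream(bytes)
out.writeObject(SerializableAWSCredentials("accessKey", "secretKey"))
out.close()

val in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
val restored = in.readObject().asInstanceOf[SerializableAWSCredentials]
assert(restored.getAWSAccessKeyId == "accessKey" && restored.getAWSSecretKey == "secretKey")
```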
* @param streamName Kinesis stream name * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Region name used by the Kinesis Client Library for + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. * See the Kinesis Spark Streaming documentation for more * details on the different types of checkpoints. @@ -59,92 +65,121 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.Worker * (InitialPositionInStream.TRIM_HORIZON) or * the tip of the stream (InitialPositionInStream.LATEST). * @param storageLevel Storage level to use for storing the received objects - * - * @return ReceiverInputDStream[Array[Byte]] + * @param awsCredentialsOption Optional AWS credentials, used when user directly specifies + * the credentials */ private[kinesis] class KinesisReceiver( appName: String, streamName: String, endpointUrl: String, - checkpointInterval: Duration, + regionName: String, initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel) - extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => - - /* - * The following vars are built in the onStart() method which executes in the Spark Worker after - * this code is serialized and shipped remotely. - */ + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsCredentialsOption: Option[SerializableAWSCredentials] + ) extends Receiver[Array[Byte]](storageLevel) with Logging { receiver => /* - * workerId should be based on the ip address of the actual Spark Worker where this code runs - * (not the Driver's ip address.) + * ================================================================================= + * The following vars are initialize in the onStart() method which executes in the + * Spark worker after this Receiver is serialized and shipped to the worker. + * ================================================================================= */ - var workerId: String = null - /* - * This impl uses the DefaultAWSCredentialsProviderChain and searches for credentials - * in the following order of precedence: - * Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_KEY - * Java System Properties - aws.accessKeyId and aws.secretKey - * Credential profiles file at the default location (~/.aws/credentials) shared by all - * AWS SDKs and the AWS CLI - * Instance profile credentials delivered through the Amazon EC2 metadata service + /** + * workerId is used by the KCL should be based on the ip address of the actual Spark Worker + * where this code runs (not the driver's IP address.) */ - var credentialsProvider: AWSCredentialsProvider = null - - /* KCL config instance. */ - var kinesisClientLibConfiguration: KinesisClientLibConfiguration = null + private var workerId: String = null - /* - * RecordProcessorFactory creates impls of IRecordProcessor. - * IRecordProcessor adapts the KCL to our Spark KinesisReceiver via the - * IRecordProcessor.processRecords() method. - * We're using our custom KinesisRecordProcessor in this case. + /** + * Worker is the core client abstraction from the Kinesis Client Library (KCL). + * A worker can process more than one shards from the given stream. + * Each shard is assigned its own IRecordProcessor and the worker run multiple such + * processors. */ - var recordProcessorFactory: IRecordProcessorFactory = null + private var worker: Worker = null - /* - * Create a Kinesis Worker. 
- * This is the core client abstraction from the Kinesis Client Library (KCL). - * We pass the RecordProcessorFactory from above as well as the KCL config instance. - * A Kinesis Worker can process 1..* shards from the given stream - each with its - * own RecordProcessor. - */ - var worker: Worker = null + /** Thread running the worker */ + private var workerThread: Thread = null /** - * This is called when the KinesisReceiver starts and must be non-blocking. - * The KCL creates and manages the receiving/processing thread pool through the Worker.run() - * method. + * This is called when the KinesisReceiver starts and must be non-blocking. + * The KCL creates and manages the receiving/processing thread pool through Worker.run(). */ override def onStart() { workerId = Utils.localHostName() + ":" + UUID.randomUUID() - credentialsProvider = new DefaultAWSCredentialsProviderChain() - kinesisClientLibConfiguration = new KinesisClientLibConfiguration(appName, streamName, - credentialsProvider, workerId).withKinesisEndpoint(endpointUrl) - .withInitialPositionInStream(initialPositionInStream).withTaskBackoffTimeMillis(500) - recordProcessorFactory = new IRecordProcessorFactory { + + // KCL config instance + val awsCredProvider = resolveAWSCredentialsProvider() + val kinesisClientLibConfiguration = + new KinesisClientLibConfiguration(appName, streamName, awsCredProvider, workerId) + .withKinesisEndpoint(endpointUrl) + .withInitialPositionInStream(initialPositionInStream) + .withTaskBackoffTimeMillis(500) + .withRegionName(regionName) + + /* + * RecordProcessorFactory creates impls of IRecordProcessor. + * IRecordProcessor adapts the KCL to our Spark KinesisReceiver via the + * IRecordProcessor.processRecords() method. + * We're using our custom KinesisRecordProcessor in this case. + */ + val recordProcessorFactory = new IRecordProcessorFactory { override def createProcessor: IRecordProcessor = new KinesisRecordProcessor(receiver, workerId, new KinesisCheckpointState(checkpointInterval)) } + worker = new Worker(recordProcessorFactory, kinesisClientLibConfiguration) - worker.run() + workerThread = new Thread() { + override def run(): Unit = { + try { + worker.run() + } catch { + case NonFatal(e) => + restart("Error running the KCL worker in Receiver", e) + } + } + } + workerThread.setName(s"Kinesis Receiver ${streamId}") + workerThread.setDaemon(true) + workerThread.start() logInfo(s"Started receiver with workerId $workerId") } /** - * This is called when the KinesisReceiver stops. - * The KCL worker.shutdown() method stops the receiving/processing threads. - * The KCL will do its best to drain and checkpoint any in-flight records upon shutdown. + * This is called when the KinesisReceiver stops. + * The KCL worker.shutdown() method stops the receiving/processing threads. + * The KCL will do its best to drain and checkpoint any in-flight records upon shutdown. */ override def onStop() { - worker.shutdown() - logInfo(s"Shut down receiver with workerId $workerId") + if (workerThread != null) { + if (worker != null) { + worker.shutdown() + worker = null + } + workerThread.join() + workerThread = null + logInfo(s"Stopped receiver for workerId $workerId") + } workerId = null - credentialsProvider = null - kinesisClientLibConfiguration = null - recordProcessorFactory = null - worker = null + } + + /** + * If AWS credential is provided, return a AWSCredentialProvider returning that credential. + * Otherwise, return the DefaultAWSCredentialsProviderChain. 
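Worker.run() blocks, but Receiver.onStart() has to return quickly, so the new code runs the KCL worker on a named daemon thread and turns unexpected failures into Receiver.restart() so Spark can recreate the receiver. A stripped-down sketch of that pattern; blockingLoop and restart below are stand-ins for worker.run() and the real Receiver callback:

```scala
import scala.util.control.NonFatal

// Placeholders standing in for the KCL worker loop and the Receiver restart callback.
def blockingLoop(): Unit = { Thread.sleep(Long.MaxValue) }   // blocks until shutdown
def restart(message: String, error: Throwable): Unit =
  println(s"restart requested: $message ($error)")

// onStart() must be non-blocking: push the blocking loop onto a daemon thread and
// report non-fatal failures back so Spark can restart the receiver.
val workerThread = new Thread() {
  override def run(): Unit = {
    try {
      blockingLoop()
    } catch {
      case NonFatal(e) => restart("Error running the KCL worker in Receiver", e)
    }
  }
}
workerThread.setName("Kinesis Receiver [sketch]")
workerThread.setDaemon(true)
workerThread.start()
```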
+ */ + private def resolveAWSCredentialsProvider(): AWSCredentialsProvider = { + awsCredentialsOption match { + case Some(awsCredentials) => + logInfo("Using provided AWS credentials") + new AWSCredentialsProvider { + override def getCredentials: AWSCredentials = awsCredentials + override def refresh(): Unit = { } + } + case None => + logInfo("Using DefaultAWSCredentialsProviderChain") + new DefaultAWSCredentialsProviderChain() + } } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index af8cd875b4541..fe9e3a0c793e2 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -35,7 +35,10 @@ import com.amazonaws.services.kinesis.model.Record /** * Kinesis-specific implementation of the Kinesis Client Library (KCL) IRecordProcessor. * This implementation operates on the Array[Byte] from the KinesisReceiver. - * The Kinesis Worker creates an instance of this KinesisRecordProcessor upon startup. + * The Kinesis Worker creates an instance of this KinesisRecordProcessor for each + * shard in the Kinesis stream upon startup. This is normally done in separate threads, + * but the KCLs within the KinesisReceivers will balance themselves out if you create + * multiple Receivers. * * @param receiver Kinesis receiver * @param workerId for logging purposes @@ -47,8 +50,8 @@ private[kinesis] class KinesisRecordProcessor( workerId: String, checkpointState: KinesisCheckpointState) extends IRecordProcessor with Logging { - /* shardId to be populated during initialize() */ - var shardId: String = _ + // shardId to be populated during initialize() + private var shardId: String = _ /** * The Kinesis Client Library calls this method during IRecordProcessor initialization. @@ -56,8 +59,8 @@ private[kinesis] class KinesisRecordProcessor( * @param shardId assigned by the KCL to this particular RecordProcessor. */ override def initialize(shardId: String) { - logInfo(s"Initialize: Initializing workerId $workerId with shardId $shardId") this.shardId = shardId + logInfo(s"Initialized workerId $workerId with shardId $shardId") } /** @@ -66,29 +69,34 @@ private[kinesis] class KinesisRecordProcessor( * and Spark Streaming's Receiver.store(). * * @param batch list of records from the Kinesis stream shard - * @param checkpointer used to update Kinesis when this batch has been processed/stored + * @param checkpointer used to update Kinesis when this batch has been processed/stored * in the DStream */ override def processRecords(batch: List[Record], checkpointer: IRecordProcessorCheckpointer) { if (!receiver.isStopped()) { try { /* - * Note: If we try to store the raw ByteBuffer from record.getData(), the Spark Streaming - * Receiver.store(ByteBuffer) attempts to deserialize the ByteBuffer using the - * internally-configured Spark serializer (kryo, etc). - * This is not desirable, so we instead store a raw Array[Byte] and decouple - * ourselves from Spark's internal serialization strategy. - */ + * Notes: + * 1) If we try to store the raw ByteBuffer from record.getData(), the Spark Streaming + * Receiver.store(ByteBuffer) attempts to deserialize the ByteBuffer using the + * internally-configured Spark serializer (kryo, etc). 
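resolveAWSCredentialsProvider above is the bridge between the two createStream flavors: explicit keys arrive wrapped in Some(...) and get a throwaway provider, while None falls back to the default chain. Roughly the same decision in isolation, using BasicAWSCredentials instead of the patch's SerializableAWSCredentials so the sketch stands alone:

```scala
import com.amazonaws.auth.{AWSCredentials, AWSCredentialsProvider, BasicAWSCredentials, DefaultAWSCredentialsProviderChain}

/** Wrap optional explicit keys in a provider, or fall back to the default chain. */
def credentialsProviderFor(explicitKeys: Option[(String, String)]): AWSCredentialsProvider = {
  explicitKeys match {
    case Some((accessKeyId, secretKey)) =>
      // Fixed credentials supplied by the caller: never refreshed.
      new AWSCredentialsProvider {
        override def getCredentials: AWSCredentials =
          new BasicAWSCredentials(accessKeyId, secretKey)
        override def refresh(): Unit = { }
      }
    case None =>
      // Let the SDK discover credentials (env vars, system properties, profile, EC2 role).
      new DefaultAWSCredentialsProviderChain()
  }
}
```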
+ * 2) This is not desirable, so we instead store a raw Array[Byte] and decouple + * ourselves from Spark's internal serialization strategy. + * 3) For performance, the BlockGenerator is asynchronously queuing elements within its + * memory before creating blocks. This prevents the small block scenario, but requires + * that you register callbacks to know when a block has been generated and stored + * (WAL is sufficient for storage) before can checkpoint back to the source. + */ batch.foreach(record => receiver.store(record.getData().array())) - + logDebug(s"Stored: Worker $workerId stored ${batch.size} records for shardId $shardId") /* - * Checkpoint the sequence number of the last record successfully processed/stored + * Checkpoint the sequence number of the last record successfully processed/stored * in the batch. * In this implementation, we're checkpointing after the given checkpointIntervalMillis. - * Note that this logic requires that processRecords() be called AND that it's time to - * checkpoint. I point this out because there is no background thread running the + * Note that this logic requires that processRecords() be called AND that it's time to + * checkpoint. I point this out because there is no background thread running the * checkpointer. Checkpointing is tested and trigger only when a new batch comes in. * If the worker is shutdown cleanly, checkpoint will happen (see shutdown() below). * However, if the worker dies unexpectedly, a checkpoint may not happen. @@ -116,22 +124,22 @@ private[kinesis] class KinesisRecordProcessor( logError(s"Exception: WorkerId $workerId encountered and exception while storing " + " or checkpointing a batch for workerId $workerId and shardId $shardId.", e) - /* Rethrow the exception to the Kinesis Worker that is managing this RecordProcessor.*/ + /* Rethrow the exception to the Kinesis Worker that is managing this RecordProcessor. */ throw e } } } else { /* RecordProcessor has been stopped. */ - logInfo(s"Stopped: The Spark KinesisReceiver has stopped for workerId $workerId" + + logInfo(s"Stopped: The Spark KinesisReceiver has stopped for workerId $workerId" + s" and shardId $shardId. No more records will be processed.") } } /** * Kinesis Client Library is shutting down this Worker for 1 of 2 reasons: - * 1) the stream is resharding by splitting or merging adjacent shards + * 1) the stream is resharding by splitting or merging adjacent shards * (ShutdownReason.TERMINATE) - * 2) the failed or latent Worker has stopped sending heartbeats for whatever reason + * 2) the failed or latent Worker has stopped sending heartbeats for whatever reason * (ShutdownReason.ZOMBIE) * * @param checkpointer used to perform a Kinesis checkpoint for ShutdownReason.TERMINATE @@ -145,7 +153,7 @@ private[kinesis] class KinesisRecordProcessor( * Checkpoint to indicate that all records from the shard have been drained and processed. * It's now OK to read from the new shards that resulted from a resharding event. */ - case ShutdownReason.TERMINATE => + case ShutdownReason.TERMINATE => KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100) /* @@ -190,7 +198,7 @@ private[kinesis] object KinesisRecordProcessor extends Logging { logError(s"Retryable Exception: Random backOffMillis=${backOffMillis}", e) retryRandom(expression, numRetriesLeft - 1, maxBackOffMillis) } - /* Throw: Shutdown has been requested by the Kinesis Client Library.*/ + /* Throw: Shutdown has been requested by the Kinesis Client Library. 
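The checkpoint call above is wrapped in KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100), which retries throttling-style failures after a short random back-off and rethrows immediately on shutdown. A generic sketch of that idea; the isRetryable predicate stands in for the KCL-specific exception matching, and flakyCheckpoint in the usage comment is hypothetical:

```scala
import scala.util.Random

/**
 * Evaluate `expression`, retrying up to `numRetriesLeft` times with a random back-off
 * of at most `maxBackOffMillis` when `isRetryable` says the failure is transient.
 * Non-retryable failures, and the final failure, propagate to the caller.
 */
def retryRandomSketch[T](expression: => T,
    numRetriesLeft: Int,
    maxBackOffMillis: Int,
    isRetryable: Throwable => Boolean): T = {
  try {
    expression
  } catch {
    case e: Throwable if isRetryable(e) && numRetriesLeft > 1 =>
      val backOffMillis = Random.nextInt(maxBackOffMillis)
      Thread.sleep(backOffMillis)
      retryRandomSketch(expression, numRetriesLeft - 1, maxBackOffMillis, isRetryable)
  }
}

// Example (hypothetical call site), mirroring retryRandom(checkpointer.checkpoint(), 4, 100):
// retryRandomSketch(flakyCheckpoint(), 4, 100, _.isInstanceOf[RuntimeException])
```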
*/ case _: ShutdownException => { logError(s"ShutdownException: Caught shutdown exception, skipping checkpoint.", e) throw e diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala index 96f4399accd3a..e5acab50181e1 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala @@ -16,29 +16,78 @@ */ package org.apache.spark.streaming.kinesis -import org.apache.spark.annotation.Experimental +import com.amazonaws.regions.RegionUtils +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream + import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Duration -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.api.java.JavaReceiverInputDStream -import org.apache.spark.streaming.api.java.JavaStreamingContext +import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext} import org.apache.spark.streaming.dstream.ReceiverInputDStream - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import org.apache.spark.streaming.{Duration, StreamingContext} -/** - * Helper class to create Amazon Kinesis Input Stream - * :: Experimental :: - */ -@Experimental object KinesisUtils { /** - * Create an InputDStream that pulls messages from a Kinesis stream. - * :: Experimental :: - * @param ssc StreamingContext object + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. + * + * @param ssc StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. 
+ */ + def createStream( + ssc: StreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel + ): ReceiverInputDStream[Array[Byte]] = { + // Setting scope to override receiver stream's scope of "receiver stream" + ssc.withNamedScope("kinesis stream") { + ssc.receiverStream( + new KinesisReceiver(kinesisAppName, streamName, endpointUrl, validateRegion(regionName), + initialPositionInStream, checkpointInterval, storageLevel, None)) + } + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: + * The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. + * + * @param ssc StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB * @param streamName Kinesis stream name * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) + * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) + * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. * See the Kinesis Spark Streaming documentation for more * details on the different types of checkpoints. @@ -48,28 +97,84 @@ object KinesisUtils { * per Kinesis' limit of 24 hours * (InitialPositionInStream.TRIM_HORIZON) or * the tip of the stream (InitialPositionInStream.LATEST). - * @param storageLevel Storage level to use for storing the received objects + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. + */ + def createStream( + ssc: StreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsAccessKeyId: String, + awsSecretKey: String + ): ReceiverInputDStream[Array[Byte]] = { + ssc.receiverStream( + new KinesisReceiver(kinesisAppName, streamName, endpointUrl, validateRegion(regionName), + initialPositionInStream, checkpointInterval, storageLevel, + Some(SerializableAWSCredentials(awsAccessKeyId, awsSecretKey)))) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * @return ReceiverInputDStream[Array[Byte]] + * Note: + * - The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets AWS credentials. + * - The region of the `endpointUrl` will be used for DynamoDB and CloudWatch. + * - The Kinesis application name used by the Kinesis Client Library (KCL) will be the app name in + * [[org.apache.spark.SparkConf]]. 
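At this point both Scala overloads of the new API are visible: one that resolves credentials on the workers through the default chain, and one that takes an explicit key pair, which the scaladoc warns will be saved into DStream checkpoints, so the checkpoint directory must be secured. A hedged usage sketch of the explicit-credentials variant; every name, URL and key below is a placeholder:

```scala
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisUtils

val ssc = new StreamingContext(new SparkConf().setAppName("KinesisSketch"), Seconds(2))

// Explicit-credentials variant: the key pair travels with the DStream and can land in
// checkpoints, so only use it when the checkpoint directory is secured.
val stream = KinesisUtils.createStream(
  ssc,
  "myAppName",                                    // KCL application name (DynamoDB table)
  "mySparkStream",                                // Kinesis stream name
  "https://kinesis.us-east-1.amazonaws.com",      // Kinesis endpoint
  "us-east-1",                                    // region for DynamoDB/CloudWatch
  InitialPositionInStream.LATEST,
  Seconds(2),                                     // Kinesis checkpoint interval
  StorageLevel.MEMORY_AND_DISK_2,
  "AKIA-PLACEHOLDER",                             // placeholder access key id
  "SECRET-PLACEHOLDER")                           // placeholder secret key
```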
+ * + * @param ssc Java StreamingContext object + * @param streamName Kinesis stream name + * @param endpointUrl Endpoint url of Kinesis service + * (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects + * StorageLevel.MEMORY_AND_DISK_2 is recommended. */ - @Experimental + @deprecated("use other forms of createStream", "1.4.0") def createStream( ssc: StreamingContext, streamName: String, endpointUrl: String, checkpointInterval: Duration, initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel): ReceiverInputDStream[Array[Byte]] = { - ssc.receiverStream(new KinesisReceiver(ssc.sc.appName, streamName, endpointUrl, - checkpointInterval, initialPositionInStream, storageLevel)) + storageLevel: StorageLevel + ): ReceiverInputDStream[Array[Byte]] = { + ssc.receiverStream( + new KinesisReceiver(ssc.sc.appName, streamName, endpointUrl, getRegionByEndpoint(endpointUrl), + initialPositionInStream, checkpointInterval, storageLevel, None)) } /** - * Create a Java-friendly InputDStream that pulls messages from a Kinesis stream. - * :: Experimental :: + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets the AWS credentials. + * * @param jssc Java StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB * @param streamName Kinesis stream name * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. * See the Kinesis Spark Streaming documentation for more * details on the different types of checkpoints. @@ -79,19 +184,116 @@ object KinesisUtils { * per Kinesis' limit of 24 hours * (InitialPositionInStream.TRIM_HORIZON) or * the tip of the stream (InitialPositionInStream.LATEST). - * @param storageLevel Storage level to use for storing the received objects + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. 
+ */ + def createStream( + jssc: JavaStreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel + ): JavaReceiverInputDStream[Array[Byte]] = { + createStream(jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, + initialPositionInStream, checkpointInterval, storageLevel) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. * - * @return JavaReceiverInputDStream[Array[Byte]] + * Note: + * The given AWS credentials will get saved in DStream checkpoints if checkpointing + * is enabled. Make sure that your checkpoint directory is secure. + * + * @param jssc Java StreamingContext object + * @param kinesisAppName Kinesis application name used by the Kinesis Client Library + * (KCL) to update DynamoDB + * @param streamName Kinesis stream name + * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param regionName Name of region used by the Kinesis Client Library (KCL) to update + * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) + * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) + * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects. + * StorageLevel.MEMORY_AND_DISK_2 is recommended. */ - @Experimental def createStream( - jssc: JavaStreamingContext, - streamName: String, - endpointUrl: String, + jssc: JavaStreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: InitialPositionInStream, + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsAccessKeyId: String, + awsSecretKey: String + ): JavaReceiverInputDStream[Array[Byte]] = { + createStream(jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, + initialPositionInStream, checkpointInterval, storageLevel, awsAccessKeyId, awsSecretKey) + } + + /** + * Create an input stream that pulls messages from a Kinesis stream. + * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. + * + * Note: + * - The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain + * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain + * gets AWS credentials. + * - The region of the `endpointUrl` will be used for DynamoDB and CloudWatch. + * - The Kinesis application name used by the Kinesis Client Library (KCL) will be the app name in + * [[org.apache.spark.SparkConf]]. 
+ * + * @param jssc Java StreamingContext object + * @param streamName Kinesis stream name + * @param endpointUrl Endpoint url of Kinesis service + * (e.g., https://kinesis.us-east-1.amazonaws.com) + * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. + * See the Kinesis Spark Streaming documentation for more + * details on the different types of checkpoints. + * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the + * worker's initial starting position in the stream. + * The values are either the beginning of the stream + * per Kinesis' limit of 24 hours + * (InitialPositionInStream.TRIM_HORIZON) or + * the tip of the stream (InitialPositionInStream.LATEST). + * @param storageLevel Storage level to use for storing the received objects + * StorageLevel.MEMORY_AND_DISK_2 is recommended. + */ + @deprecated("use other forms of createStream", "1.4.0") + def createStream( + jssc: JavaStreamingContext, + streamName: String, + endpointUrl: String, checkpointInterval: Duration, initialPositionInStream: InitialPositionInStream, - storageLevel: StorageLevel): JavaReceiverInputDStream[Array[Byte]] = { - jssc.receiverStream(new KinesisReceiver(jssc.ssc.sc.appName, streamName, - endpointUrl, checkpointInterval, initialPositionInStream, storageLevel)) + storageLevel: StorageLevel + ): JavaReceiverInputDStream[Array[Byte]] = { + createStream( + jssc.ssc, streamName, endpointUrl, checkpointInterval, initialPositionInStream, storageLevel) + } + + private def getRegionByEndpoint(endpointUrl: String): String = { + RegionUtils.getRegionByEndpoint(endpointUrl).getName() + } + + private def validateRegion(regionName: String): String = { + Option(RegionUtils.getRegion(regionName)).map { _.getName }.getOrElse { + throw new IllegalArgumentException(s"Region name '$regionName' is not valid") + } } } diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 255fe65819608..2103dca6b766f 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -20,26 +20,18 @@ import java.nio.ByteBuffer import scala.collection.JavaConversions.seqAsJavaList -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.Milliseconds -import org.apache.spark.streaming.Seconds -import org.apache.spark.streaming.StreamingContext -import org.apache.spark.streaming.TestSuiteBase -import org.apache.spark.util.{ManualClock, Clock} - -import org.mockito.Mockito._ -import org.scalatest.BeforeAndAfter -import org.scalatest.Matchers -import org.scalatest.mock.MockitoSugar - -import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException -import com.amazonaws.services.kinesis.clientlibrary.exceptions.ThrottlingException +import com.amazonaws.services.kinesis.clientlibrary.exceptions.{InvalidStateException, KinesisClientLibDependencyException, ShutdownException, ThrottlingException} import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import 
com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record +import org.mockito.Mockito._ +import org.scalatest.{BeforeAndAfter, Matchers} +import org.scalatest.mock.MockitoSugar + +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext, TestSuiteBase} +import org.apache.spark.util.{Clock, ManualClock, Utils} /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor @@ -65,7 +57,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ - override def beforeFunction() = { + override def beforeFunction(): Unit = { receiverMock = mock[KinesisReceiver] checkpointerMock = mock[IRecordProcessorCheckpointer] checkpointClockMock = mock[ManualClock] @@ -81,15 +73,28 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft checkpointStateMock, currentClockMock) } - test("kinesis utils api") { + test("KinesisUtils API") { val ssc = new StreamingContext(master, framework, batchDuration) // Tests the API, does not actually test data receiving - val kinesisStream = KinesisUtils.createStream(ssc, "mySparkStream", + val kinesisStream1 = KinesisUtils.createStream(ssc, "mySparkStream", "https://kinesis.us-west-2.amazonaws.com", Seconds(2), - InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2); + InitialPositionInStream.LATEST, StorageLevel.MEMORY_AND_DISK_2) + val kinesisStream2 = KinesisUtils.createStream(ssc, "myAppNam", "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", "us-west-2", + InitialPositionInStream.LATEST, Seconds(2), StorageLevel.MEMORY_AND_DISK_2) + val kinesisStream3 = KinesisUtils.createStream(ssc, "myAppNam", "mySparkStream", + "https://kinesis.us-west-2.amazonaws.com", "us-west-2", + InitialPositionInStream.LATEST, Seconds(2), StorageLevel.MEMORY_AND_DISK_2, + "awsAccessKey", "awsSecretKey") + ssc.stop() } + test("check serializability of SerializableAWSCredentials") { + Utils.deserialize[SerializableAWSCredentials]( + Utils.serialize(new SerializableAWSCredentials("x", "y"))) + } + test("process records including store and checkpoint") { when(receiverMock.isStopped()).thenReturn(false) when(checkpointStateMock.shouldCheckpoint()).thenReturn(true) diff --git a/extras/spark-ganglia-lgpl/pom.xml b/extras/spark-ganglia-lgpl/pom.xml index e14bbae4a9b6e..cf878d12d5f6e 100644 --- a/extras/spark-ganglia-lgpl/pom.xml +++ b/extras/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index d38a3aa8256b7..4cfa687122a3e 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala index 058c8c8aa1b24..ce1054ed92ba1 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala @@ -26,8 +26,8 @@ class EdgeDirection private (private val name: String) extends Serializable { * out becomes in and both and either remain the same. 
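A quick illustration of the reverse semantics described above:

import org.apache.spark.graphx.EdgeDirection

EdgeDirection.In.reverse      // EdgeDirection.Out
EdgeDirection.Out.reverse     // EdgeDirection.In
EdgeDirection.Either.reverse  // EdgeDirection.Either
EdgeDirection.Both.reverse    // EdgeDirection.Both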
*/ def reverse: EdgeDirection = this match { - case EdgeDirection.In => EdgeDirection.Out - case EdgeDirection.Out => EdgeDirection.In + case EdgeDirection.In => EdgeDirection.Out + case EdgeDirection.Out => EdgeDirection.In case EdgeDirection.Either => EdgeDirection.Either case EdgeDirection.Both => EdgeDirection.Both } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala index cc70b396a8dd4..4611a3ace219b 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala @@ -41,14 +41,16 @@ abstract class EdgeRDD[ED]( @transient sc: SparkContext, @transient deps: Seq[Dependency[_]]) extends RDD[Edge[ED]](sc, deps) { + // scalastyle:off structural.type private[graphx] def partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])] forSome { type VD } + // scalastyle:on structural.type override protected def getPartitions: Array[Partition] = partitionsRDD.partitions override def compute(part: Partition, context: TaskContext): Iterator[Edge[ED]] = { val p = firstParent[(PartitionID, EdgePartition[ED, _])].iterator(part, context) if (p.hasNext) { - p.next._2.iterator.map(_.copy()) + p.next()._2.iterator.map(_.copy()) } else { Iterator.empty } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala index c8790cac3d8a0..65f82429d2029 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala @@ -37,7 +37,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] { /** * Set the edge properties of this triplet. */ - protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD,ED] = { + protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD, ED] = { srcId = other.srcId dstId = other.dstId attr = other.attr diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 36dc7b0f86c89..db73a8abc5733 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -316,7 +316,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab * satisfy the predicates */ def subgraph( - epred: EdgeTriplet[VD,ED] => Boolean = (x => true), + epred: EdgeTriplet[VD, ED] => Boolean = (x => true), vpred: (VertexId, VD) => Boolean = ((v, d) => true)) : Graph[VD, ED] diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala index 7edd627b20918..9451ff1e5c0e2 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -124,18 +124,18 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]] = { val nbrs = edgeDirection match { case EdgeDirection.Either => - graph.aggregateMessages[Array[(VertexId,VD)]]( + graph.aggregateMessages[Array[(VertexId, VD)]]( ctx => { ctx.sendToSrc(Array((ctx.dstId, ctx.dstAttr))) ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr))) }, (a, b) => a ++ b, TripletFields.All) case EdgeDirection.In => - graph.aggregateMessages[Array[(VertexId,VD)]]( + graph.aggregateMessages[Array[(VertexId, VD)]]( ctx => ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr))), 
(a, b) => a ++ b, TripletFields.Src) case EdgeDirection.Out => - graph.aggregateMessages[Array[(VertexId,VD)]]( + graph.aggregateMessages[Array[(VertexId, VD)]]( ctx => ctx.sendToSrc(Array((ctx.dstId, ctx.dstAttr))), (a, b) => a ++ b, TripletFields.Dst) case EdgeDirection.Both => @@ -253,7 +253,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali def filter[VD2: ClassTag, ED2: ClassTag]( preprocess: Graph[VD, ED] => Graph[VD2, ED2], epred: (EdgeTriplet[VD2, ED2]) => Boolean = (x: EdgeTriplet[VD2, ED2]) => true, - vpred: (VertexId, VD2) => Boolean = (v:VertexId, d:VD2) => true): Graph[VD, ED] = { + vpred: (VertexId, VD2) => Boolean = (v: VertexId, d: VD2) => true): Graph[VD, ED] = { graph.mask(preprocess(graph).subgraph(epred, vpred)) } @@ -356,7 +356,7 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali maxIterations: Int = Int.MaxValue, activeDirection: EdgeDirection = EdgeDirection.Either)( vprog: (VertexId, VD, A) => VD, - sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId,A)], + sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)], mergeMsg: (A, A) => A) : Graph[VD, ED] = { Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index 01b013ff716fc..cfcf7244eaed5 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -147,10 +147,10 @@ object Pregel extends Logging { logInfo("Pregel finished iteration " + i) // Unpersist the RDDs hidden by newly-materialized RDDs - oldMessages.unpersist(blocking=false) - newVerts.unpersist(blocking=false) - prevG.unpersistVertices(blocking=false) - prevG.edges.unpersist(blocking=false) + oldMessages.unpersist(blocking = false) + newVerts.unpersist(blocking = false) + prevG.unpersistVertices(blocking = false) + prevG.edges.unpersist(blocking = false) // count the iteration i += 1 } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala index c561570809253..ab021a252eb8a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala @@ -156,8 +156,8 @@ class EdgePartition[ val size = data.size var i = 0 while (i < size) { - edge.srcId = srcIds(i) - edge.dstId = dstIds(i) + edge.srcId = srcIds(i) + edge.dstId = dstIds(i) edge.attr = data(i) newData(i) = f(edge) i += 1 diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index bc974b2f04e70..8c0a461e99fa4 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -116,7 +116,7 @@ object PageRank extends Logging { val personalized = srcId isDefined val src: VertexId = srcId.getOrElse(-1L) - def delta(u: VertexId, v: VertexId):Double = { if (u == v) 1.0 else 0.0 } + def delta(u: VertexId, v: VertexId): Double = { if (u == v) 1.0 else 0.0 } var iteration = 0 var prevRankGraph: Graph[Double, Double] = null @@ -133,13 +133,13 @@ object PageRank extends Logging { // edge partitions. 
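As a rough sketch of the pregel contract whose signature appears above (vertex program, send function, merge function), the classic single-source shortest-path computation; `graph` is an assumed Graph with Double edge weights and `sourceId` an assumed VertexId:

import org.apache.spark.graphx._

val initialGraph = graph.mapVertices((id, _) =>
  if (id == sourceId) 0.0 else Double.PositiveInfinity)

val sssp = initialGraph.pregel(Double.PositiveInfinity)(
  (id, dist, newDist) => math.min(dist, newDist),  // vprog: keep the shorter distance
  triplet =>                                       // sendMsg: relax edges
    if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
      Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
    } else {
      Iterator.empty
    },
  (a, b) => math.min(a, b))                        // mergeMsg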
prevRankGraph = rankGraph val rPrb = if (personalized) { - (src: VertexId ,id: VertexId) => resetProb * delta(src,id) + (src: VertexId , id: VertexId) => resetProb * delta(src, id) } else { (src: VertexId, id: VertexId) => resetProb } rankGraph = rankGraph.joinVertices(rankUpdates) { - (id, oldRank, msgSum) => rPrb(src,id) + (1.0 - resetProb) * msgSum + (id, oldRank, msgSum) => rPrb(src, id) + (1.0 - resetProb) * msgSum }.cache() rankGraph.edges.foreachPartition(x => {}) // also materializes rankGraph.vertices @@ -243,7 +243,7 @@ object PageRank extends Logging { // Execute a dynamic version of Pregel. val vp = if (personalized) { - (id: VertexId, attr: (Double, Double),msgSum: Double) => + (id: VertexId, attr: (Double, Double), msgSum: Double) => personalizedVertexProgram(id, attr, msgSum) } else { (id: VertexId, attr: (Double, Double), msgSum: Double) => diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala index 3b0e1628d86b5..9cb24ed080e1c 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala @@ -210,7 +210,7 @@ object SVDPlusPlus { /** * Forces materialization of a Graph by count()ing its RDDs. */ - private def materialize(g: Graph[_,_]): Unit = { + private def materialize(g: Graph[_, _]): Unit = { g.vertices.count() g.edges.count() } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala index daf162085e3e4..a5d598053f9ca 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala @@ -38,7 +38,7 @@ import org.apache.spark.graphx._ */ object TriangleCount { - def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD,ED]): Graph[Int, ED] = { + def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): Graph[Int, ED] = { // Remove redundant edges val g = graph.groupEdges((a, b) => a).cache() @@ -49,7 +49,7 @@ object TriangleCount { var i = 0 while (i < nbrs.size) { // prevent self cycle - if(nbrs(i) != vid) { + if (nbrs(i) != vid) { set.add(nbrs(i)) } i += 1 diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala index 2d6a825b61726..9591c4e9b8f4e 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala @@ -243,14 +243,15 @@ object GraphGenerators { * @return A graph containing vertices with the row and column ids * as their attributes and edge values as 1.0. 
*/ - def gridGraph(sc: SparkContext, rows: Int, cols: Int): Graph[(Int,Int), Double] = { + def gridGraph(sc: SparkContext, rows: Int, cols: Int): Graph[(Int, Int), Double] = { // Convert row column address into vertex ids (row major order) def sub2ind(r: Int, c: Int): VertexId = r * cols + c - val vertices: RDD[(VertexId, (Int,Int))] = - sc.parallelize(0 until rows).flatMap( r => (0 until cols).map( c => (sub2ind(r,c), (r,c)) ) ) + val vertices: RDD[(VertexId, (Int, Int))] = sc.parallelize(0 until rows).flatMap { r => + (0 until cols).map( c => (sub2ind(r, c), (r, c)) ) + } val edges: RDD[Edge[Double]] = - vertices.flatMap{ case (vid, (r,c)) => + vertices.flatMap{ case (vid, (r, c)) => (if (r + 1 < rows) { Seq( (sub2ind(r, c), sub2ind(r + 1, c))) } else { Seq.empty }) ++ (if (c + 1 < cols) { Seq( (sub2ind(r, c), sub2ind(r, c + 1))) } else { Seq.empty }) }.map{ case (src, dst) => Edge(src, dst, 1.0) } diff --git a/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala index 5a2c73b414279..81436069c0d4e 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/EdgeSuite.scala @@ -23,15 +23,15 @@ class EdgeSuite extends FunSuite { test ("compare") { // decending order val testEdges: Array[Edge[Int]] = Array( - Edge(0x7FEDCBA987654321L, -0x7FEDCBA987654321L, 1), - Edge(0x2345L, 0x1234L, 1), - Edge(0x1234L, 0x5678L, 1), - Edge(0x1234L, 0x2345L, 1), + Edge(0x7FEDCBA987654321L, -0x7FEDCBA987654321L, 1), + Edge(0x2345L, 0x1234L, 1), + Edge(0x1234L, 0x5678L, 1), + Edge(0x1234L, 0x2345L, 1), Edge(-0x7FEDCBA987654321L, 0x7FEDCBA987654321L, 1) ) // to ascending order val sortedEdges = testEdges.sorted(Edge.lexicographicOrdering[Int]) - + for (i <- 0 until testEdges.length) { assert(sortedEdges(i) == testEdges(testEdges.length - i - 1)) } diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala index 9bc8007ce49cd..68fe83739e399 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala @@ -59,7 +59,7 @@ class GraphOpsSuite extends FunSuite with LocalSparkContext { test ("filter") { withSpark { sc => val n = 5 - val vertices = sc.parallelize((0 to n).map(x => (x:VertexId, x))) + val vertices = sc.parallelize((0 to n).map(x => (x: VertexId, x))) val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x))) val graph: Graph[Int, Int] = Graph(vertices, edges).cache() val filteredGraph = graph.filter( @@ -67,11 +67,11 @@ class GraphOpsSuite extends FunSuite with LocalSparkContext { val degrees: VertexRDD[Int] = graph.outDegrees graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)} }, - vpred = (vid: VertexId, deg:Int) => deg > 0 + vpred = (vid: VertexId, deg: Int) => deg > 0 ).cache() val v = filteredGraph.vertices.collect().toSet - assert(v === Set((0,0))) + assert(v === Set((0, 0))) // the map is necessary because of object-reuse in the edge iterator val e = filteredGraph.edges.map(e => Edge(e.srcId, e.dstId, e.attr)).collect().toSet diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala index a570e4ed75fc3..2b1d8e47326f8 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala @@ -248,7 +248,7 @@ 
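A small usage sketch for the grid generator above; `sc` is an assumed SparkContext:

import org.apache.spark.graphx.util.GraphGenerators

// 3 x 4 grid: 12 vertices keyed in row-major order, each carrying its (row, col) pair,
// with unit-weight edges to the next row and the next column
val grid = GraphGenerators.gridGraph(sc, rows = 3, cols = 4)
assert(grid.vertices.count() == 12)
assert(grid.edges.count() == 3 * 3 + 2 * 4)  // rows * (cols - 1) + (rows - 1) * cols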
class GraphSuite extends FunSuite with LocalSparkContext { test("mask") { withSpark { sc => val n = 5 - val vertices = sc.parallelize((0 to n).map(x => (x:VertexId, x))) + val vertices = sc.parallelize((0 to n).map(x => (x: VertexId, x))) val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x))) val graph: Graph[Int, Int] = Graph(vertices, edges).cache() @@ -260,11 +260,11 @@ class GraphSuite extends FunSuite with LocalSparkContext { val projectedGraph = graph.mask(subgraph) val v = projectedGraph.vertices.collect().toSet - assert(v === Set((0,0), (1,1), (2,2), (4,4), (5,5))) + assert(v === Set((0, 0), (1, 1), (2, 2), (4, 4), (5, 5))) // the map is necessary because of object-reuse in the edge iterator val e = projectedGraph.edges.map(e => Edge(e.srcId, e.dstId, e.attr)).collect().toSet - assert(e === Set(Edge(0,1,1), Edge(0,2,2), Edge(0,5,5))) + assert(e === Set(Edge(0, 1, 1), Edge(0, 2, 2), Edge(0, 5, 5))) } } diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala index 4cc30a96408f8..accccfc232cd3 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/ConnectedComponentsSuite.scala @@ -52,13 +52,16 @@ class ConnectedComponentsSuite extends FunSuite with LocalSparkContext { withSpark { sc => val chain1 = (0 until 9).map(x => (x, x + 1)) val chain2 = (10 until 20).map(x => (x, x + 1)) - val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s,d) => (s.toLong, d.toLong) } + val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s, d) => (s.toLong, d.toLong) } val twoChains = Graph.fromEdgeTuples(rawEdges, 1.0) val ccGraph = twoChains.connectedComponents() val vertices = ccGraph.vertices.collect() for ( (id, cc) <- vertices ) { - if(id < 10) { assert(cc === 0) } - else { assert(cc === 10) } + if (id < 10) { + assert(cc === 0) + } else { + assert(cc === 10) + } } val ccMap = vertices.toMap for (id <- 0 until 20) { @@ -75,7 +78,7 @@ class ConnectedComponentsSuite extends FunSuite with LocalSparkContext { withSpark { sc => val chain1 = (0 until 9).map(x => (x, x + 1)) val chain2 = (10 until 20).map(x => (x, x + 1)) - val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s,d) => (s.toLong, d.toLong) } + val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s, d) => (s.toLong, d.toLong) } val twoChains = Graph.fromEdgeTuples(rawEdges, true).reverse val ccGraph = twoChains.connectedComponents() val vertices = ccGraph.vertices.collect() @@ -106,9 +109,9 @@ class ConnectedComponentsSuite extends FunSuite with LocalSparkContext { (4L, ("peter", "student")))) // Create an RDD for edges val relationships: RDD[Edge[String]] = - sc.parallelize(Array(Edge(3L, 7L, "collab"), Edge(5L, 3L, "advisor"), + sc.parallelize(Array(Edge(3L, 7L, "collab"), Edge(5L, 3L, "advisor"), Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi"), - Edge(4L, 0L, "student"), Edge(5L, 0L, "colleague"))) + Edge(4L, 0L, "student"), Edge(5L, 0L, "colleague"))) // Edges are: // 2 ---> 5 ---> 3 // | \ diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala index 3f3c9dfd7b3dd..39c6ace912b00 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala @@ -31,14 +31,14 @@ object GridPageRank { def 
sub2ind(r: Int, c: Int): Int = r * nCols + c // Make the grid graph for (r <- 0 until nRows; c <- 0 until nCols) { - val ind = sub2ind(r,c) + val ind = sub2ind(r, c) if (r + 1 < nRows) { outDegree(ind) += 1 - inNbrs(sub2ind(r + 1,c)) += ind + inNbrs(sub2ind(r + 1, c)) += ind } if (c + 1 < nCols) { outDegree(ind) += 1 - inNbrs(sub2ind(r,c + 1)) += ind + inNbrs(sub2ind(r, c + 1)) += ind } } // compute the pagerank @@ -99,8 +99,8 @@ class PageRankSuite extends FunSuite with LocalSparkContext { val resetProb = 0.15 val errorTol = 1.0e-5 - val staticRanks1 = starGraph.staticPersonalizedPageRank(0,numIter = 1, resetProb).vertices - val staticRanks2 = starGraph.staticPersonalizedPageRank(0,numIter = 2, resetProb) + val staticRanks1 = starGraph.staticPersonalizedPageRank(0, numIter = 1, resetProb).vertices + val staticRanks2 = starGraph.staticPersonalizedPageRank(0, numIter = 2, resetProb) .vertices.cache() // Static PageRank should only take 2 iterations to converge @@ -117,7 +117,7 @@ class PageRankSuite extends FunSuite with LocalSparkContext { } assert(staticErrors.sum === 0) - val dynamicRanks = starGraph.personalizedPageRank(0,0, resetProb).vertices.cache() + val dynamicRanks = starGraph.personalizedPageRank(0, 0, resetProb).vertices.cache() assert(compareRanks(staticRanks2, dynamicRanks) < errorTol) } } // end of test Star PageRank @@ -162,7 +162,7 @@ class PageRankSuite extends FunSuite with LocalSparkContext { test("Chain PersonalizedPageRank") { withSpark { sc => val chain1 = (0 until 9).map(x => (x, x + 1) ) - val rawEdges = sc.parallelize(chain1, 1).map { case (s,d) => (s.toLong, d.toLong) } + val rawEdges = sc.parallelize(chain1, 1).map { case (s, d) => (s.toLong, d.toLong) } val chain = Graph.fromEdgeTuples(rawEdges, 1.0).cache() val resetProb = 0.15 val tol = 0.0001 diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala index 293c7f3ba4c21..79bf4e6cd18ee 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/TriangleCountSuite.scala @@ -58,7 +58,7 @@ class TriangleCountSuite extends FunSuite with LocalSparkContext { val triangles = Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ Array(0L -> -1L, -1L -> -2L, -2L -> 0L) - val revTriangles = triangles.map { case (a,b) => (b,a) } + val revTriangles = triangles.map { case (a, b) => (b, a) } val rawEdges = sc.parallelize(triangles ++ revTriangles, 2) val graph = Graph.fromEdgeTuples(rawEdges, true).cache() val triangleCount = graph.triangleCount() diff --git a/launcher/pom.xml b/launcher/pom.xml index ebfa7685eaa18..c2f3bce939688 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml diff --git a/make-distribution.sh b/make-distribution.sh index 1bfa9acb1fe6e..a2b0c431fb4d0 100755 --- a/make-distribution.sh +++ b/make-distribution.sh @@ -58,7 +58,7 @@ while (( "$#" )); do --hadoop) echo "Error: '--hadoop' is no longer supported:" echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead." - echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4." + echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4." 
exit_with_usage ;; --with-yarn) @@ -231,6 +231,11 @@ cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" cp -r "$SPARK_HOME/sbin" "$DISTDIR" cp -r "$SPARK_HOME/ec2" "$DISTDIR" +# Copy SparkR if it exists +if [ -d "$SPARK_HOME"/R/lib/SparkR ]; then + mkdir -p "$DISTDIR"/R/lib + cp -r "$SPARK_HOME/R/lib/SparkR" "$DISTDIR"/R/lib +fi # Download and copy in tachyon, if requested if [ "$SPARK_TACHYON" == "true" ]; then diff --git a/mllib/pom.xml b/mllib/pom.xml index a3c57ae26000b..5439095b51cf3 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -141,16 +141,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - ../python - - pyspark/mllib/*.py - pyspark/mllib/stat/*.py - pyspark/ml/*.py - pyspark/ml/param/*.py - - - diff --git a/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala b/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala index d6b3503ebdd9a..e9a5d7c0e7988 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Estimator.scala @@ -19,28 +19,31 @@ package org.apache.spark.ml import scala.annotation.varargs -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.param.{ParamMap, ParamPair, Params} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml.param.{ParamMap, ParamPair} import org.apache.spark.sql.DataFrame /** - * :: AlphaComponent :: + * :: DeveloperApi :: * Abstract class for estimators that fit models to data. */ -@AlphaComponent -abstract class Estimator[M <: Model[M]] extends PipelineStage with Params { +@DeveloperApi +abstract class Estimator[M <: Model[M]] extends PipelineStage { /** * Fits a single model to the input data with optional parameters. * * @param dataset input dataset - * @param paramPairs Optional list of param pairs. - * These values override any specified in this Estimator's embedded ParamMap. + * @param firstParamPair the first param pair, overrides embedded params + * @param otherParamPairs other param pairs. These values override any specified in this + * Estimator's embedded ParamMap. * @return fitted model */ @varargs - def fit(dataset: DataFrame, paramPairs: ParamPair[_]*): M = { - val map = ParamMap(paramPairs: _*) + def fit(dataset: DataFrame, firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): M = { + val map = new ParamMap() + .put(firstParamPair) + .put(otherParamPairs: _*) fit(dataset, map) } @@ -52,12 +55,19 @@ abstract class Estimator[M <: Model[M]] extends PipelineStage with Params { * These values override any specified in this Estimator's embedded ParamMap. * @return fitted model */ - def fit(dataset: DataFrame, paramMap: ParamMap): M + def fit(dataset: DataFrame, paramMap: ParamMap): M = { + copy(paramMap).fit(dataset) + } + + /** + * Fits a model to the input data. + */ + def fit(dataset: DataFrame): M /** * Fits multiple models to the input data with multiple sets of parameters. * The default implementation uses a for loop on each parameter map. - * Subclasses could overwrite this to optimize multi-model training. + * Subclasses could override this to optimize multi-model training. * * @param dataset input dataset * @param paramMaps An array of parameter maps. 
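Roughly how the reworked fit overloads above are meant to be used, sketched here with spark.ml's LogisticRegression; `training` is an assumed DataFrame with "label" and "features" columns:

import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.param.ParamMap

val lr = new LogisticRegression().setMaxIter(10)

val m1 = lr.fit(training)                                        // embedded params only
val m2 = lr.fit(training, lr.maxIter -> 30, lr.regParam -> 0.1)  // per-call param pairs
val m3 = lr.fit(training, ParamMap(lr.maxIter -> 30, lr.regParam -> 0.1))
// the ParamMap form is now implemented as copy(paramMap).fit(dataset)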
@@ -67,4 +77,8 @@ abstract class Estimator[M <: Model[M]] extends PipelineStage with Params { def fit(dataset: DataFrame, paramMaps: Array[ParamMap]): Seq[M] = { paramMaps.map(fit(dataset, _)) } + + override def copy(extra: ParamMap): Estimator[M] = { + super.copy(extra).asInstanceOf[Estimator[M]] + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Model.scala b/mllib/src/main/scala/org/apache/spark/ml/Model.scala index a491bc7ee8295..186bf7ae7a2f6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Model.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Model.scala @@ -17,26 +17,36 @@ package org.apache.spark.ml -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param.ParamMap /** - * :: AlphaComponent :: + * :: DeveloperApi :: * A fitted model, i.e., a [[Transformer]] produced by an [[Estimator]]. * * @tparam M model type */ -@AlphaComponent +@DeveloperApi abstract class Model[M <: Model[M]] extends Transformer { /** * The parent estimator that produced this model. * Note: For ensembles' component Models, this value can be null. */ - val parent: Estimator[M] + @transient var parent: Estimator[M] = _ /** - * Fitting parameters, such that parent.fit(..., fittingParamMap) could reproduce the model. - * Note: For ensembles' component Models, this value can be null. + * Sets the parent of this model (Java API). */ - val fittingParamMap: ParamMap + def setParent(parent: Estimator[M]): M = { + this.parent = parent + this.asInstanceOf[M] + } + + /** Indicates whether this [[Model]] has a corresponding parent. */ + def hasParent: Boolean = parent != null + + override def copy(extra: ParamMap): M = { + // The default implementation of Params.copy doesn't work for models. + throw new NotImplementedError(s"${this.getClass} doesn't implement copy(extra: ParamMap)") + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 6bfeecd764d75..11a4722722ea1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -20,54 +20,56 @@ package org.apache.spark.ml import scala.collection.mutable.ListBuffer import org.apache.spark.Logging -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.param.{Params, Param, ParamMap} +import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.ml.param.{Param, ParamMap, Params} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType /** - * :: AlphaComponent :: + * :: DeveloperApi :: * A stage in a pipeline, either an [[Estimator]] or a [[Transformer]]. */ -@AlphaComponent -abstract class PipelineStage extends Serializable with Logging { +@DeveloperApi +abstract class PipelineStage extends Params with Logging { /** * :: DeveloperApi :: * - * Derives the output schema from the input schema and parameters. - * The schema describes the columns and types of the data. - * - * @param schema Input schema to this stage - * @param paramMap Parameters passed to this stage - * @return Output schema from this stage + * Derives the output schema from the input schema. 
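The new mutable parent wiring above can be observed on any fitted model; continuing the LogisticRegression sketch:

val model = lr.fit(training)
model.hasParent     // true: fit() attaches the producing estimator via setParent(this)
model.parent eq lr  // true: the parent is the estimator instance that was fit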
*/ @DeveloperApi - def transformSchema(schema: StructType, paramMap: ParamMap): StructType + def transformSchema(schema: StructType): StructType /** + * :: DeveloperApi :: + * * Derives the output schema from the input schema and parameters, optionally with logging. * * This should be optimistic. If it is unclear whether the schema will be valid, then it should * be assumed valid until proven otherwise. */ + @DeveloperApi protected def transformSchema( schema: StructType, - paramMap: ParamMap, logging: Boolean): StructType = { if (logging) { logDebug(s"Input schema: ${schema.json}") } - val outputSchema = transformSchema(schema, paramMap) + val outputSchema = transformSchema(schema) if (logging) { logDebug(s"Expected output schema: ${outputSchema.json}") } outputSchema } + + override def copy(extra: ParamMap): PipelineStage = { + super.copy(extra).asInstanceOf[PipelineStage] + } } /** - * :: AlphaComponent :: + * :: Experimental :: * A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will @@ -78,20 +80,26 @@ abstract class PipelineStage extends Serializable with Logging { * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as * an identity transformer. */ -@AlphaComponent -class Pipeline extends Estimator[PipelineModel] { +@Experimental +class Pipeline(override val uid: String) extends Estimator[PipelineModel] { + + def this() = this(Identifiable.randomUID("pipeline")) - /** param for pipeline stages */ + /** + * param for pipeline stages + * @group param + */ val stages: Param[Array[PipelineStage]] = new Param(this, "stages", "stages of the pipeline") + + /** @group setParam */ def setStages(value: Array[PipelineStage]): this.type = { set(stages, value); this } - def getStages: Array[PipelineStage] = getOrDefault(stages) - override def validate(paramMap: ParamMap): Unit = { - val map = extractParamMap(paramMap) - getStages.foreach { - case pStage: Params => pStage.validate(map) - case _ => - } + /** @group getParam */ + def getStages: Array[PipelineStage] = $(stages).clone() + + override def validateParams(): Unit = { + super.validateParams() + $(stages).foreach(_.validateParams()) } /** @@ -104,13 +112,11 @@ class Pipeline extends Estimator[PipelineModel] { * pipeline stages. If there are no stages, the output model acts as an identity transformer. * * @param dataset input dataset - * @param paramMap parameter map * @return fitted pipeline */ - override def fit(dataset: DataFrame, paramMap: ParamMap): PipelineModel = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val theStages = map(stages) + override def fit(dataset: DataFrame): PipelineModel = { + transformSchema(dataset.schema, logging = true) + val theStages = $(stages) // Search for the last estimator. 
var indexOfLastEstimator = -1 theStages.view.zipWithIndex.foreach { case (stage, index) => @@ -126,7 +132,7 @@ class Pipeline extends Estimator[PipelineModel] { if (index <= indexOfLastEstimator) { val transformer = stage match { case estimator: Estimator[_] => - estimator.fit(curDataset, paramMap) + estimator.fit(curDataset) case t: Transformer => t case _ => @@ -134,7 +140,7 @@ class Pipeline extends Estimator[PipelineModel] { s"Do not support stage $stage of type ${stage.getClass}") } if (index < indexOfLastEstimator) { - curDataset = transformer.transform(curDataset, paramMap) + curDataset = transformer.transform(curDataset) } transformers += transformer } else { @@ -142,62 +148,48 @@ class Pipeline extends Estimator[PipelineModel] { } } - new PipelineModel(this, map, transformers.toArray) + new PipelineModel(uid, transformers.toArray).setParent(this) + } + + override def copy(extra: ParamMap): Pipeline = { + val map = extractParamMap(extra) + val newStages = map(stages).map(_.copy(extra)) + new Pipeline().setStages(newStages) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - val theStages = map(stages) + override def transformSchema(schema: StructType): StructType = { + val theStages = $(stages) require(theStages.toSet.size == theStages.length, "Cannot have duplicate components in a pipeline.") - theStages.foldLeft(schema)((cur, stage) => stage.transformSchema(cur, paramMap)) + theStages.foldLeft(schema)((cur, stage) => stage.transformSchema(cur)) } } /** - * :: AlphaComponent :: - * Represents a compiled pipeline. + * :: Experimental :: + * Represents a fitted pipeline. */ -@AlphaComponent +@Experimental class PipelineModel private[ml] ( - override val parent: Pipeline, - override val fittingParamMap: ParamMap, - private[ml] val stages: Array[Transformer]) + override val uid: String, + val stages: Array[Transformer]) extends Model[PipelineModel] with Logging { - override def validate(paramMap: ParamMap): Unit = { - val map = fittingParamMap ++ extractParamMap(paramMap) - stages.foreach(_.validate(map)) + override def validateParams(): Unit = { + super.validateParams() + stages.foreach(_.validateParams()) } - /** - * Gets the model produced by the input estimator. Throws an NoSuchElementException is the input - * estimator does not exist in the pipeline. 
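A condensed sketch of the Pipeline / PipelineModel flow above, using the usual spark.ml text-classification stages; `training` and `test` are assumed DataFrames with "text" and "label" columns:

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}

val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val hashingTF = new HashingTF()
  .setInputCol(tokenizer.getOutputCol).setOutputCol("features")
val lr = new LogisticRegression().setMaxIter(10)

val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

// fit() runs the stages in order; estimators are fit and replaced by the models they produce
val model: PipelineModel = pipeline.fit(training)

// the fitted PipelineModel is a Transformer: transform() simply chains the stages
model.transform(test).select("text", "prediction").show()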
- */ - def getModel[M <: Model[M]](stage: Estimator[M]): M = { - val matched = stages.filter { - case m: Model[_] => m.parent.eq(stage) - case _ => false - } - if (matched.isEmpty) { - throw new NoSuchElementException(s"Cannot find stage $stage from the pipeline.") - } else if (matched.length > 1) { - throw new IllegalStateException(s"Cannot have duplicate estimators in the sample pipeline.") - } else { - matched.head.asInstanceOf[M] - } + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + stages.foldLeft(dataset)((cur, transformer) => transformer.transform(cur)) } - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap - val map = fittingParamMap ++ extractParamMap(paramMap) - transformSchema(dataset.schema, map, logging = true) - stages.foldLeft(dataset)((cur, transformer) => transformer.transform(cur, map)) + override def transformSchema(schema: StructType): StructType = { + stages.foldLeft(schema)((cur, transformer) => transformer.transformSchema(cur)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - // Precedence of ParamMaps: paramMap > this.paramMap > fittingParamMap - val map = fittingParamMap ++ extractParamMap(paramMap) - stages.foldLeft(schema)((cur, transformer) => transformer.transformSchema(cur, map)) + override def copy(extra: ParamMap): PipelineModel = { + new PipelineModel(uid, stages) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala similarity index 62% rename from mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala rename to mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index 195333a5cc47f..e752b81a14282 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/impl/estimator/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -15,36 +15,28 @@ * limitations under the License. */ -package org.apache.spark.ml.impl.estimator +package org.apache.spark.ml -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.util.SchemaUtils -import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.mllib.linalg.{VectorUDT, Vector} +import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} - +import org.apache.spark.sql.{DataFrame, Row} /** - * :: DeveloperApi :: - * - * Trait for parameters for prediction (regression and classification). - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. + * (private[ml]) Trait for parameters for prediction (regression and classification). */ -@DeveloperApi -private[spark] trait PredictorParams extends Params +private[ml] trait PredictorParams extends Params with HasLabelCol with HasFeaturesCol with HasPredictionCol { /** * Validates and transforms the input schema with the provided param map. 
* @param schema input schema - * @param paramMap additional parameters * @param fitting whether this is in fitting * @param featuresDataType SQL DataType for FeaturesType. * E.g., [[org.apache.spark.mllib.linalg.VectorUDT]] for vector features. @@ -52,23 +44,20 @@ private[spark] trait PredictorParams extends Params */ protected def validateAndTransformSchema( schema: StructType, - paramMap: ParamMap, fitting: Boolean, featuresDataType: DataType): StructType = { - val map = extractParamMap(paramMap) // TODO: Support casting Array[Double] and Array[Float] to Vector when FeaturesType = Vector - SchemaUtils.checkColumnType(schema, map(featuresCol), featuresDataType) + SchemaUtils.checkColumnType(schema, $(featuresCol), featuresDataType) if (fitting) { // TODO: Allow other numeric types - SchemaUtils.checkColumnType(schema, map(labelCol), DoubleType) + SchemaUtils.checkColumnType(schema, $(labelCol), DoubleType) } - SchemaUtils.appendColumn(schema, map(predictionCol), DoubleType) + SchemaUtils.appendColumn(schema, $(predictionCol), DoubleType) } } /** - * :: AlphaComponent :: - * + * :: DeveloperApi :: * Abstraction for prediction problems (regression and classification). * * @tparam FeaturesType Type of features. @@ -77,11 +66,9 @@ private[spark] trait PredictorParams extends Params * parameter to specify the concrete type. * @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type * parameter to specify the concrete type for the corresponding model. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] abstract class Predictor[ +@DeveloperApi +abstract class Predictor[ FeaturesType, Learner <: Predictor[FeaturesType, Learner, M], M <: PredictionModel[FeaturesType, M]] @@ -96,34 +83,28 @@ private[spark] abstract class Predictor[ /** @group setParam */ def setPredictionCol(value: String): Learner = set(predictionCol, value).asInstanceOf[Learner] - override def fit(dataset: DataFrame, paramMap: ParamMap): M = { + override def fit(dataset: DataFrame): M = { // This handles a few items such as schema validation. // Developers only need to implement train(). - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val model = train(dataset, map) - Params.inheritValues(map, this, model) // copy params to model - model + transformSchema(dataset.schema, logging = true) + copyValues(train(dataset).setParent(this)) + } + + override def copy(extra: ParamMap): Learner = { + super.copy(extra).asInstanceOf[Learner] } /** - * :: DeveloperApi :: - * * Train a model using the given dataset and parameters. * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation * and copying parameters into the model. * * @param dataset Training dataset - * @param paramMap Parameter map. Unlike [[fit()]]'s paramMap, this paramMap has already - * been combined with the embedded ParamMap. * @return Fitted model */ - @DeveloperApi - protected def train(dataset: DataFrame, paramMap: ParamMap): M + protected def train(dataset: DataFrame): M /** - * :: DeveloperApi :: - * * Returns the SQL DataType corresponding to the FeaturesType type parameter. * * This is used by [[validateAndTransformSchema()]]. @@ -131,20 +112,18 @@ private[spark] abstract class Predictor[ * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. 
*/ - @DeveloperApi - protected def featuresDataType: DataType = new VectorUDT + private[ml] def featuresDataType: DataType = new VectorUDT - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap, fitting = true, featuresDataType) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema, fitting = true, featuresDataType) } /** * Extract [[labelCol]] and [[featuresCol]] from the given dataset, * and put it in an RDD with strong types. */ - protected def extractLabeledPoints(dataset: DataFrame, paramMap: ParamMap): RDD[LabeledPoint] = { - val map = extractParamMap(paramMap) - dataset.select(map(labelCol), map(featuresCol)) + protected def extractLabeledPoints(dataset: DataFrame): RDD[LabeledPoint] = { + dataset.select($(labelCol), $(featuresCol)) .map { case Row(label: Double, features: Vector) => LabeledPoint(label, features) } @@ -152,19 +131,16 @@ private[spark] abstract class Predictor[ } /** - * :: AlphaComponent :: - * + * :: DeveloperApi :: * Abstraction for a model for prediction tasks (regression and classification). * * @tparam FeaturesType Type of features. * E.g., [[org.apache.spark.mllib.linalg.VectorUDT]] for vector features. * @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type * parameter to specify the concrete type for the corresponding model. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]] +@DeveloperApi +abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, M]] extends Model[M] with PredictorParams { /** @group setParam */ @@ -174,8 +150,6 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel def setPredictionCol(value: String): M = set(predictionCol, value).asInstanceOf[M] /** - * :: DeveloperApi :: - * * Returns the SQL DataType corresponding to the FeaturesType type parameter. * * This is used by [[validateAndTransformSchema()]]. @@ -183,11 +157,10 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. */ - @DeveloperApi protected def featuresDataType: DataType = new VectorUDT - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap, fitting = false, featuresDataType) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema, fitting = false, featuresDataType) } /** @@ -195,30 +168,12 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel * the predictions as a new column [[predictionCol]]. * * @param dataset input dataset - * @param paramMap additional parameters, overwrite embedded params * @return transformed dataset with [[predictionCol]] of type [[Double]] */ - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - // This default implementation should be overridden as needed. 
- - // Check schema - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - - // Prepare model - val tmpModel = if (paramMap.size != 0) { - val tmpModel = this.copy() - Params.inheritValues(paramMap, parent, tmpModel) - tmpModel - } else { - this - } - - if (map(predictionCol) != "") { - val pred: FeaturesType => Double = (features) => { - tmpModel.predict(features) - } - dataset.withColumn(map(predictionCol), callUDF(pred, DoubleType, col(map(featuresCol)))) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + if ($(predictionCol).nonEmpty) { + dataset.withColumn($(predictionCol), callUDF(predict _, DoubleType, col($(featuresCol)))) } else { this.logWarning(s"$uid: Predictor.transform() was called as NOOP" + " since no output columns were set.") @@ -227,17 +182,8 @@ private[spark] abstract class PredictionModel[FeaturesType, M <: PredictionModel } /** - * :: DeveloperApi :: - * * Predict label for the given features. * This internal method is used to implement [[transform()]] and output [[predictionCol]]. */ - @DeveloperApi protected def predict(features: FeaturesType): Double - - /** - * Create a copy of the model. - * The copy is shallow, except for the embedded paramMap, which gets a deep copy. - */ - protected def copy(): M } diff --git a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala index 0acda71ec6045..f07f733a5ddb5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Transformer.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml import scala.annotation.varargs import org.apache.spark.Logging -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.sql.DataFrame @@ -28,22 +28,27 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: AlphaComponent :: + * :: DeveloperApi :: * Abstract class for transformers that transform one dataset into another. */ -@AlphaComponent -abstract class Transformer extends PipelineStage with Params { +@DeveloperApi +abstract class Transformer extends PipelineStage { /** * Transforms the dataset with optional parameters * @param dataset input dataset - * @param paramPairs optional list of param pairs, overwrite embedded params + * @param firstParamPair the first param pair, overwrite embedded params + * @param otherParamPairs other param pairs, overwrite embedded params * @return transformed dataset */ @varargs - def transform(dataset: DataFrame, paramPairs: ParamPair[_]*): DataFrame = { + def transform( + dataset: DataFrame, + firstParamPair: ParamPair[_], + otherParamPairs: ParamPair[_]*): DataFrame = { val map = new ParamMap() - paramPairs.foreach(map.put(_)) + .put(firstParamPair) + .put(otherParamPairs: _*) transform(dataset, map) } @@ -53,14 +58,27 @@ abstract class Transformer extends PipelineStage with Params { * @param paramMap additional parameters, overwrite embedded params * @return transformed dataset */ - def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame + def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { + this.copy(paramMap).transform(dataset) + } + + /** + * Transforms the input dataset. 
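The transform entry points above line up as follows, sketched with HashingTF; `wordsDF` is an assumed DataFrame holding a tokenized "words" column:

import org.apache.spark.ml.feature.HashingTF
import org.apache.spark.ml.param.ParamMap

val hashingTF = new HashingTF().setInputCol("words").setOutputCol("features")

hashingTF.transform(wordsDF)                                           // embedded params only
hashingTF.transform(wordsDF, hashingTF.numFeatures -> 4096)            // per-call param pairs
hashingTF.transform(wordsDF, ParamMap(hashingTF.numFeatures -> 4096))  // explicit ParamMap
// the param-carrying forms are implemented as copy(paramMap).transform(dataset)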
+ */ + def transform(dataset: DataFrame): DataFrame + + override def copy(extra: ParamMap): Transformer = { + super.copy(extra).asInstanceOf[Transformer] + } } /** + * :: DeveloperApi :: * Abstract class for transformers that take one input column, apply transformation, and output the * result as a new column. */ -private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]] +@DeveloperApi +abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, OUT, T]] extends Transformer with HasInputCol with HasOutputCol with Logging { /** @group setParam */ @@ -74,7 +92,7 @@ private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, O * account of the embedded param map. So the param values should be determined solely by the input * param map. */ - protected def createTransformFunc(paramMap: ParamMap): IN => OUT + protected def createTransformFunc: IN => OUT /** * Returns the data type of the output column. @@ -86,22 +104,20 @@ private[ml] abstract class UnaryTransformer[IN, OUT, T <: UnaryTransformer[IN, O */ protected def validateInputType(inputType: DataType): Unit = {} - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - val inputType = schema(map(inputCol)).dataType + override def transformSchema(schema: StructType): StructType = { + val inputType = schema($(inputCol)).dataType validateInputType(inputType) - if (schema.fieldNames.contains(map(outputCol))) { - throw new IllegalArgumentException(s"Output column ${map(outputCol)} already exists.") + if (schema.fieldNames.contains($(outputCol))) { + throw new IllegalArgumentException(s"Output column ${$(outputCol)} already exists.") } val outputFields = schema.fields :+ - StructField(map(outputCol), outputDataType, nullable = false) + StructField($(outputCol), outputDataType, nullable = false) StructType(outputFields) } - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - dataset.withColumn(map(outputCol), - callUDF(this.createTransformFunc(map), outputDataType, dataset(map(inputCol)))) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + dataset.withColumn($(outputCol), + callUDF(this.createTransformFunc, outputDataType, dataset($(inputCol)))) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala index d7dee8fed2a55..457c15830fd38 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeGroup.scala @@ -19,10 +19,12 @@ package org.apache.spark.ml.attribute import scala.collection.mutable.ArrayBuffer +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.VectorUDT import org.apache.spark.sql.types.{Metadata, MetadataBuilder, StructField} /** + * :: DeveloperApi :: * Attributes that describe a vector ML column. * * @param name name of the attribute group (the ML column name) @@ -31,6 +33,7 @@ import org.apache.spark.sql.types.{Metadata, MetadataBuilder, StructField} * @param attrs optional array of attributes. Attribute will be copied with their corresponding * indices in the array. 
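A minimal custom UnaryTransformer sketch against the contract above; the class and column names are made up, only createTransformFunc and outputDataType are overridden (plus a uid), and the default Params.copy is assumed to be sufficient:

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{DataType, StringType}

// uppercases a string column
class Upper(override val uid: String) extends UnaryTransformer[String, String, Upper] {
  def this() = this(Identifiable.randomUID("upper"))
  override protected def createTransformFunc: String => String = _.toUpperCase
  override protected def outputDataType: DataType = StringType
}

// usage: new Upper().setInputCol("text").setOutputCol("textUpper").transform(df)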
*/ +@DeveloperApi class AttributeGroup private ( val name: String, val numAttributes: Option[Int], @@ -123,6 +126,7 @@ class AttributeGroup private ( nominalMetadata += nominal.toMetadataImpl(withType = false) case binary: BinaryAttribute => binaryMetadata += binary.toMetadataImpl(withType = false) + case UnresolvedAttribute => } val attrBldr = new MetadataBuilder if (numericMetadata.nonEmpty) { @@ -181,7 +185,11 @@ class AttributeGroup private ( } } -/** Factory methods to create attribute groups. */ +/** + * :: DeveloperApi :: + * Factory methods to create attribute groups. + */ +@DeveloperApi object AttributeGroup { import AttributeKeys._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala index 65e7e43d5a5b0..5c7089b491677 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/AttributeType.scala @@ -17,12 +17,17 @@ package org.apache.spark.ml.attribute +import org.apache.spark.annotation.DeveloperApi + /** + * :: DeveloperApi :: * An enum-like type for attribute types: [[AttributeType$#Numeric]], [[AttributeType$#Nominal]], * and [[AttributeType$#Binary]]. */ +@DeveloperApi sealed abstract class AttributeType(val name: String) +@DeveloperApi object AttributeType { /** Numeric type. */ @@ -43,6 +48,12 @@ object AttributeType { Binary } + /** Unresolved type. */ + val Unresolved: AttributeType = { + case object Unresolved extends AttributeType("unresolved") + Unresolved + } + /** * Gets the [[AttributeType]] object from its name. * @param name attribute type name: "numeric", "nominal", or "binary" @@ -54,6 +65,8 @@ object AttributeType { Nominal } else if (name == Binary.name) { Binary + } else if (name == Unresolved.name) { + Unresolved } else { throw new IllegalArgumentException(s"Cannot recognize type $name.") } diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala index 5717d6ec2eaec..ce43a450daad0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala @@ -19,11 +19,14 @@ package org.apache.spark.ml.attribute import scala.annotation.varargs +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.types.{DoubleType, Metadata, MetadataBuilder, StructField} /** + * :: DeveloperApi :: * Abstract class for ML attributes. */ +@DeveloperApi sealed abstract class Attribute extends Serializable { name.foreach { n => @@ -125,10 +128,20 @@ private[attribute] trait AttributeFactory { */ def fromStructField(field: StructField): Attribute = { require(field.dataType == DoubleType) - fromMetadata(field.metadata.getMetadata(AttributeKeys.ML_ATTR)).withName(field.name) + val metadata = field.metadata + val mlAttr = AttributeKeys.ML_ATTR + if (metadata.contains(mlAttr)) { + fromMetadata(metadata.getMetadata(mlAttr)).withName(field.name) + } else { + UnresolvedAttribute + } } } +/** + * :: DeveloperApi :: + */ +@DeveloperApi object Attribute extends AttributeFactory { private[attribute] override def fromMetadata(metadata: Metadata): Attribute = { @@ -157,6 +170,7 @@ object Attribute extends AttributeFactory { /** + * :: DeveloperApi :: * A numeric attribute with optional summary statistics. 
* @param name optional name * @param index optional index @@ -165,6 +179,7 @@ object Attribute extends AttributeFactory { * @param std optional standard deviation * @param sparsity optional sparsity (ratio of zeros) */ +@DeveloperApi class NumericAttribute private[ml] ( override val name: Option[String] = None, override val index: Option[Int] = None, @@ -272,8 +287,10 @@ class NumericAttribute private[ml] ( } /** + * :: DeveloperApi :: * Factory methods for numeric attributes. */ +@DeveloperApi object NumericAttribute extends AttributeFactory { /** The default numeric attribute. */ @@ -292,6 +309,7 @@ object NumericAttribute extends AttributeFactory { } /** + * :: DeveloperApi :: * A nominal attribute. * @param name optional name * @param index optional index @@ -300,6 +318,7 @@ object NumericAttribute extends AttributeFactory { * defined. * @param values optional values. At most one of `numValues` and `values` can be defined. */ +@DeveloperApi class NominalAttribute private[ml] ( override val name: Option[String] = None, override val index: Option[Int] = None, @@ -424,7 +443,11 @@ class NominalAttribute private[ml] ( } } -/** Factory methods for nominal attributes. */ +/** + * :: DeveloperApi :: + * Factory methods for nominal attributes. + */ +@DeveloperApi object NominalAttribute extends AttributeFactory { /** The default nominal attribute. */ @@ -444,11 +467,13 @@ object NominalAttribute extends AttributeFactory { } /** + * :: DeveloperApi :: * A binary attribute. * @param name optional name * @param index optional index * @param values optionla values. If set, its size must be 2. */ +@DeveloperApi class BinaryAttribute private[ml] ( override val name: Option[String] = None, override val index: Option[Int] = None, @@ -520,7 +545,11 @@ class BinaryAttribute private[ml] ( } } -/** Factory methods for binary attributes. */ +/** + * :: DeveloperApi :: + * Factory methods for binary attributes. + */ +@DeveloperApi object BinaryAttribute extends AttributeFactory { /** The default binary attribute. */ @@ -535,3 +564,34 @@ object BinaryAttribute extends AttributeFactory { new BinaryAttribute(name, index, values) } } + +/** + * :: DeveloperApi :: + * An unresolved attribute. 
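How the attribute factories and the new UnresolvedAttribute fallback fit together, as a rough sketch with made-up column names:

import org.apache.spark.ml.attribute.{Attribute, NominalAttribute}
import org.apache.spark.sql.types.{DoubleType, StructField}

// a categorical column with known values, carried in the column metadata
val color = NominalAttribute.defaultAttr.withName("color").withValues("red", "green", "blue")
val colorField = color.toStructField()
Attribute.fromStructField(colorField)                     // recovers the NominalAttribute

// a Double column without ML metadata now resolves instead of failing
Attribute.fromStructField(StructField("x", DoubleType))   // UnresolvedAttribute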
+ */ +@DeveloperApi +object UnresolvedAttribute extends Attribute { + + override def attrType: AttributeType = AttributeType.Unresolved + + override def withIndex(index: Int): Attribute = this + + override def isNumeric: Boolean = false + + override def withoutIndex: Attribute = this + + override def isNominal: Boolean = false + + override def name: Option[String] = None + + override private[attribute] def toMetadataImpl(withType: Boolean): Metadata = { + Metadata.empty + } + + override def withoutName: Attribute = this + + override def index: Option[Int] = None + + override def withName(name: String): Attribute = this + +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala index 29339c98f51cf..263d580fe2dd3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala @@ -17,9 +17,8 @@ package org.apache.spark.ml.classification -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams} -import org.apache.spark.ml.param.{ParamMap, Params} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor} import org.apache.spark.ml.param.shared.HasRawPredictionCol import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} @@ -29,44 +28,36 @@ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} /** - * :: DeveloperApi :: - * Params for classification. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. + * (private[spark]) Params for classification. */ -@DeveloperApi -private[spark] trait ClassifierParams extends PredictorParams - with HasRawPredictionCol { +private[spark] trait ClassifierParams + extends PredictorParams with HasRawPredictionCol { override protected def validateAndTransformSchema( schema: StructType, - paramMap: ParamMap, fitting: Boolean, featuresDataType: DataType): StructType = { - val parentSchema = super.validateAndTransformSchema(schema, paramMap, fitting, featuresDataType) - val map = extractParamMap(paramMap) - SchemaUtils.appendColumn(parentSchema, map(rawPredictionCol), new VectorUDT) + val parentSchema = super.validateAndTransformSchema(schema, fitting, featuresDataType) + SchemaUtils.appendColumn(parentSchema, $(rawPredictionCol), new VectorUDT) } } /** - * :: AlphaComponent :: + * :: DeveloperApi :: + * * Single-label binary or multiclass classification. * Classes are indexed {0, 1, ..., numClasses - 1}. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam E Concrete Estimator type * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. 
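A minimal sketch of the Attribute.fromStructField contract introduced above: a DoubleType column without "ml_attr" metadata now resolves to UnresolvedAttribute instead of failing on the missing key, while a column written through the attribute API still round-trips to a concrete attribute. The column names and nominal values below are illustrative only.

import org.apache.spark.ml.attribute.{Attribute, NominalAttribute, UnresolvedAttribute}
import org.apache.spark.sql.types.{DoubleType, StructField}

// A plain double column with no ML metadata falls back to the unresolved attribute.
val bare = StructField("rawScore", DoubleType)
assert(Attribute.fromStructField(bare) == UnresolvedAttribute)

// A column produced via the attribute API resolves back to a concrete attribute.
val indexed: StructField = NominalAttribute.defaultAttr
  .withName("label")
  .withValues("negative", "positive")
  .toStructField()
assert(Attribute.fromStructField(indexed).isNominal)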
*/ -@AlphaComponent -private[spark] abstract class Classifier[ +@DeveloperApi +abstract class Classifier[ FeaturesType, E <: Classifier[FeaturesType, E, M], M <: ClassificationModel[FeaturesType, M]] - extends Predictor[FeaturesType, E, M] - with ClassifierParams { + extends Predictor[FeaturesType, E, M] with ClassifierParams { /** @group setParam */ def setRawPredictionCol(value: String): E = set(rawPredictionCol, value).asInstanceOf[E] @@ -75,17 +66,15 @@ private[spark] abstract class Classifier[ } /** - * :: AlphaComponent :: + * :: DeveloperApi :: + * * Model produced by a [[Classifier]]. * Classes are indexed {0, 1, ..., numClasses - 1}. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] +@DeveloperApi abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[FeaturesType, M]] extends PredictionModel[FeaturesType, M] with ClassifierParams { @@ -102,27 +91,30 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur * - raw predictions (confidences) as [[rawPredictionCol]] of type [[Vector]]. * * @param dataset input dataset - * @param paramMap additional parameters, overwrite embedded params * @return transformed dataset */ - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - // This default implementation should be overridden as needed. - - // Check schema - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - - // Prepare model - val tmpModel = if (paramMap.size != 0) { - val tmpModel = this.copy() - Params.inheritValues(paramMap, parent, tmpModel) - tmpModel - } else { - this + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + + // Output selected columns only. + // This is a bit complicated since it tries to avoid repeated computation. + var outputData = dataset + var numColsOutput = 0 + if (getRawPredictionCol != "") { + outputData = outputData.withColumn(getRawPredictionCol, + callUDF(predictRaw _, new VectorUDT, col(getFeaturesCol))) + numColsOutput += 1 + } + if (getPredictionCol != "") { + val predUDF = if (getRawPredictionCol != "") { + callUDF(raw2prediction _, DoubleType, col(getRawPredictionCol)) + } else { + callUDF(predict _, DoubleType, col(getFeaturesCol)) + } + outputData = outputData.withColumn(getPredictionCol, predUDF) + numColsOutput += 1 } - val (numColsOutput, outputData) = - ClassificationModel.transformColumnsImpl[FeaturesType](dataset, tmpModel, map) if (numColsOutput == 0) { logWarning(s"$uid: ClassificationModel.transform() was called as NOOP" + " since no output columns were set.") @@ -131,22 +123,17 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur } /** - * :: DeveloperApi :: - * * Predict label for the given features. * This internal method is used to implement [[transform()]] and output [[predictionCol]]. * * This default implementation for classification predicts the index of the maximum value * from [[predictRaw()]]. */ - @DeveloperApi override protected def predict(features: FeaturesType): Double = { - predictRaw(features).toArray.zipWithIndex.maxBy(_._1)._2 + raw2prediction(predictRaw(features)) } /** - * :: DeveloperApi :: - * * Raw prediction for each possible label. 
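The rewritten transform above wires the prediction column through raw2prediction, whose base-class default (shown just below) is simply the index of the largest raw score. A tiny illustrative check of that default, using made-up raw scores:

import org.apache.spark.mllib.linalg.Vectors

// Default raw2prediction: the predicted label is the argmax of the raw scores.
val rawPrediction = Vectors.dense(-0.3, 2.1, 0.4)
val predicted = rawPrediction.toDense.argmax.toDouble
assert(predicted == 1.0)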
* The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives * a measure of confidence in each possible label (where larger = more confident). @@ -156,52 +143,12 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur * This raw prediction may be any real number, where a larger value indicates greater * confidence for that label. */ - @DeveloperApi protected def predictRaw(features: FeaturesType): Vector -} - -private[ml] object ClassificationModel { - /** - * Added prediction column(s). This is separated from [[ClassificationModel.transform()]] - * since it is used by [[org.apache.spark.ml.classification.ProbabilisticClassificationModel]]. - * @param dataset Input dataset - * @param map Parameter map. This will NOT be merged with the embedded paramMap; the merge - * should already be done. - * @return (number of columns added, transformed dataset) + * Given a vector of raw predictions, select the predicted label. + * This may be overridden to support thresholds which favor particular labels. + * @return predicted label */ - def transformColumnsImpl[FeaturesType]( - dataset: DataFrame, - model: ClassificationModel[FeaturesType, _], - map: ParamMap): (Int, DataFrame) = { - - // Output selected columns only. - // This is a bit complicated since it tries to avoid repeated computation. - var tmpData = dataset - var numColsOutput = 0 - if (map(model.rawPredictionCol) != "") { - // output raw prediction - val features2raw: FeaturesType => Vector = model.predictRaw - tmpData = tmpData.withColumn(map(model.rawPredictionCol), - callUDF(features2raw, new VectorUDT, col(map(model.featuresCol)))) - numColsOutput += 1 - if (map(model.predictionCol) != "") { - val raw2pred: Vector => Double = (rawPred) => { - rawPred.toArray.zipWithIndex.maxBy(_._1)._2 - } - tmpData = tmpData.withColumn(map(model.predictionCol), - callUDF(raw2pred, DoubleType, col(map(model.rawPredictionCol)))) - numColsOutput += 1 - } - } else if (map(model.predictionCol) != "") { - // output prediction - val features2pred: FeaturesType => Double = model.predict - tmpData = tmpData.withColumn(map(model.predictionCol), - callUDF(features2pred, DoubleType, col(map(model.featuresCol)))) - numColsOutput += 1 - } - (numColsOutput, tmpData) - } - + protected def raw2prediction(rawPrediction: Vector): Double = rawPrediction.toDense.argmax } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index ee2a8dc6db171..8030e0728a56c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -17,12 +17,11 @@ package org.apache.spark.ml.classification -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{Predictor, PredictionModel} -import org.apache.spark.ml.impl.tree._ -import org.apache.spark.ml.param.{Params, ParamMap} -import org.apache.spark.ml.tree.{DecisionTreeModel, Node} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{PredictionModel, Predictor} +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeClassifierParams} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import 
org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{DecisionTree => OldDecisionTree} @@ -31,20 +30,20 @@ import org.apache.spark.mllib.tree.model.{DecisionTreeModel => OldDecisionTreeMo import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame - /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm * for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ -@AlphaComponent -final class DecisionTreeClassifier +@Experimental +final class DecisionTreeClassifier(override val uid: String) extends Predictor[Vector, DecisionTreeClassifier, DecisionTreeClassificationModel] with DecisionTreeParams with TreeClassifierParams { + def this() = this(Identifiable.randomUID("dtc")) + // Override parameter setters from parent trait for Java API compatibility. override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value) @@ -64,22 +63,20 @@ final class DecisionTreeClassifier override def setImpurity(value: String): this.type = super.setImpurity(value) - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): DecisionTreeClassificationModel = { + override protected def train(dataset: DataFrame): DecisionTreeClassificationModel = { val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol))) - val numClasses: Int = MetadataUtils.getNumClasses(dataset.schema(paramMap(labelCol))) match { + MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val numClasses: Int = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { case Some(n: Int) => n case None => throw new IllegalArgumentException("DecisionTreeClassifier was given input" + - s" with invalid label column ${paramMap(labelCol)}, without the number of classes" + + s" with invalid label column ${$(labelCol)}, without the number of classes" + " specified. See StringIndexer.") // TODO: Automatically index labels: SPARK-7126 } - val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap) + val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val strategy = getOldStrategy(categoricalFeatures, numClasses) val oldModel = OldDecisionTree.train(oldDataset, strategy) - DecisionTreeClassificationModel.fromOld(oldModel, this, paramMap, categoricalFeatures) + DecisionTreeClassificationModel.fromOld(oldModel, this, categoricalFeatures) } /** (private[ml]) Create a Strategy instance to use with the old API. */ @@ -91,22 +88,21 @@ final class DecisionTreeClassifier } } +@Experimental object DecisionTreeClassifier { /** Accessor for supported impurities: entropy, gini */ final val supportedImpurities: Array[String] = TreeClassifierParams.supportedImpurities } /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. 
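As the train() method above shows, DecisionTreeClassifier requires the label column to carry the number of classes in its ML metadata and points the user at StringIndexer otherwise. A hedged usage sketch under that assumption; `df` and its "category"/"features" columns are assumed, not part of the patch.

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.ml.feature.StringIndexer

// Index the raw label first so the classifier can read numClasses from metadata.
val indexer = new StringIndexer()
  .setInputCol("category")      // assumed raw label column
  .setOutputCol("indexedLabel")

val dt = new DecisionTreeClassifier()
  .setLabelCol("indexedLabel")
  .setFeaturesCol("features")   // assumed vector column
  .setMaxDepth(5)

val model = new Pipeline().setStages(Array(indexer, dt)).fit(df)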
*/ -@AlphaComponent +@Experimental final class DecisionTreeClassificationModel private[ml] ( - override val parent: DecisionTreeClassifier, - override val fittingParamMap: ParamMap, + override val uid: String, override val rootNode: Node) extends PredictionModel[Vector, DecisionTreeClassificationModel] with DecisionTreeModel with Serializable { @@ -118,10 +114,8 @@ final class DecisionTreeClassificationModel private[ml] ( rootNode.predict(features) } - override protected def copy(): DecisionTreeClassificationModel = { - val m = new DecisionTreeClassificationModel(parent, fittingParamMap, rootNode) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): DecisionTreeClassificationModel = { + copyValues(new DecisionTreeClassificationModel(uid, rootNode), extra) } override def toString: String = { @@ -140,12 +134,12 @@ private[ml] object DecisionTreeClassificationModel { def fromOld( oldModel: OldDecisionTreeModel, parent: DecisionTreeClassifier, - fittingParamMap: ParamMap, categoricalFeatures: Map[Int, Int]): DecisionTreeClassificationModel = { require(oldModel.algo == OldAlgo.Classification, s"Cannot convert non-classification DecisionTreeModel (old API) to" + s" DecisionTreeClassificationModel (new API). Algo is: ${oldModel.algo}") val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures) - new DecisionTreeClassificationModel(parent, fittingParamMap, rootNode) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("dtc") + new DecisionTreeClassificationModel(uid, rootNode) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 3d849867d4c47..62f4b51f770e9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -20,36 +20,35 @@ package org.apache.spark.ml.classification import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.Logging -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ -import org.apache.spark.ml.param.{Param, Params, ParamMap} +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{PredictionModel, Predictor} +import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.regression.DecisionTreeRegressionModel -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeClassifierParams, TreeEnsembleModel} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{GradientBoostedTrees => OldGBT} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} -import org.apache.spark.mllib.tree.loss.{Loss => OldLoss, LogLoss => OldLogLoss} +import org.apache.spark.mllib.tree.loss.{LogLoss => OldLogLoss, Loss => OldLoss} import org.apache.spark.mllib.tree.model.{GradientBoostedTreesModel => OldGBTModel} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame - /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * learning algorithm for 
classification. * It supports binary labels, as well as both continuous and categorical features. * Note: Multiclass labels are not currently supported. */ -@AlphaComponent -final class GBTClassifier +@Experimental +final class GBTClassifier(override val uid: String) extends Predictor[Vector, GBTClassifier, GBTClassificationModel] with GBTParams with TreeClassifierParams with Logging { + def this() = this(Identifiable.randomUID("gbtc")) + // Override parameter setters from parent trait for Java API compatibility. // Parameters from TreeClassifierParams: @@ -112,7 +111,7 @@ final class GBTClassifier def setLossType(value: String): this.type = set(lossType, value) /** @group getParam */ - def getLossType: String = getOrDefault(lossType).toLowerCase + def getLossType: String = $(lossType).toLowerCase /** (private[ml]) Convert new loss to old loss. */ override private[ml] def getOldLossType: OldLoss = { @@ -124,28 +123,27 @@ final class GBTClassifier } } - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): GBTClassificationModel = { + override protected def train(dataset: DataFrame): GBTClassificationModel = { val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol))) - val numClasses: Int = MetadataUtils.getNumClasses(dataset.schema(paramMap(labelCol))) match { + MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val numClasses: Int = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { case Some(n: Int) => n case None => throw new IllegalArgumentException("GBTClassifier was given input" + - s" with invalid label column ${paramMap(labelCol)}, without the number of classes" + + s" with invalid label column ${$(labelCol)}, without the number of classes" + " specified. See StringIndexer.") // TODO: Automatically index labels: SPARK-7126 } require(numClasses == 2, s"GBTClassifier only supports binary classification but was given numClasses = $numClasses") - val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap) + val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val boostingStrategy = super.getOldBoostingStrategy(categoricalFeatures, OldAlgo.Classification) val oldGBT = new OldGBT(boostingStrategy) val oldModel = oldGBT.run(oldDataset) - GBTClassificationModel.fromOld(oldModel, this, paramMap, categoricalFeatures) + GBTClassificationModel.fromOld(oldModel, this, categoricalFeatures) } } +@Experimental object GBTClassifier { // The losses below should be lowercase. /** Accessor for supported loss settings: logistic */ @@ -153,8 +151,7 @@ object GBTClassifier { } /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * model for classification. * It supports binary labels, as well as both continuous and categorical features. @@ -162,10 +159,9 @@ object GBTClassifier { * @param _trees Decision trees in the ensemble. * @param _treeWeights Weights for the decision trees in the ensemble. 
*/ -@AlphaComponent +@Experimental final class GBTClassificationModel( - override val parent: GBTClassifier, - override val fittingParamMap: ParamMap, + override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], private val _treeWeights: Array[Double]) extends PredictionModel[Vector, GBTClassificationModel] @@ -188,10 +184,8 @@ final class GBTClassificationModel( if (prediction > 0.0) 1.0 else 0.0 } - override protected def copy(): GBTClassificationModel = { - val m = new GBTClassificationModel(parent, fittingParamMap, _trees, _treeWeights) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): GBTClassificationModel = { + copyValues(new GBTClassificationModel(uid, _trees, _treeWeights), extra) } override def toString: String = { @@ -210,14 +204,14 @@ private[ml] object GBTClassificationModel { def fromOld( oldModel: OldGBTModel, parent: GBTClassifier, - fittingParamMap: ParamMap, categoricalFeatures: Map[Int, Int]): GBTClassificationModel = { require(oldModel.algo == OldAlgo.Classification, "Cannot convert GradientBoostedTreesModel" + s" with algo=${oldModel.algo} (old API) to GBTClassificationModel (new API).") val newTrees = oldModel.trees.map { tree => - // parent, fittingParamMap for each tree is null since there are no good ways to set these. - DecisionTreeRegressionModel.fromOld(tree, null, null, categoricalFeatures) + // parent for each tree is null since there is no good way to set this. + DecisionTreeRegressionModel.fromOld(tree, null, categoricalFeatures) } - new GBTClassificationModel(parent, fittingParamMap, newTrees, oldModel.treeWeights) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("gbtc") + new GBTClassificationModel(parent.uid, newTrees, oldModel.treeWeights) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index cc8b0721cf2b6..d13109d9da4c0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -17,82 +17,214 @@ package org.apache.spark.ml.classification -import org.apache.spark.annotation.AlphaComponent +import scala.collection.mutable + +import breeze.linalg.{DenseVector => BDV, norm => brzNorm} +import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} + +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS -import org.apache.spark.mllib.linalg.{VectorUDT, BLAS, Vector, Vectors} +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.linalg.BLAS._ +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.functions._ import org.apache.spark.storage.StorageLevel - /** * Params for logistic regression. 
*/ private[classification] trait LogisticRegressionParams extends ProbabilisticClassifierParams - with HasRegParam with HasMaxIter with HasFitIntercept with HasThreshold { - - setDefault(regParam -> 0.1, maxIter -> 100, threshold -> 0.5) -} + with HasRegParam with HasElasticNetParam with HasMaxIter with HasFitIntercept with HasTol + with HasThreshold /** - * :: AlphaComponent :: - * + * :: Experimental :: * Logistic regression. * Currently, this class only supports binary classification. */ -@AlphaComponent -class LogisticRegression +@Experimental +class LogisticRegression(override val uid: String) extends ProbabilisticClassifier[Vector, LogisticRegression, LogisticRegressionModel] - with LogisticRegressionParams { + with LogisticRegressionParams with Logging { - /** @group setParam */ + def this() = this(Identifiable.randomUID("logreg")) + + /** + * Set the regularization parameter. + * Default is 0.0. + * @group setParam + */ def setRegParam(value: Double): this.type = set(regParam, value) + setDefault(regParam -> 0.0) - /** @group setParam */ + /** + * Set the ElasticNet mixing parameter. + * For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty. + * For 0 < alpha < 1, the penalty is a combination of L1 and L2. + * Default is 0.0 which is an L2 penalty. + * @group setParam + */ + def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) + setDefault(elasticNetParam -> 0.0) + + /** + * Set the maximal number of iterations. + * Default is 100. + * @group setParam + */ def setMaxIter(value: Int): this.type = set(maxIter, value) + setDefault(maxIter -> 100) + + /** + * Set the convergence tolerance of iterations. + * Smaller value will lead to higher accuracy with the cost of more iterations. + * Default is 1E-6. + * @group setParam + */ + def setTol(value: Double): this.type = set(tol, value) + setDefault(tol -> 1E-6) /** @group setParam */ def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) + setDefault(fitIntercept -> true) /** @group setParam */ def setThreshold(value: Double): this.type = set(threshold, value) + setDefault(threshold -> 0.5) - override protected def train(dataset: DataFrame, paramMap: ParamMap): LogisticRegressionModel = { + override protected def train(dataset: DataFrame): LogisticRegressionModel = { // Extract columns from data. If dataset is persisted, do not persist oldDataset. 
- val oldDataset = extractLabeledPoints(dataset, paramMap) + val instances = extractLabeledPoints(dataset).map { + case LabeledPoint(label: Double, features: Vector) => (label, features) + } val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE - if (handlePersistence) { - oldDataset.persist(StorageLevel.MEMORY_AND_DISK) + if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) + + val (summarizer, labelSummarizer) = instances.treeAggregate( + (new MultivariateOnlineSummarizer, new MultiClassSummarizer))( + seqOp = (c, v) => (c, v) match { + case ((summarizer: MultivariateOnlineSummarizer, labelSummarizer: MultiClassSummarizer), + (label: Double, features: Vector)) => + (summarizer.add(features), labelSummarizer.add(label)) + }, + combOp = (c1, c2) => (c1, c2) match { + case ((summarizer1: MultivariateOnlineSummarizer, + classSummarizer1: MultiClassSummarizer), (summarizer2: MultivariateOnlineSummarizer, + classSummarizer2: MultiClassSummarizer)) => + (summarizer1.merge(summarizer2), classSummarizer1.merge(classSummarizer2)) + }) + + val histogram = labelSummarizer.histogram + val numInvalid = labelSummarizer.countInvalid + val numClasses = histogram.length + val numFeatures = summarizer.mean.size + + if (numInvalid != 0) { + val msg = s"Classification labels should be in {0 to ${numClasses - 1} " + + s"Found $numInvalid invalid labels." + logError(msg) + throw new SparkException(msg) + } + + if (numClasses > 2) { + val msg = s"Currently, LogisticRegression with ElasticNet in ML package only supports " + + s"binary classification. Found $numClasses in the input dataset." + logError(msg) + throw new SparkException(msg) } - // Train model - val lr = new LogisticRegressionWithLBFGS() - .setIntercept(paramMap(fitIntercept)) - lr.optimizer - .setRegParam(paramMap(regParam)) - .setNumIterations(paramMap(maxIter)) - val oldModel = lr.run(oldDataset) - val lrm = new LogisticRegressionModel(this, paramMap, oldModel.weights, oldModel.intercept) + val featuresMean = summarizer.mean.toArray + val featuresStd = summarizer.variance.toArray.map(math.sqrt) + + val regParamL1 = $(elasticNetParam) * $(regParam) + val regParamL2 = (1.0 - $(elasticNetParam)) * $(regParam) - if (handlePersistence) { - oldDataset.unpersist() + val costFun = new LogisticCostFun(instances, numClasses, $(fitIntercept), + featuresStd, featuresMean, regParamL2) + + val optimizer = if ($(elasticNetParam) == 0.0 || $(regParam) == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) + } else { + // Remove the L1 penalization on the intercept + def regParamL1Fun = (index: Int) => { + if (index == numFeatures) 0.0 else regParamL1 + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, regParamL1Fun, $(tol)) + } + + val initialWeightsWithIntercept = + Vectors.zeros(if ($(fitIntercept)) numFeatures + 1 else numFeatures) + + if ($(fitIntercept)) { + /** + * For binary logistic regression, when we initialize the weights as zeros, + * it will converge faster if we initialize the intercept such that + * it follows the distribution of the labels. 
+ * + * {{{ + * P(0) = 1 / (1 + \exp(b)), and + * P(1) = \exp(b) / (1 + \exp(b)) + * }}}, hence + * {{{ + * b = \log{P(1) / P(0)} = \log{count_1 / count_0} + * }}} + */ + initialWeightsWithIntercept.toArray(numFeatures) + = math.log(histogram(1).toDouble / histogram(0).toDouble) } - lrm + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + initialWeightsWithIntercept.toBreeze.toDenseVector) + + var state = states.next() + val lossHistory = mutable.ArrayBuilder.make[Double] + + while (states.hasNext) { + lossHistory += state.value + state = states.next() + } + lossHistory += state.value + + // The weights are trained in the scaled space; we're converting them back to + // the original space. + val weightsWithIntercept = { + val rawWeights = state.x.toArray.clone() + var i = 0 + // Note that the intercept in scaled space and original space is the same; + // as a result, no scaling is needed. + while (i < numFeatures) { + rawWeights(i) *= { if (featuresStd(i) != 0.0) 1.0 / featuresStd(i) else 0.0 } + i += 1 + } + Vectors.dense(rawWeights) + } + + if (handlePersistence) instances.unpersist() + + val (weights, intercept) = if ($(fitIntercept)) { + (Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)), + weightsWithIntercept(weightsWithIntercept.size - 1)) + } else { + (weightsWithIntercept, 0.0) + } + + new LogisticRegressionModel(uid, weights.compressed, intercept) } } - /** - * :: AlphaComponent :: - * + * :: Experimental :: * Model produced by [[LogisticRegression]]. */ -@AlphaComponent +@Experimental class LogisticRegressionModel private[ml] ( - override val parent: LogisticRegression, - override val fittingParamMap: ParamMap, + override val uid: String, val weights: Vector, val intercept: Double) extends ProbabilisticClassificationModel[Vector, LogisticRegressionModel] @@ -101,78 +233,17 @@ class LogisticRegressionModel private[ml] ( /** @group setParam */ def setThreshold(value: Double): this.type = set(threshold, value) + /** Margin (rawPrediction) for class label 1. For binary classification only. */ private val margin: Vector => Double = (features) => { BLAS.dot(features, weights) + intercept } + /** Score (probability) for class label 1. For binary classification only. */ private val score: Vector => Double = (features) => { val m = margin(features) 1.0 / (1.0 + math.exp(-m)) } - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - // This is overridden (a) to be more efficient (avoiding re-computing values when creating - // multiple output columns) and (b) to handle threshold, which the abstractions do not use. - // TODO: We should abstract away the steps defined by UDFs below so that the abstractions - // can call whichever UDFs are needed to create the output columns. - - // Check schema - transformSchema(dataset.schema, paramMap, logging = true) - - val map = extractParamMap(paramMap) - - // Output selected columns only. - // This is a bit complicated since it tries to avoid repeated computation. 
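A quick sanity check on the intercept warm start described in the comment above: with label counts count0 and count1, b = log(count1 / count0) makes the initial P(1) equal the empirical class frequency. The counts below are made up.

// Warm-starting the intercept at the empirical log-odds of the labels.
val count0 = 75.0   // assumed number of label-0 examples
val count1 = 25.0   // assumed number of label-1 examples
val b = math.log(count1 / count0)
val p1 = math.exp(b) / (1.0 + math.exp(b))
assert(math.abs(p1 - count1 / (count0 + count1)) < 1e-12)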
- // rawPrediction (-margin, margin) - // probability (1.0-score, score) - // prediction (max margin) - var tmpData = dataset - var numColsOutput = 0 - if (map(rawPredictionCol) != "") { - val features2raw: Vector => Vector = (features) => predictRaw(features) - tmpData = tmpData.withColumn(map(rawPredictionCol), - callUDF(features2raw, new VectorUDT, col(map(featuresCol)))) - numColsOutput += 1 - } - if (map(probabilityCol) != "") { - if (map(rawPredictionCol) != "") { - val raw2prob = udf { (rawPreds: Vector) => - val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1))) - Vectors.dense(1.0 - prob1, prob1): Vector - } - tmpData = tmpData.withColumn(map(probabilityCol), raw2prob(col(map(rawPredictionCol)))) - } else { - val features2prob = udf { (features: Vector) => predictProbabilities(features) : Vector } - tmpData = tmpData.withColumn(map(probabilityCol), features2prob(col(map(featuresCol)))) - } - numColsOutput += 1 - } - if (map(predictionCol) != "") { - val t = map(threshold) - if (map(probabilityCol) != "") { - val predict = udf { probs: Vector => - if (probs(1) > t) 1.0 else 0.0 - } - tmpData = tmpData.withColumn(map(predictionCol), predict(col(map(probabilityCol)))) - } else if (map(rawPredictionCol) != "") { - val predict = udf { rawPreds: Vector => - val prob1 = 1.0 / (1.0 + math.exp(-rawPreds(1))) - if (prob1 > t) 1.0 else 0.0 - } - tmpData = tmpData.withColumn(map(predictionCol), predict(col(map(rawPredictionCol)))) - } else { - val predict = udf { features: Vector => this.predict(features) } - tmpData = tmpData.withColumn(map(predictionCol), predict(col(map(featuresCol)))) - } - numColsOutput += 1 - } - if (numColsOutput == 0) { - this.logWarning(s"$uid: LogisticRegressionModel.transform() was called as NOOP" + - " since no output columns were set.") - } - tmpData - } - override val numClasses: Int = 2 /** @@ -183,19 +254,301 @@ class LogisticRegressionModel private[ml] ( if (score(features) > getThreshold) 1 else 0 } - override protected def predictProbabilities(features: Vector): Vector = { - val s = score(features) - Vectors.dense(1.0 - s, s) + override protected def raw2probabilityInPlace(rawPrediction: Vector): Vector = { + rawPrediction match { + case dv: DenseVector => + var i = 0 + val size = dv.size + while (i < size) { + dv.values(i) = 1.0 / (1.0 + math.exp(-dv.values(i))) + i += 1 + } + dv + case sv: SparseVector => + throw new RuntimeException("Unexpected error in LogisticRegressionModel:" + + " raw2probabilitiesInPlace encountered SparseVector") + } } override protected def predictRaw(features: Vector): Vector = { val m = margin(features) - Vectors.dense(0.0, m) + Vectors.dense(-m, m) } - override protected def copy(): LogisticRegressionModel = { - val m = new LogisticRegressionModel(parent, fittingParamMap, weights, intercept) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): LogisticRegressionModel = { + copyValues(new LogisticRegressionModel(uid, weights, intercept), extra) + } + + override protected def raw2prediction(rawPrediction: Vector): Double = { + val t = getThreshold + val rawThreshold = if (t == 0.0) { + Double.NegativeInfinity + } else if (t == 1.0) { + Double.PositiveInfinity + } else { + math.log(t / (1.0 - t)) + } + if (rawPrediction(1) > rawThreshold) 1 else 0 + } + + override protected def probability2prediction(probability: Vector): Double = { + if (probability(1) > getThreshold) 1 else 0 + } +} + +/** + * MultiClassSummarizer computes the number of distinct labels and corresponding counts, + * and 
validates the data to see if the labels used for k class multi-label classification + * are in the range of {0, 1, ..., k - 1} in a online fashion. + * + * Two MultilabelSummarizer can be merged together to have a statistical summary of the + * corresponding joint dataset. + */ +private[classification] class MultiClassSummarizer extends Serializable { + private val distinctMap = new mutable.HashMap[Int, Long] + private var totalInvalidCnt: Long = 0L + + /** + * Add a new label into this MultilabelSummarizer, and update the distinct map. + * @param label The label for this data point. + * @return This MultilabelSummarizer + */ + def add(label: Double): this.type = { + if (label - label.toInt != 0.0 || label < 0) { + totalInvalidCnt += 1 + this + } + else { + val counts: Long = distinctMap.getOrElse(label.toInt, 0L) + distinctMap.put(label.toInt, counts + 1) + this + } + } + + /** + * Merge another MultilabelSummarizer, and update the distinct map. + * (Note that it will merge the smaller distinct map into the larger one using in-place + * merging, so either `this` or `other` object will be modified and returned.) + * + * @param other The other MultilabelSummarizer to be merged. + * @return Merged MultilabelSummarizer object. + */ + def merge(other: MultiClassSummarizer): MultiClassSummarizer = { + val (largeMap, smallMap) = if (this.distinctMap.size > other.distinctMap.size) { + (this, other) + } else { + (other, this) + } + smallMap.distinctMap.foreach { + case (key, value) => + val counts = largeMap.distinctMap.getOrElse(key, 0L) + largeMap.distinctMap.put(key, counts + value) + } + largeMap.totalInvalidCnt += smallMap.totalInvalidCnt + largeMap + } + + /** @return The total invalid input counts. */ + def countInvalid: Long = totalInvalidCnt + + /** @return The number of distinct labels in the input dataset. */ + def numClasses: Int = distinctMap.keySet.max + 1 + + /** @return The counts of each label in the input dataset. */ + def histogram: Array[Long] = { + val result = Array.ofDim[Long](numClasses) + var i = 0 + val len = result.length + while (i < len) { + result(i) = distinctMap.getOrElse(i, 0L) + i += 1 + } + result + } +} + +/** + * LogisticAggregator computes the gradient and loss for binary logistic loss function, as used + * in binary classification for samples in sparse or dense vector in a online fashion. + * + * Note that multinomial logistic loss is not supported yet! + * + * Two LogisticAggregator can be merged together to have a summary of loss and gradient of + * the corresponding joint dataset. + * + * @param weights The weights/coefficients corresponding to the features. + * @param numClasses the number of possible outcomes for k classes classification problem in + * Multinomial Logistic Regression. + * @param fitIntercept Whether to fit an intercept term. + * @param featuresStd The standard deviation values of the features. + * @param featuresMean The mean values of the features. 
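The summarizer above counts, rather than accepts, any label that is negative or not a whole number. Since the class is private[classification], the snippet below only mirrors its validity check for illustration; it is not part of the public API.

// Mirrors the check in MultiClassSummarizer.add: labels must be
// non-negative whole numbers encoded as doubles.
def isValidLabel(label: Double): Boolean =
  label - label.toInt == 0.0 && label >= 0

assert(Seq(0.0, 1.0, 7.0).forall(isValidLabel))
assert(Seq(-1.0, 1.5, 0.1).forall(l => !isValidLabel(l)))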
+ */ +private class LogisticAggregator( + weights: Vector, + numClasses: Int, + fitIntercept: Boolean, + featuresStd: Array[Double], + featuresMean: Array[Double]) extends Serializable { + + private var totalCnt: Long = 0L + private var lossSum = 0.0 + + private val weightsArray = weights match { + case dv: DenseVector => dv.values + case _ => + throw new IllegalArgumentException( + s"weights only supports dense vector but got type ${weights.getClass}.") + } + + private val dim = if (fitIntercept) weightsArray.length - 1 else weightsArray.length + + private val gradientSumArray = Array.ofDim[Double](weightsArray.length) + + /** + * Add a new training data to this LogisticAggregator, and update the loss and gradient + * of the objective function. + * + * @param label The label for this data point. + * @param data The features for one data point in dense/sparse vector format to be added + * into this aggregator. + * @return This LogisticAggregator object. + */ + def add(label: Double, data: Vector): this.type = { + require(dim == data.size, s"Dimensions mismatch when adding new sample." + + s" Expecting $dim but got ${data.size}.") + + val dataSize = data.size + + val localWeightsArray = weightsArray + val localGradientSumArray = gradientSumArray + + numClasses match { + case 2 => + /** + * For Binary Logistic Regression. + */ + val margin = - { + var sum = 0.0 + data.foreachActive { (index, value) => + if (featuresStd(index) != 0.0 && value != 0.0) { + sum += localWeightsArray(index) * (value / featuresStd(index)) + } + } + sum + { if (fitIntercept) localWeightsArray(dim) else 0.0 } + } + + val multiplier = (1.0 / (1.0 + math.exp(margin))) - label + + data.foreachActive { (index, value) => + if (featuresStd(index) != 0.0 && value != 0.0) { + localGradientSumArray(index) += multiplier * (value / featuresStd(index)) + } + } + + if (fitIntercept) { + localGradientSumArray(dim) += multiplier + } + + if (label > 0) { + // The following is equivalent to log(1 + exp(margin)) but more numerically stable. + lossSum += MLUtils.log1pExp(margin) + } else { + lossSum += MLUtils.log1pExp(margin) - margin + } + case _ => + new NotImplementedError("LogisticRegression with ElasticNet in ML package only supports " + + "binary classification for now.") + } + totalCnt += 1 + this + } + + /** + * Merge another LogisticAggregator, and update the loss and gradient + * of the objective function. + * (Note that it's in place merging; as a result, `this` object will be modified.) + * + * @param other The other LogisticAggregator to be merged. + * @return This LogisticAggregator object. + */ + def merge(other: LogisticAggregator): this.type = { + require(dim == other.dim, s"Dimensions mismatch when merging with another " + + s"LeastSquaresAggregator. 
Expecting $dim but got ${other.dim}.") + + if (other.totalCnt != 0) { + totalCnt += other.totalCnt + lossSum += other.lossSum + + var i = 0 + val localThisGradientSumArray = this.gradientSumArray + val localOtherGradientSumArray = other.gradientSumArray + val len = localThisGradientSumArray.length + while (i < len) { + localThisGradientSumArray(i) += localOtherGradientSumArray(i) + i += 1 + } + } + this + } + + def count: Long = totalCnt + + def loss: Double = lossSum / totalCnt + + def gradient: Vector = { + val result = Vectors.dense(gradientSumArray.clone()) + scal(1.0 / totalCnt, result) + result + } +} + +/** + * LogisticCostFun implements Breeze's DiffFunction[T] for a multinomial logistic loss function, + * as used in multi-class classification (it is also used in binary logistic regression). + * It returns the loss and gradient with L2 regularization at a particular point (weights). + * It's used in Breeze's convex optimization routines. + */ +private class LogisticCostFun( + data: RDD[(Double, Vector)], + numClasses: Int, + fitIntercept: Boolean, + featuresStd: Array[Double], + featuresMean: Array[Double], + regParamL2: Double) extends DiffFunction[BDV[Double]] { + + override def calculate(weights: BDV[Double]): (Double, BDV[Double]) = { + val w = Vectors.fromBreeze(weights) + + val logisticAggregator = data.treeAggregate(new LogisticAggregator(w, numClasses, fitIntercept, + featuresStd, featuresMean))( + seqOp = (c, v) => (c, v) match { + case (aggregator, (label, features)) => aggregator.add(label, features) + }, + combOp = (c1, c2) => (c1, c2) match { + case (aggregator1, aggregator2) => aggregator1.merge(aggregator2) + }) + + // regVal is the sum of weight squares for L2 regularization + val norm = if (regParamL2 == 0.0) { + 0.0 + } else if (fitIntercept) { + brzNorm(Vectors.dense(weights.toArray.slice(0, weights.size -1)).toBreeze, 2.0) + } else { + brzNorm(weights, 2.0) + } + val regVal = 0.5 * regParamL2 * norm * norm + + val loss = logisticAggregator.loss + regVal + val gradient = logisticAggregator.gradient + + if (fitIntercept) { + val wArray = w.toArray.clone() + wArray(wArray.length - 1) = 0.0 + axpy(regParamL2, Vectors.dense(wArray), gradient) + } else { + axpy(regParamL2, w, gradient) + } + + (loss, gradient.toBreeze.asInstanceOf[BDV[Double]]) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala new file mode 100644 index 0000000000000..825f9ed1b54b2 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.classification + +import java.util.UUID + +import scala.language.existentials + +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml._ +import org.apache.spark.ml.attribute._ +import org.apache.spark.ml.param.Param +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import org.apache.spark.storage.StorageLevel + +/** + * Params for [[OneVsRest]]. + */ +private[ml] trait OneVsRestParams extends PredictorParams { + + // scalastyle:off structural.type + type ClassifierType = Classifier[F, E, M] forSome { + type F + type M <: ClassificationModel[F, M] + type E <: Classifier[F, E, M] + } + // scalastyle:on structural.type + + /** + * param for the base binary classifier that we reduce multiclass classification into. + * @group param + */ + val classifier: Param[ClassifierType] = new Param(this, "classifier", "base binary classifier") + + /** @group getParam */ + def getClassifier: ClassifierType = $(classifier) +} + +/** + * :: Experimental :: + * Model produced by [[OneVsRest]]. + * This stores the models resulting from training k binary classifiers: one for each class. + * Each example is scored against all k models, and the model with the highest score + * is picked to label the example. + * + * @param labelMetadata Metadata of label column if it exists, or Nominal attribute + * representing the number of classes in training dataset otherwise. + * @param models The binary classification models for the reduction. + * The i-th model is produced by testing the i-th class (taking label 1) vs the rest + * (taking label 0). + */ +@Experimental +final class OneVsRestModel private[ml] ( + override val uid: String, + labelMetadata: Metadata, + val models: Array[_ <: ClassificationModel[_, _]]) + extends Model[OneVsRestModel] with OneVsRestParams { + + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema, fitting = false, getClassifier.featuresDataType) + } + + override def transform(dataset: DataFrame): DataFrame = { + // Check schema + transformSchema(dataset.schema, logging = true) + + // determine the input columns: these need to be passed through + val origCols = dataset.schema.map(f => col(f.name)) + + // add an accumulator column to store predictions of all the models + val accColName = "mbc$acc" + UUID.randomUUID().toString + val init: () => Map[Int, Double] = () => {Map()} + val mapType = MapType(IntegerType, DoubleType, valueContainsNull = false) + val newDataset = dataset.withColumn(accColName, callUDF(init, mapType)) + + // persist if underlying dataset is not persistent. 
+ val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + if (handlePersistence) { + newDataset.persist(StorageLevel.MEMORY_AND_DISK) + } + + // update the accumulator column with the result of prediction of models + val aggregatedDataset = models.zipWithIndex.foldLeft[DataFrame](newDataset) { + case (df, (model, index)) => + val rawPredictionCol = model.getRawPredictionCol + val columns = origCols ++ List(col(rawPredictionCol), col(accColName)) + + // add temporary column to store intermediate scores and update + val tmpColName = "mbc$tmp" + UUID.randomUUID().toString + val update: (Map[Int, Double], Vector) => Map[Int, Double] = + (predictions: Map[Int, Double], prediction: Vector) => { + predictions + ((index, prediction(1))) + } + val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol)) + val transformedDataset = model.transform(df).select(columns : _*) + val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf) + val newColumns = origCols ++ List(col(tmpColName)) + + // switch out the intermediate column with the accumulator column + updatedDataset.select(newColumns : _*).withColumnRenamed(tmpColName, accColName) + } + + if (handlePersistence) { + newDataset.unpersist() + } + + // output the index of the classifier with highest confidence as prediction + val label: Map[Int, Double] => Double = (predictions: Map[Int, Double]) => { + predictions.maxBy(_._2)._1.toDouble + } + + // output label and label metadata as prediction + val labelUdf = callUDF(label, DoubleType, col(accColName)) + aggregatedDataset.withColumn($(predictionCol), labelUdf.as($(predictionCol), labelMetadata)) + .drop(accColName) + } +} + +/** + * :: Experimental :: + * + * Reduction of Multiclass Classification to Binary Classification. + * Performs reduction using one against all strategy. + * For a multiclass classification with k classes, train k models (one per class). + * Each example is scored against all k models and the model with highest score + * is picked to label the example. + */ +@Experimental +final class OneVsRest(override val uid: String) + extends Estimator[OneVsRestModel] with OneVsRestParams { + + def this() = this(Identifiable.randomUID("oneVsRest")) + + /** @group setParam */ + def setClassifier(value: Classifier[_, _, _]): this.type = { + set(classifier, value.asInstanceOf[ClassifierType]) + } + + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema, fitting = true, getClassifier.featuresDataType) + } + + override def fit(dataset: DataFrame): OneVsRestModel = { + // determine number of classes either from metadata if provided, or via computation. + val labelSchema = dataset.schema($(labelCol)) + val computeNumClasses: () => Int = () => { + val Row(maxLabelIndex: Double) = dataset.agg(max($(labelCol))).head() + // classes are assumed to be numbered from 0,...,maxLabelIndex + maxLabelIndex.toInt + 1 + } + val numClasses = MetadataUtils.getNumClasses(labelSchema).fold(computeNumClasses())(identity) + + val multiclassLabeled = dataset.select($(labelCol), $(featuresCol)) + + // persist if underlying dataset is not persistent. + val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE + if (handlePersistence) { + multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK) + } + + // create k columns, one for each binary classifier. 
+ val models = Range(0, numClasses).par.map { index => + + val label: Double => Double = (label: Double) => { + if (label.toInt == index) 1.0 else 0.0 + } + + // generate new label metadata for the binary problem. + // TODO: use when ... otherwise after SPARK-7321 is merged + val labelUDF = callUDF(label, DoubleType, col($(labelCol))) + val newLabelMeta = BinaryAttribute.defaultAttr.withName("label").toMetadata() + val labelColName = "mc2b$" + index + val labelUDFWithNewMeta = labelUDF.as(labelColName, newLabelMeta) + val trainingDataset = multiclassLabeled.withColumn(labelColName, labelUDFWithNewMeta) + val classifier = getClassifier + classifier.fit(trainingDataset, classifier.labelCol -> labelColName) + }.toArray[ClassificationModel[_, _]] + + if (handlePersistence) { + multiclassLabeled.unpersist() + } + + // extract label metadata from label column if present, or create a nominal attribute + // to output the number of labels + val labelAttribute = Attribute.fromStructField(labelSchema) match { + case _: NumericAttribute | UnresolvedAttribute => + NominalAttribute.defaultAttr.withName("label").withNumValues(numClasses) + case attr: Attribute => attr + } + val model = new OneVsRestModel(uid, labelAttribute.toMetadata(), models).setParent(this) + copyValues(model) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index 10404548ccfde..330ae2938f4e0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -17,45 +17,40 @@ package org.apache.spark.ml.classification -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} -import org.apache.spark.ml.param.{ParamMap, Params} +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.types.{DoubleType, DataType, StructType} /** - * Params for probabilistic classification. + * (private[classification]) Params for probabilistic classification. */ private[classification] trait ProbabilisticClassifierParams extends ClassifierParams with HasProbabilityCol { override protected def validateAndTransformSchema( schema: StructType, - paramMap: ParamMap, fitting: Boolean, featuresDataType: DataType): StructType = { - val parentSchema = super.validateAndTransformSchema(schema, paramMap, fitting, featuresDataType) - val map = extractParamMap(paramMap) - SchemaUtils.appendColumn(parentSchema, map(probabilityCol), new VectorUDT) + val parentSchema = super.validateAndTransformSchema(schema, fitting, featuresDataType) + SchemaUtils.appendColumn(parentSchema, $(probabilityCol), new VectorUDT) } } /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Single-label binary or multiclass classifier which can output class conditional probabilities. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam E Concrete Estimator type * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. 
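Tying the OneVsRest pieces above together: the estimator trains one binary model per class and, at prediction time, picks the class whose model scores highest. A hedged sketch using the elastic-net LogisticRegression as the base classifier; `multiclassDF` is an assumed DataFrame whose "label" column is indexed 0..k-1 (e.g. via StringIndexer), and the hyperparameter values are illustrative.

import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}

val ovr = new OneVsRest()
  .setClassifier(new LogisticRegression().setMaxIter(50).setRegParam(0.01))

val ovrModel = ovr.fit(multiclassDF)
val predictions = ovrModel.transform(multiclassDF).select("prediction")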
*/ -@AlphaComponent +@DeveloperApi private[spark] abstract class ProbabilisticClassifier[ FeaturesType, E <: ProbabilisticClassifier[FeaturesType, E, M], @@ -68,17 +63,15 @@ private[spark] abstract class ProbabilisticClassifier[ /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Model produced by a [[ProbabilisticClassifier]]. * Classes are indexed {0, 1, ..., numClasses - 1}. * * @tparam FeaturesType Type of input features. E.g., [[Vector]] * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent +@DeveloperApi private[spark] abstract class ProbabilisticClassificationModel[ FeaturesType, M <: ProbabilisticClassificationModel[FeaturesType, M]] @@ -95,56 +88,82 @@ private[spark] abstract class ProbabilisticClassificationModel[ * - probability of each class as [[probabilityCol]] of type [[Vector]]. * * @param dataset input dataset - * @param paramMap additional parameters, overwrite embedded params * @return transformed dataset */ - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - // This default implementation should be overridden as needed. - - // Check schema - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - - // Prepare model - val tmpModel = if (paramMap.size != 0) { - val tmpModel = this.copy() - Params.inheritValues(paramMap, parent, tmpModel) - tmpModel - } else { - this - } - - val (numColsOutput, outputData) = - ClassificationModel.transformColumnsImpl[FeaturesType](dataset, tmpModel, map) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) // Output selected columns only. - if (map(probabilityCol) != "") { - // output probabilities - val features2probs: FeaturesType => Vector = (features) => { - tmpModel.predictProbabilities(features) + // This is a bit complicated since it tries to avoid repeated computation. 
+ var outputData = dataset + var numColsOutput = 0 + if ($(rawPredictionCol).nonEmpty) { + outputData = outputData.withColumn(getRawPredictionCol, + callUDF(predictRaw _, new VectorUDT, col(getFeaturesCol))) + numColsOutput += 1 + } + if ($(probabilityCol).nonEmpty) { + val probUDF = if ($(rawPredictionCol).nonEmpty) { + callUDF(raw2probability _, new VectorUDT, col($(rawPredictionCol))) + } else { + callUDF(predictProbability _, new VectorUDT, col($(featuresCol))) } - outputData.withColumn(map(probabilityCol), - callUDF(features2probs, new VectorUDT, col(map(featuresCol)))) - } else { - if (numColsOutput == 0) { - this.logWarning(s"$uid: ProbabilisticClassificationModel.transform() was called as NOOP" + - " since no output columns were set.") + outputData = outputData.withColumn($(probabilityCol), probUDF) + numColsOutput += 1 + } + if ($(predictionCol).nonEmpty) { + val predUDF = if ($(rawPredictionCol).nonEmpty) { + callUDF(raw2prediction _, DoubleType, col($(rawPredictionCol))) + } else if ($(probabilityCol).nonEmpty) { + callUDF(probability2prediction _, DoubleType, col($(probabilityCol))) + } else { + callUDF(predict _, DoubleType, col($(featuresCol))) } - outputData + outputData = outputData.withColumn($(predictionCol), predUDF) + numColsOutput += 1 } + + if (numColsOutput == 0) { + this.logWarning(s"$uid: ProbabilisticClassificationModel.transform() was called as NOOP" + + " since no output columns were set.") + } + outputData } /** - * :: DeveloperApi :: + * Estimate the probability of each class given the raw prediction, + * doing the computation in-place. + * These predictions are also called class conditional probabilities. + * + * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. * + * @return Estimated class conditional probabilities (modified input vector) + */ + protected def raw2probabilityInPlace(rawPrediction: Vector): Vector + + /** Non-in-place version of [[raw2probabilityInPlace()]] */ + protected def raw2probability(rawPrediction: Vector): Vector = { + val probs = rawPrediction.copy + raw2probabilityInPlace(probs) + } + + /** * Predict the probability of each class given the features. * These predictions are also called class conditional probabilities. * - * WARNING: Not all models output well-calibrated probability estimates! These probabilities - * should be treated as confidences, not precise probabilities. - * * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. + * + * @return Estimated class conditional probabilities + */ + protected def predictProbability(features: FeaturesType): Vector = { + val rawPreds = predictRaw(features) + raw2probabilityInPlace(rawPreds) + } + + /** + * Given a vector of class conditional probabilities, select the predicted label. + * This may be overridden to support thresholds which favor particular labels. 
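For the binary logistic model shown earlier, rawPrediction is (-margin, margin) and raw2probabilityInPlace applies the sigmoid element-wise, so the two entries of the probability column always sum to one and prediction is their argmax. A small self-contained check of that identity on a made-up margin:

// sigmoid(-m) + sigmoid(m) == 1, so the binary "probability" vector is normalized.
val margin = 1.7
val raw = Array(-margin, margin)
val prob = raw.map(r => 1.0 / (1.0 + math.exp(-r)))
assert(math.abs(prob.sum - 1.0) < 1e-12)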
+ * @return predicted label */ - @DeveloperApi - protected def predictProbabilities(features: FeaturesType): Vector + protected def probability2prediction(probability: Vector): Double = probability.toDense.argmax } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index cfd6508fce890..852a67e066322 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -19,34 +19,33 @@ package org.apache.spark.ml.classification import scala.collection.mutable -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ -import org.apache.spark.ml.param.{Params, ParamMap} -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{PredictionModel, Predictor} +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeClassifierParams, TreeEnsembleModel} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{RandomForest => OldRandomForest} -import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy} +import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.tree.model.{RandomForestModel => OldRandomForestModel} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame - /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for * classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. */ -@AlphaComponent -final class RandomForestClassifier +@Experimental +final class RandomForestClassifier(override val uid: String) extends Predictor[Vector, RandomForestClassifier, RandomForestClassificationModel] with RandomForestParams with TreeClassifierParams { + def this() = this(Identifiable.randomUID("rfc")) + // Override parameter setters from parent trait for Java API compatibility. 
// Parameters from TreeClassifierParams: @@ -81,27 +80,26 @@ final class RandomForestClassifier override def setFeatureSubsetStrategy(value: String): this.type = super.setFeatureSubsetStrategy(value) - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): RandomForestClassificationModel = { + override protected def train(dataset: DataFrame): RandomForestClassificationModel = { val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol))) - val numClasses: Int = MetadataUtils.getNumClasses(dataset.schema(paramMap(labelCol))) match { + MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val numClasses: Int = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { case Some(n: Int) => n case None => throw new IllegalArgumentException("RandomForestClassifier was given input" + - s" with invalid label column ${paramMap(labelCol)}, without the number of classes" + + s" with invalid label column ${$(labelCol)}, without the number of classes" + " specified. See StringIndexer.") // TODO: Automatically index labels: SPARK-7126 } - val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap) + val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val strategy = super.getOldStrategy(categoricalFeatures, numClasses, OldAlgo.Classification, getOldImpurity) val oldModel = OldRandomForest.trainClassifier( oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed.toInt) - RandomForestClassificationModel.fromOld(oldModel, this, paramMap, categoricalFeatures) + RandomForestClassificationModel.fromOld(oldModel, this, categoricalFeatures) } } +@Experimental object RandomForestClassifier { /** Accessor for supported impurity settings: entropy, gini */ final val supportedImpurities: Array[String] = TreeClassifierParams.supportedImpurities @@ -112,18 +110,16 @@ object RandomForestClassifier { } /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for classification. * It supports both binary and multiclass labels, as well as both continuous and categorical * features. * @param _trees Decision trees in the ensemble. * Warning: These have null parents. 
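// Usage sketch for the RandomForestClassifier above. It assumes `training` is a DataFrame with
// an indexed "label" column (e.g. produced by StringIndexer, so the number of classes is in the
// column metadata) and a "features" Vector column; setNumTrees is assumed to be among the
// setters inherited from RandomForestParams.
import org.apache.spark.ml.classification.RandomForestClassifier

val rf = new RandomForestClassifier()
  .setNumTrees(20)
  .setFeatureSubsetStrategy("auto")
val rfModel = rf.fit(training)                 // RandomForestClassificationModel
val predictions = rfModel.transform(training)  // adds a "prediction" column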
*/ -@AlphaComponent +@Experimental final class RandomForestClassificationModel private[ml] ( - override val parent: RandomForestClassifier, - override val fittingParamMap: ParamMap, + override val uid: String, private val _trees: Array[DecisionTreeClassificationModel]) extends PredictionModel[Vector, RandomForestClassificationModel] with TreeEnsembleModel with Serializable { @@ -150,10 +146,8 @@ final class RandomForestClassificationModel private[ml] ( votes.maxBy(_._2)._1 } - override protected def copy(): RandomForestClassificationModel = { - val m = new RandomForestClassificationModel(parent, fittingParamMap, _trees) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): RandomForestClassificationModel = { + copyValues(new RandomForestClassificationModel(uid, _trees), extra) } override def toString: String = { @@ -172,14 +166,14 @@ private[ml] object RandomForestClassificationModel { def fromOld( oldModel: OldRandomForestModel, parent: RandomForestClassifier, - fittingParamMap: ParamMap, categoricalFeatures: Map[Int, Int]): RandomForestClassificationModel = { require(oldModel.algo == OldAlgo.Classification, "Cannot convert RandomForestModel" + s" with algo=${oldModel.algo} (old API) to RandomForestClassificationModel (new API).") val newTrees = oldModel.trees.map { tree => - // parent, fittingParamMap for each tree is null since there are no good ways to set these. - DecisionTreeClassificationModel.fromOld(tree, null, null, categoricalFeatures) + // parent for each tree is null since there is no good way to set this. + DecisionTreeClassificationModel.fromOld(tree, null, categoricalFeatures) } - new RandomForestClassificationModel(parent, fittingParamMap, newTrees) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("rfc") + new RandomForestClassificationModel(uid, newTrees) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index c865eb9fe092d..f695ddaeefc72 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -17,24 +17,24 @@ package org.apache.spark.ml.evaluation -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.Evaluator +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.types.DoubleType /** - * :: AlphaComponent :: - * + * :: Experimental :: * Evaluator for binary classification, which expects two input columns: score and label. 
*/ -@AlphaComponent -class BinaryClassificationEvaluator extends Evaluator with Params - with HasRawPredictionCol with HasLabelCol { +@Experimental +class BinaryClassificationEvaluator(override val uid: String) + extends Evaluator with HasRawPredictionCol with HasLabelCol { + + def this() = this(Identifiable.randomUID("binEval")) /** * param for metric name in evaluation @@ -44,7 +44,7 @@ class BinaryClassificationEvaluator extends Evaluator with Params "metric name in evaluation (areaUnderROC|areaUnderPR)") /** @group getParam */ - def getMetricName: String = getOrDefault(metricName) + def getMetricName: String = $(metricName) /** @group setParam */ def setMetricName(value: String): this.type = set(metricName, value) @@ -57,20 +57,18 @@ class BinaryClassificationEvaluator extends Evaluator with Params setDefault(metricName -> "areaUnderROC") - override def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = { - val map = extractParamMap(paramMap) - + override def evaluate(dataset: DataFrame): Double = { val schema = dataset.schema - SchemaUtils.checkColumnType(schema, map(rawPredictionCol), new VectorUDT) - SchemaUtils.checkColumnType(schema, map(labelCol), DoubleType) + SchemaUtils.checkColumnType(schema, $(rawPredictionCol), new VectorUDT) + SchemaUtils.checkColumnType(schema, $(labelCol), DoubleType) // TODO: When dataset metadata has been implemented, check rawPredictionCol vector length = 2. - val scoreAndLabels = dataset.select(map(rawPredictionCol), map(labelCol)) + val scoreAndLabels = dataset.select($(rawPredictionCol), $(labelCol)) .map { case Row(rawPrediction: Vector, label: Double) => (rawPrediction(1), label) } val metrics = new BinaryClassificationMetrics(scoreAndLabels) - val metric = map(metricName) match { + val metric = $(metricName) match { case "areaUnderROC" => metrics.areaUnderROC() case "areaUnderPR" => diff --git a/mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala similarity index 63% rename from mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala rename to mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala index 8b4b5fd8af986..61e937e693699 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Evaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/Evaluator.scala @@ -15,26 +15,38 @@ * limitations under the License. */ -package org.apache.spark.ml +package org.apache.spark.ml.evaluation -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.util.Identifiable +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml.param.{ParamMap, Params} import org.apache.spark.sql.DataFrame /** - * :: AlphaComponent :: + * :: DeveloperApi :: * Abstract class for evaluators that compute metrics from predictions. */ -@AlphaComponent -abstract class Evaluator extends Identifiable { +@DeveloperApi +abstract class Evaluator extends Params { /** - * Evaluates the output. + * Evaluates model output and returns a scalar metric (larger is better). * * @param dataset a dataset that contains labels/observations and predictions. * @param paramMap parameter map that specifies the input columns and output metrics * @return metric */ - def evaluate(dataset: DataFrame, paramMap: ParamMap): Double + def evaluate(dataset: DataFrame, paramMap: ParamMap): Double = { + this.copy(paramMap).evaluate(dataset) + } + + /** + * Evaluates the output. 
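// Usage sketch for the BinaryClassificationEvaluator above, assuming `predictions` is the
// output of a binary classification model's transform() and contains "rawPrediction" and
// "label" columns.
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

val binEval = new BinaryClassificationEvaluator()
  .setMetricName("areaUnderROC")   // or "areaUnderPR"
val auc: Double = binEval.evaluate(predictions)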
+ * @param dataset a dataset that contains labels/observations and predictions. + * @return metric + */ + def evaluate(dataset: DataFrame): Double + + override def copy(extra: ParamMap): Evaluator = { + super.copy(extra).asInstanceOf[Evaluator] + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala new file mode 100644 index 0000000000000..abb1b35bedea5 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.evaluation + +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.param.{Param, ParamValidators} +import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} +import org.apache.spark.mllib.evaluation.RegressionMetrics +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.types.DoubleType + +/** + * :: Experimental :: + * Evaluator for regression, which expects two input columns: prediction and label. 
+ */ +@Experimental +final class RegressionEvaluator(override val uid: String) + extends Evaluator with HasPredictionCol with HasLabelCol { + + def this() = this(Identifiable.randomUID("regEval")) + + /** + * param for metric name in evaluation (supports `"rmse"` (default), `"mse"`, `"r2"`, and `"mae"`) + * @group param + */ + val metricName: Param[String] = { + val allowedParams = ParamValidators.inArray(Array("mse", "rmse", "r2", "mae")) + new Param(this, "metricName", "metric name in evaluation (mse|rmse|r2|mae)", allowedParams) + } + + /** @group getParam */ + def getMetricName: String = $(metricName) + + /** @group setParam */ + def setMetricName(value: String): this.type = set(metricName, value) + + /** @group setParam */ + def setPredictionCol(value: String): this.type = set(predictionCol, value) + + /** @group setParam */ + def setLabelCol(value: String): this.type = set(labelCol, value) + + setDefault(metricName -> "rmse") + + override def evaluate(dataset: DataFrame): Double = { + val schema = dataset.schema + SchemaUtils.checkColumnType(schema, $(predictionCol), DoubleType) + SchemaUtils.checkColumnType(schema, $(labelCol), DoubleType) + + val predictionAndLabels = dataset.select($(predictionCol), $(labelCol)) + .map { case Row(prediction: Double, label: Double) => + (prediction, label) + } + val metrics = new RegressionMetrics(predictionAndLabels) + val metric = $(metricName) match { + case "rmse" => + metrics.rootMeanSquaredError + case "mse" => + metrics.meanSquaredError + case "r2" => + metrics.r2 + case "mae" => + metrics.meanAbsoluteError + } + metric + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index f3ce6dfca2c1c..b06122d733853 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -17,22 +17,25 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.BinaryAttribute import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructType} /** - * :: AlphaComponent :: + * :: Experimental :: * Binarize a column of continuous features given a threshold. */ -@AlphaComponent -final class Binarizer extends Transformer with HasInputCol with HasOutputCol { +@Experimental +final class Binarizer(override val uid: String) + extends Transformer with HasInputCol with HasOutputCol { + + def this() = this(Identifiable.randomUID("binarizer")) /** * Param for threshold used to binarize continuous features. 
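// Usage sketch for the RegressionEvaluator above, assuming `predictions` holds Double
// "prediction" and "label" columns (e.g. the output of a regression model's transform()).
import org.apache.spark.ml.evaluation.RegressionEvaluator

val regEval = new RegressionEvaluator().setMetricName("r2")   // default metric is "rmse"
val r2: Double = regEval.evaluate(predictions)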
@@ -44,7 +47,7 @@ final class Binarizer extends Transformer with HasInputCol with HasOutputCol { new DoubleParam(this, "threshold", "threshold used to binarize continuous features") /** @group getParam */ - def getThreshold: Double = getOrDefault(threshold) + def getThreshold: Double = $(threshold) /** @group setParam */ def setThreshold(value: Double): this.type = set(threshold, value) @@ -57,23 +60,21 @@ final class Binarizer extends Transformer with HasInputCol with HasOutputCol { /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val td = map(threshold) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + val td = $(threshold) val binarizer = udf { in: Double => if (in > td) 1.0 else 0.0 } - val outputColName = map(outputCol) + val outputColName = $(outputCol) val metadata = BinaryAttribute.defaultAttr.withName(outputColName).toMetadata() dataset.select(col("*"), - binarizer(col(map(inputCol))).as(outputColName, metadata)) + binarizer(col($(inputCol))).as(outputColName, metadata)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - SchemaUtils.checkColumnType(schema, map(inputCol), DoubleType) + override def transformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) val inputFields = schema.fields - val outputColName = map(outputCol) + val outputColName = $(outputCol) require(inputFields.forall(_.name != outputColName), s"Output column $outputColName already exists.") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala new file mode 100644 index 0000000000000..a3d1f6f65ccaf --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
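// Usage sketch for the Binarizer above: values strictly greater than the threshold map to 1.0,
// everything else to 0.0. The DataFrame `df` with a Double column "score" is assumed.
import org.apache.spark.ml.feature.Binarizer

val binarizer = new Binarizer()
  .setInputCol("score")
  .setOutputCol("binarized")
  .setThreshold(0.5)
val binarized = binarizer.transform(df)   // 0.4 -> 0.0, 0.6 -> 1.0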
+ */ + +package org.apache.spark.ml.feature + +import java.{util => ju} + +import org.apache.spark.SparkException +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.Model +import org.apache.spark.ml.attribute.NominalAttribute +import org.apache.spark.ml.param._ +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} +import org.apache.spark.sql._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DoubleType, StructField, StructType} + +/** + * :: Experimental :: + * `Bucketizer` maps a column of continuous features to a column of feature buckets. + */ +@Experimental +final class Bucketizer(override val uid: String) + extends Model[Bucketizer] with HasInputCol with HasOutputCol { + + def this() = this(Identifiable.randomUID("bucketizer")) + + /** + * Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. + * A bucket defined by splits x,y holds values in the range [x,y) except the last bucket, which + * also includes y. Splits should be strictly increasing. + * Values at -inf, inf must be explicitly provided to cover all Double values; + * otherwise, values outside the splits specified will be treated as errors. + * @group param + */ + val splits: DoubleArrayParam = new DoubleArrayParam(this, "splits", + "Split points for mapping continuous features into buckets. With n+1 splits, there are n " + + "buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last " + + "bucket, which also includes y. The splits should be strictly increasing. " + + "Values at -inf, inf must be explicitly provided to cover all Double values; " + + "otherwise, values outside the splits specified will be treated as errors.", + Bucketizer.checkSplits) + + /** @group getParam */ + def getSplits: Array[Double] = $(splits) + + /** @group setParam */ + def setSplits(value: Array[Double]): this.type = set(splits, value) + + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) + + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema) + val bucketizer = udf { feature: Double => + Bucketizer.binarySearchForBuckets($(splits), feature) + } + val newCol = bucketizer(dataset($(inputCol))) + val newField = prepOutputField(dataset.schema) + dataset.withColumn($(outputCol), newCol.as($(outputCol), newField.metadata)) + } + + private def prepOutputField(schema: StructType): StructField = { + val buckets = $(splits).sliding(2).map(bucket => bucket.mkString(", ")).toArray + val attr = new NominalAttribute(name = Some($(outputCol)), isOrdinal = Some(true), + values = Some(buckets)) + attr.toStructField() + } + + override def transformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(inputCol), DoubleType) + SchemaUtils.appendColumn(schema, prepOutputField(schema)) + } +} + +private[feature] object Bucketizer { + /** We require splits to be of length >= 3 and to be in strictly increasing order. */ + def checkSplits(splits: Array[Double]): Boolean = { + if (splits.length < 3) { + false + } else { + var i = 0 + val n = splits.length - 1 + while (i < n) { + if (splits(i) >= splits(i + 1)) return false + i += 1 + } + true + } + } + + /** + * Binary searching in several buckets to place each data point. 
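// Usage sketch for the Bucketizer above. With n+1 splits there are n buckets, and the
// -inf/+inf endpoints ensure every Double value falls into some bucket. The DataFrame `df`
// with a Double column "hour" is assumed.
import org.apache.spark.ml.feature.Bucketizer

val splits = Array(Double.NegativeInfinity, 0.0, 10.0, 20.0, Double.PositiveInfinity)
val bucketizer = new Bucketizer()
  .setInputCol("hour")
  .setOutputCol("hourBucket")
  .setSplits(splits)
val bucketed = bucketizer.transform(df)   // e.g. 5.0 -> bucket 1.0, 25.0 -> bucket 3.0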
+ * @throws SparkException if a feature is < splits.head or > splits.last + */ + def binarySearchForBuckets(splits: Array[Double], feature: Double): Double = { + if (feature == splits.last) { + splits.length - 2 + } else { + val idx = ju.Arrays.binarySearch(splits, feature) + if (idx >= 0) { + idx + } else { + val insertPos = -idx - 1 + if (insertPos == 0 || insertPos == splits.length) { + throw new SparkException(s"Feature value $feature out of Bucketizer bounds" + + s" [${splits.head}, ${splits.last}]. Check your features, or loosen " + + s"the lower/upper bound constraints.") + } else { + insertPos - 1 + } + } + } + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala new file mode 100644 index 0000000000000..3ae1833390152 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.Param +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.mllib.feature +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} +import org.apache.spark.sql.types.DataType + +/** + * :: Experimental :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. 
+ */ +@Experimental +class ElementwiseProduct(override val uid: String) + extends UnaryTransformer[Vector, Vector, ElementwiseProduct] { + + def this() = this(Identifiable.randomUID("elemProd")) + + /** + * the vector to multiply with input vectors + * @group param + */ + val scalingVec: Param[Vector] = new Param(this, "scalingVector", "vector for hadamard product") + + /** @group setParam */ + def setScalingVec(value: Vector): this.type = set(scalingVec, value) + + /** @group getParam */ + def getScalingVec: Vector = getOrDefault(scalingVec) + + override protected def createTransformFunc: Vector => Vector = { + require(params.contains(scalingVec), s"transformation requires a weight vector") + val elemScaler = new feature.ElementwiseProduct($(scalingVec)) + elemScaler.transform + } + + override protected def outputDataType: DataType = new VectorUDT() +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 0b3128f9ee8cd..f936aef80f8af 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,19 +17,31 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.UnaryTransformer -import org.apache.spark.ml.param.{ParamValidators, IntParam, ParamMap} +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.param.{IntParam, ParamValidators} +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.{VectorUDT, Vector} -import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.types.{ArrayType, StructType} /** - * :: AlphaComponent :: + * :: Experimental :: * Maps a sequence of terms to their term frequencies using the hashing trick. */ -@AlphaComponent -class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] { +@Experimental +class HashingTF(override val uid: String) extends Transformer with HasInputCol with HasOutputCol { + + def this() = this(Identifiable.randomUID("hashingTF")) + + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) /** * Number of features. Should be > 0. 
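// Usage sketch for the ElementwiseProduct transformer above: each input vector is multiplied
// element-wise by the fixed scaling vector. The DataFrame `df` with a Vector column "features"
// is assumed.
import org.apache.spark.ml.feature.ElementwiseProduct
import org.apache.spark.mllib.linalg.Vectors

val elemProd = new ElementwiseProduct()
  .setScalingVec(Vectors.dense(0.0, 1.0, 2.0))
  .setInputCol("features")
  .setOutputCol("scaledByVec")
val result = elemProd.transform(df)   // (1.0, 2.0, 3.0) -> (0.0, 2.0, 6.0)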
@@ -42,15 +54,24 @@ class HashingTF extends UnaryTransformer[Iterable[_], Vector, HashingTF] { setDefault(numFeatures -> (1 << 18)) /** @group getParam */ - def getNumFeatures: Int = getOrDefault(numFeatures) + def getNumFeatures: Int = $(numFeatures) /** @group setParam */ def setNumFeatures(value: Int): this.type = set(numFeatures, value) - override protected def createTransformFunc(paramMap: ParamMap): Iterable[_] => Vector = { - val hashingTF = new feature.HashingTF(paramMap(numFeatures)) - hashingTF.transform + override def transform(dataset: DataFrame): DataFrame = { + val outputSchema = transformSchema(dataset.schema) + val hashingTF = new feature.HashingTF($(numFeatures)) + val t = udf { terms: Seq[_] => hashingTF.transform(terms) } + val metadata = outputSchema($(outputCol)).metadata + dataset.select(col("*"), t(col($(inputCol))).as($(outputCol), metadata)) } - override protected def outputDataType: DataType = new VectorUDT() + override def transformSchema(schema: StructType): StructType = { + val inputType = schema($(inputCol)).dataType + require(inputType.isInstanceOf[ArrayType], + s"The input column must be ArrayType, but got $inputType.") + val attrGroup = new AttributeGroup($(outputCol), $(numFeatures)) + SchemaUtils.appendColumn(schema, attrGroup.toStructField()) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index e6a62d998bb97..376b84530cd57 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -17,11 +17,11 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql._ @@ -43,7 +43,7 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol setDefault(minDocFreq -> 0) /** @group getParam */ - def getMinDocFreq: Int = getOrDefault(minDocFreq) + def getMinDocFreq: Int = $(minDocFreq) /** @group setParam */ def setMinDocFreq(value: Int): this.type = set(minDocFreq, value) @@ -51,19 +51,20 @@ private[feature] trait IDFBase extends Params with HasInputCol with HasOutputCol /** * Validate and transform the input schema. */ - protected def validateAndTransformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - SchemaUtils.checkColumnType(schema, map(inputCol), new VectorUDT) - SchemaUtils.appendColumn(schema, map(outputCol), new VectorUDT) + protected def validateAndTransformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(inputCol), new VectorUDT) + SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT) } } /** - * :: AlphaComponent :: + * :: Experimental :: * Compute the Inverse Document Frequency (IDF) given a collection of documents. 
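// TF-IDF sketch combining the HashingTF transformer above with the IDF estimator that follows.
// It assumes `df` has an ArrayType column "words" (e.g. the output of a Tokenizer).
import org.apache.spark.ml.feature.{HashingTF, IDF}

val hashingTF = new HashingTF()
  .setInputCol("words")
  .setOutputCol("rawFeatures")
  .setNumFeatures(1 << 18)                    // the default shown above
val featurized = hashingTF.transform(df)

val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features").setMinDocFreq(0)
val rescaled = idf.fit(featurized).transform(featurized)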
*/ -@AlphaComponent -final class IDF extends Estimator[IDFModel] with IDFBase { +@Experimental +final class IDF(override val uid: String) extends Estimator[IDFModel] with IDFBase { + + def this() = this(Identifiable.randomUID("idf")) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -71,29 +72,25 @@ final class IDF extends Estimator[IDFModel] with IDFBase { /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def fit(dataset: DataFrame, paramMap: ParamMap): IDFModel = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val input = dataset.select(map(inputCol)).map { case Row(v: Vector) => v } - val idf = new feature.IDF(map(minDocFreq)).fit(input) - val model = new IDFModel(this, map, idf) - Params.inheritValues(map, this, model) - model + override def fit(dataset: DataFrame): IDFModel = { + transformSchema(dataset.schema, logging = true) + val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } + val idf = new feature.IDF($(minDocFreq)).fit(input) + copyValues(new IDFModel(uid, idf).setParent(this)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } /** - * :: AlphaComponent :: + * :: Experimental :: * Model fitted by [[IDF]]. */ -@AlphaComponent +@Experimental class IDFModel private[ml] ( - override val parent: IDF, - override val fittingParamMap: ParamMap, + override val uid: String, idfModel: feature.IDFModel) extends Model[IDFModel] with IDFBase { @@ -103,14 +100,13 @@ class IDFModel private[ml] ( /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) val idf = udf { vec: Vector => idfModel.transform(vec) } - dataset.withColumn(map(outputCol), idf(col(map(inputCol)))) + dataset.withColumn($(outputCol), idf(col($(inputCol)))) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index bd2b5f6067e2d..8282e5ffa17f7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -17,19 +17,22 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.UnaryTransformer -import org.apache.spark.ml.param.{ParamValidators, DoubleParam, ParamMap} +import org.apache.spark.ml.param.{DoubleParam, ParamValidators} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.{VectorUDT, Vector} +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.types.DataType /** - * :: AlphaComponent :: + * :: Experimental :: * Normalize 
a vector to have unit norm using the given p-norm. */ -@AlphaComponent -class Normalizer extends UnaryTransformer[Vector, Vector, Normalizer] { +@Experimental +class Normalizer(override val uid: String) extends UnaryTransformer[Vector, Vector, Normalizer] { + + def this() = this(Identifiable.randomUID("normalizer")) /** * Normalization in L^p^ space. Must be >= 1. @@ -41,13 +44,13 @@ class Normalizer extends UnaryTransformer[Vector, Vector, Normalizer] { setDefault(p -> 2.0) /** @group getParam */ - def getP: Double = getOrDefault(p) + def getP: Double = $(p) /** @group setParam */ def setP(value: Double): this.type = set(p, value) - override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { - val normalizer = new feature.Normalizer(paramMap(p)) + override protected def createTransformFunc: Vector => Vector = { + val normalizer = new feature.Normalizer($(p)) normalizer.transform } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala new file mode 100644 index 0000000000000..8f34878c8d329 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.attribute._ +import org.apache.spark.ml.param._ +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.types.{DoubleType, StructType} + +/** + * :: Experimental :: + * A one-hot encoder that maps a column of category indices to a column of binary vectors, with + * at most a single one-value per row that indicates the input category index. + * For example with 5 categories, an input value of 2.0 would map to an output vector of + * `[0.0, 0.0, 1.0, 0.0]`. + * The last category is not included by default (configurable via [[OneHotEncoder!.dropLast]] + * because it makes the vector entries sum up to one, and hence linearly dependent. + * So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. + * Note that this is different from scikit-learn's OneHotEncoder, which keeps all categories. + * The output vectors are sparse. 
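// Usage sketch for the Normalizer above: rescales each Vector to unit p-norm. The DataFrame
// `df` with a Vector column "features" is assumed.
import org.apache.spark.ml.feature.Normalizer

val normalizer = new Normalizer()
  .setInputCol("features")
  .setOutputCol("normFeatures")
  .setP(1.0)                      // L^1 norm; the default is p = 2.0
val normalized = normalizer.transform(df)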
+ * + * @see [[StringIndexer]] for converting categorical values into category indices + */ +@Experimental +class OneHotEncoder(override val uid: String) extends Transformer + with HasInputCol with HasOutputCol { + + def this() = this(Identifiable.randomUID("oneHot")) + + /** + * Whether to drop the last category in the encoded vector (default: true) + * @group param + */ + final val dropLast: BooleanParam = + new BooleanParam(this, "dropLast", "whether to drop the last category") + setDefault(dropLast -> true) + + /** @group setParam */ + def setDropLast(value: Boolean): this.type = set(dropLast, value) + + /** @group setParam */ + def setInputCol(value: String): this.type = set(inputCol, value) + + /** @group setParam */ + def setOutputCol(value: String): this.type = set(outputCol, value) + + override def transformSchema(schema: StructType): StructType = { + val is = "_is_" + val inputColName = $(inputCol) + val outputColName = $(outputCol) + + SchemaUtils.checkColumnType(schema, inputColName, DoubleType) + val inputFields = schema.fields + require(!inputFields.exists(_.name == outputColName), + s"Output column $outputColName already exists.") + + val inputAttr = Attribute.fromStructField(schema(inputColName)) + val outputAttrNames: Option[Array[String]] = inputAttr match { + case nominal: NominalAttribute => + if (nominal.values.isDefined) { + nominal.values.map(_.map(v => inputColName + is + v)) + } else if (nominal.numValues.isDefined) { + nominal.numValues.map(n => Array.tabulate(n)(i => inputColName + is + i)) + } else { + None + } + case binary: BinaryAttribute => + if (binary.values.isDefined) { + binary.values.map(_.map(v => inputColName + is + v)) + } else { + Some(Array.tabulate(2)(i => inputColName + is + i)) + } + case _: NumericAttribute => + throw new RuntimeException( + s"The input column $inputColName cannot be numeric.") + case _ => + None // optimistic about unknown attributes + } + + val filteredOutputAttrNames = outputAttrNames.map { names => + if ($(dropLast)) { + require(names.length > 1, + s"The input column $inputColName should have at least two distinct values.") + names.dropRight(1) + } else { + names + } + } + + val outputAttrGroup = if (filteredOutputAttrNames.isDefined) { + val attrs: Array[Attribute] = filteredOutputAttrNames.get.map { name => + BinaryAttribute.defaultAttr.withName(name) + } + new AttributeGroup($(outputCol), attrs) + } else { + new AttributeGroup($(outputCol)) + } + + val outputFields = inputFields :+ outputAttrGroup.toStructField() + StructType(outputFields) + } + + override def transform(dataset: DataFrame): DataFrame = { + // schema transformation + val is = "_is_" + val inputColName = $(inputCol) + val outputColName = $(outputCol) + val shouldDropLast = $(dropLast) + var outputAttrGroup = AttributeGroup.fromStructField( + transformSchema(dataset.schema)(outputColName)) + if (outputAttrGroup.size < 0) { + // If the number of attributes is unknown, we check the values from the input column. 
+ val numAttrs = dataset.select(col(inputColName).cast(DoubleType)).map(_.getDouble(0)) + .aggregate(0.0)( + (m, x) => { + assert(x >=0.0 && x == x.toInt, + s"Values from column $inputColName must be indices, but got $x.") + math.max(m, x) + }, + (m0, m1) => { + math.max(m0, m1) + } + ).toInt + 1 + val outputAttrNames = Array.tabulate(numAttrs)(i => inputColName + is + i) + val filtered = if (shouldDropLast) outputAttrNames.dropRight(1) else outputAttrNames + val outputAttrs: Array[Attribute] = + filtered.map(name => BinaryAttribute.defaultAttr.withName(name)) + outputAttrGroup = new AttributeGroup(outputColName, outputAttrs) + } + val metadata = outputAttrGroup.toMetadata() + + // data transformation + val size = outputAttrGroup.size + val oneValue = Array(1.0) + val emptyValues = Array[Double]() + val emptyIndices = Array[Int]() + val encode = udf { label: Double => + if (label < size) { + Vectors.sparse(size, Array(label.toInt), oneValue) + } else { + Vectors.sparse(size, emptyIndices, emptyValues) + } + } + + dataset.select(col("*"), encode(col(inputColName).cast(DoubleType)).as(outputColName, metadata)) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 1b7c939c2dffe..442e95820217a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -19,22 +19,26 @@ package org.apache.spark.ml.feature import scala.collection.mutable -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.UnaryTransformer -import org.apache.spark.ml.param.{ParamValidators, IntParam, ParamMap} +import org.apache.spark.ml.param.{IntParam, ParamValidators} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg._ import org.apache.spark.sql.types.DataType /** - * :: AlphaComponent :: + * :: Experimental :: * Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, * which is available at [[http://en.wikipedia.org/wiki/Polynomial_expansion]], "In mathematics, an * expansion of a product of sums expresses it as a sum of products by using the fact that * multiplication distributes over addition". Take a 2-variable feature vector as an example: - * `(x, y)`, if we want to expand it with degree 2, then we get `(x, y, x * x, x * y, y * y)`. + * `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. */ -@AlphaComponent -class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { +@Experimental +class PolynomialExpansion(override val uid: String) + extends UnaryTransformer[Vector, Vector, PolynomialExpansion] { + + def this() = this(Identifiable.randomUID("poly")) /** * The polynomial degree to expand, which should be >= 1. A value of 1 means no expansion. 
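// Sketch combining StringIndexer (defined further below) with the OneHotEncoder above: index
// the string categories first, then encode the indices as sparse binary vectors. With dropLast
// left at its default (true), the last category maps to the all-zero vector. The DataFrame `df`
// with a string column "category" is assumed.
import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}

val indexer = new StringIndexer().setInputCol("category").setOutputCol("categoryIndex")
val indexed = indexer.fit(df).transform(df)

val encoder = new OneHotEncoder()
  .setInputCol("categoryIndex")
  .setOutputCol("categoryVec")
val encoded = encoder.transform(indexed)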
@@ -47,14 +51,13 @@ class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExp setDefault(degree -> 2) /** @group getParam */ - def getDegree: Int = getOrDefault(degree) + def getDegree: Int = $(degree) /** @group setParam */ def setDegree(value: Int): this.type = set(degree, value) - override protected def createTransformFunc(paramMap: ParamMap): Vector => Vector = { v => - val d = paramMap(degree) - PolynomialExpansion.expand(v, d) + override protected def createTransformFunc: Vector => Vector = { v => + PolynomialExpansion.expand(v, $(degree)) } override protected def outputDataType: DataType = new VectorUDT() @@ -72,7 +75,7 @@ class PolynomialExpansion extends UnaryTransformer[Vector, Vector, PolynomialExp * To handle sparsity, if c is zero, we can skip all monomials that contain it. We remember the * current index and increment it properly for sparse input. */ -object PolynomialExpansion { +private[feature] object PolynomialExpansion { private def choose(n: Int, k: Int): Int = { Range(n, n - k, -1).product / Range(k, 1, -1).product diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index a0e9ed32e0e4c..b0fd06d84fdb3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -17,10 +17,11 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql._ @@ -34,13 +35,13 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with /** * Centers the data with mean before scaling. - * It will build a dense output, so this does not work on sparse input + * It will build a dense output, so this does not work on sparse input * and will raise an exception. * Default: false * @group param */ val withMean: BooleanParam = new BooleanParam(this, "withMean", "Center data with mean") - + /** * Scales the data to unit standard deviation. * Default: true @@ -50,12 +51,15 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with } /** - * :: AlphaComponent :: + * :: Experimental :: * Standardizes features by removing the mean and scaling to unit variance using column summary * statistics on the samples in the training set. 
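// Usage sketch for the PolynomialExpansion transformer above: with degree 2, a two-feature
// input (x, y) expands to (x, x*x, y, x*y, y*y). The DataFrame `df` with a Vector column
// "features" is assumed.
import org.apache.spark.ml.feature.PolynomialExpansion

val polyExpansion = new PolynomialExpansion()
  .setInputCol("features")
  .setOutputCol("polyFeatures")
  .setDegree(2)
val expanded = polyExpansion.transform(df)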
*/ -@AlphaComponent -class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerParams { +@Experimental +class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] + with StandardScalerParams { + + def this() = this(Identifiable.randomUID("stdScal")) setDefault(withMean -> false, withStd -> true) @@ -64,44 +68,39 @@ class StandardScaler extends Estimator[StandardScalerModel] with StandardScalerP /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - + /** @group setParam */ def setWithMean(value: Boolean): this.type = set(withMean, value) - + /** @group setParam */ def setWithStd(value: Boolean): this.type = set(withStd, value) - - override def fit(dataset: DataFrame, paramMap: ParamMap): StandardScalerModel = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val input = dataset.select(map(inputCol)).map { case Row(v: Vector) => v } - val scaler = new feature.StandardScaler(withMean = map(withMean), withStd = map(withStd)) + + override def fit(dataset: DataFrame): StandardScalerModel = { + transformSchema(dataset.schema, logging = true) + val input = dataset.select($(inputCol)).map { case Row(v: Vector) => v } + val scaler = new feature.StandardScaler(withMean = $(withMean), withStd = $(withStd)) val scalerModel = scaler.fit(input) - val model = new StandardScalerModel(this, map, scalerModel) - Params.inheritValues(map, this, model) - model + copyValues(new StandardScalerModel(uid, scalerModel).setParent(this)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - val inputType = schema(map(inputCol)).dataType + override def transformSchema(schema: StructType): StructType = { + val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[VectorUDT], - s"Input column ${map(inputCol)} must be a vector column") - require(!schema.fieldNames.contains(map(outputCol)), - s"Output column ${map(outputCol)} already exists.") - val outputFields = schema.fields :+ StructField(map(outputCol), new VectorUDT, false) + s"Input column ${$(inputCol)} must be a vector column") + require(!schema.fieldNames.contains($(outputCol)), + s"Output column ${$(outputCol)} already exists.") + val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false) StructType(outputFields) } } /** - * :: AlphaComponent :: + * :: Experimental :: * Model fitted by [[StandardScaler]]. 
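// Usage sketch for the StandardScaler estimator above. withMean defaults to false (so sparse
// input stays sparse) and withStd defaults to true. The DataFrame `df` with a Vector column
// "features" is assumed.
import org.apache.spark.ml.feature.StandardScaler

val stdScaler = new StandardScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeatures")
  .setWithStd(true)
  .setWithMean(false)
val scaledData = stdScaler.fit(df).transform(df)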
*/ -@AlphaComponent +@Experimental class StandardScalerModel private[ml] ( - override val parent: StandardScaler, - override val fittingParamMap: ParamMap, + override val uid: String, scaler: feature.StandardScalerModel) extends Model[StandardScalerModel] with StandardScalerParams { @@ -111,21 +110,19 @@ class StandardScalerModel private[ml] ( /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val scale = udf((v: Vector) => { scaler.transform(v) } : Vector) - dataset.withColumn(map(outputCol), scale(col(map(inputCol)))) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + val scale = udf { scaler.transform _ } + dataset.withColumn($(outputCol), scale(col($(inputCol)))) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - val inputType = schema(map(inputCol)).dataType + override def transformSchema(schema: StructType): StructType = { + val inputType = schema($(inputCol)).dataType require(inputType.isInstanceOf[VectorUDT], - s"Input column ${map(inputCol)} must be a vector column") - require(!schema.fieldNames.contains(map(outputCol)), - s"Output column ${map(outputCol)} already exists.") - val outputFields = schema.fields :+ StructField(map(outputCol), new VectorUDT, false) + s"Input column ${$(inputCol)} must be a vector column") + require(!schema.fieldNames.contains($(outputCol)), + s"Output column ${$(outputCol)} already exists.") + val outputFields = schema.fields :+ StructField($(outputCol), new VectorUDT, false) StructType(outputFields) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 9db3b29e10d69..a2dc8a8b960c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -18,11 +18,12 @@ package org.apache.spark.ml.feature import org.apache.spark.SparkException -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{NumericType, StringType, StructType} @@ -34,32 +35,34 @@ import org.apache.spark.util.collection.OpenHashMap private[feature] trait StringIndexerBase extends Params with HasInputCol with HasOutputCol { /** Validates and transforms the input schema. 
*/ - protected def validateAndTransformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - val inputColName = map(inputCol) + protected def validateAndTransformSchema(schema: StructType): StructType = { + val inputColName = $(inputCol) val inputDataType = schema(inputColName).dataType require(inputDataType == StringType || inputDataType.isInstanceOf[NumericType], s"The input column $inputColName must be either string type or numeric type, " + s"but got $inputDataType.") val inputFields = schema.fields - val outputColName = map(outputCol) + val outputColName = $(outputCol) require(inputFields.forall(_.name != outputColName), s"Output column $outputColName already exists.") - val attr = NominalAttribute.defaultAttr.withName(map(outputCol)) + val attr = NominalAttribute.defaultAttr.withName($(outputCol)) val outputFields = inputFields :+ attr.toStructField() StructType(outputFields) } } /** - * :: AlphaComponent :: + * :: Experimental :: * A label indexer that maps a string column of labels to an ML column of label indices. * If the input column is numeric, we cast it to string and index the string values. * The indices are in [0, numLabels), ordered by label frequencies. * So the most frequent label gets index 0. */ -@AlphaComponent -class StringIndexer extends Estimator[StringIndexerModel] with StringIndexerBase { +@Experimental +class StringIndexer(override val uid: String) extends Estimator[StringIndexerModel] + with StringIndexerBase { + + def this() = this(Identifiable.randomUID("strIdx")) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -69,30 +72,26 @@ class StringIndexer extends Estimator[StringIndexerModel] with StringIndexerBase // TODO: handle unseen labels - override def fit(dataset: DataFrame, paramMap: ParamMap): StringIndexerModel = { - val map = extractParamMap(paramMap) - val counts = dataset.select(col(map(inputCol)).cast(StringType)) + override def fit(dataset: DataFrame): StringIndexerModel = { + val counts = dataset.select(col($(inputCol)).cast(StringType)) .map(_.getString(0)) .countByValue() val labels = counts.toSeq.sortBy(-_._2).map(_._1).toArray - val model = new StringIndexerModel(this, map, labels) - Params.inheritValues(map, this, model) - model + copyValues(new StringIndexerModel(uid, labels).setParent(this)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } /** - * :: AlphaComponent :: + * :: Experimental :: * Model fitted by [[StringIndexer]]. 
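// Sketch for the StringIndexer above: labels are ordered by descending frequency, so the most
// frequent label is assigned index 0.0. The sqlContext and toy data are illustrative.
import org.apache.spark.ml.feature.StringIndexer
import sqlContext.implicits._

val labelsDf = Seq((0, "b"), (1, "b"), (2, "a"), (3, "b"), (4, "c")).toDF("id", "label")
val indexerModel = new StringIndexer().setInputCol("label").setOutputCol("labelIndex").fit(labelsDf)
indexerModel.transform(labelsDf).show()   // "b" -> 0.0; "a" and "c" receive 1.0 and 2.0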
*/ -@AlphaComponent +@Experimental class StringIndexerModel private[ml] ( - override val parent: StringIndexer, - override val fittingParamMap: ParamMap, + override val uid: String, labels: Array[String]) extends Model[StringIndexerModel] with StringIndexerBase { private val labelToIndex: OpenHashMap[String, Double] = { @@ -112,8 +111,7 @@ class StringIndexerModel private[ml] ( /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - val map = extractParamMap(paramMap) + override def transform(dataset: DataFrame): DataFrame = { val indexer = udf { label: String => if (labelToIndex.contains(label)) { labelToIndex(label) @@ -122,14 +120,14 @@ class StringIndexerModel private[ml] ( throw new SparkException(s"Unseen label: $label.") } } - val outputColName = map(outputCol) + val outputColName = $(outputCol) val metadata = NominalAttribute.defaultAttr .withName(outputColName).withValues(labels).toMetadata() dataset.select(col("*"), - indexer(dataset(map(inputCol)).cast(StringType)).as(outputColName, metadata)) + indexer(dataset($(inputCol)).cast(StringType)).as(outputColName, metadata)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 01752ba482d0c..21c15b6c33f6c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -17,19 +17,24 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ -import org.apache.spark.sql.types.{DataType, StringType, ArrayType} +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.sql.types.{ArrayType, DataType, StringType} /** - * :: AlphaComponent :: + * :: Experimental :: * A tokenizer that converts the input string to lowercase and then splits it by white spaces. + * + * @see [[RegexTokenizer]] */ -@AlphaComponent -class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { +@Experimental +class Tokenizer(override val uid: String) extends UnaryTransformer[String, Seq[String], Tokenizer] { + + def this() = this(Identifiable.randomUID("tok")) - override protected def createTransformFunc(paramMap: ParamMap): String => Seq[String] = { + override protected def createTransformFunc: String => Seq[String] = { _.toLowerCase.split("\\s") } @@ -41,32 +46,35 @@ class Tokenizer extends UnaryTransformer[String, Seq[String], Tokenizer] { } /** - * :: AlphaComponent :: - * A regex based tokenizer that extracts tokens either by repeatedly matching the regex(default) - * or using it to split the text (set matching to false). Optional parameters also allow filtering - * tokens using a minimal length. + * :: Experimental :: + * A regex based tokenizer that extracts tokens either by using the provided regex pattern to split + * the text (default) or repeatedly matching the regex (if `gaps` is true). + * Optional parameters also allow filtering tokens using a minimal length. * It returns an array of strings that can be empty. 
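// Sketch contrasting the Tokenizer above with the RegexTokenizer that follows: Tokenizer
// lowercases and splits on whitespace, while RegexTokenizer splits on a configurable pattern
// (gaps = true and pattern "\\s+" by default). The DataFrame `df` with a string column "text"
// is assumed.
import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}

val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val simpleTokens = tokenizer.transform(df)

val regexTokenizer = new RegexTokenizer()
  .setInputCol("text")
  .setOutputCol("words")
  .setPattern("\\W+")            // split on non-word characters instead of the default "\\s+"
  .setMinTokenLength(2)          // drop single-character tokens
val regexTokens = regexTokenizer.transform(df)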
*/ -@AlphaComponent -class RegexTokenizer extends UnaryTransformer[String, Seq[String], RegexTokenizer] { +@Experimental +class RegexTokenizer(override val uid: String) + extends UnaryTransformer[String, Seq[String], RegexTokenizer] { + + def this() = this(Identifiable.randomUID("regexTok")) /** * Minimum token length, >= 0. * Default: 1, to avoid returning empty strings * @group param */ - val minTokenLength: IntParam = new IntParam(this, "minLength", "minimum token length (>= 0)", + val minTokenLength: IntParam = new IntParam(this, "minTokenLength", "minimum token length (>= 0)", ParamValidators.gtEq(0)) /** @group setParam */ def setMinTokenLength(value: Int): this.type = set(minTokenLength, value) /** @group getParam */ - def getMinTokenLength: Int = getOrDefault(minTokenLength) + def getMinTokenLength: Int = $(minTokenLength) /** - * Indicates whether regex splits on gaps (true) or matching tokens (false). - * Default: false + * Indicates whether regex splits on gaps (true) or matches tokens (false). + * Default: true * @group param */ val gaps: BooleanParam = new BooleanParam(this, "gaps", "Set regex to match gaps or tokens") @@ -75,11 +83,11 @@ class RegexTokenizer extends UnaryTransformer[String, Seq[String], RegexTokenize def setGaps(value: Boolean): this.type = set(gaps, value) /** @group getParam */ - def getGaps: Boolean = getOrDefault(gaps) + def getGaps: Boolean = $(gaps) /** - * Regex pattern used by tokenizer. - * Default: `"\\p{L}+|[^\\p{L}\\s]+"` + * Regex pattern used to match delimiters if [[gaps]] is true or tokens if [[gaps]] is false. + * Default: `"\\s+"` * @group param */ val pattern: Param[String] = new Param(this, "pattern", "regex pattern used for tokenizing") @@ -88,14 +96,14 @@ class RegexTokenizer extends UnaryTransformer[String, Seq[String], RegexTokenize def setPattern(value: String): this.type = set(pattern, value) /** @group getParam */ - def getPattern: String = getOrDefault(pattern) + def getPattern: String = $(pattern) - setDefault(minTokenLength -> 1, gaps -> false, pattern -> "\\p{L}+|[^\\p{L}\\s]+") + setDefault(minTokenLength -> 1, gaps -> true, pattern -> "\\s+") - override protected def createTransformFunc(paramMap: ParamMap): String => Seq[String] = { str => - val re = paramMap(pattern).r - val tokens = if (paramMap(gaps)) re.split(str).toSeq else re.findAllIn(str).toSeq - val minLength = paramMap(minTokenLength) + override protected def createTransformFunc: String => Seq[String] = { str => + val re = $(pattern).r + val tokens = if ($(gaps)) re.split(str).toSeq else re.findAllIn(str).toSeq + val minLength = $(minTokenLength) tokens.filter(_.length >= minLength) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 5e781a326d98c..229ee27ec5942 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -20,21 +20,25 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.Transformer -import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute} import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.Identifiable import 
org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ /** - * :: AlphaComponent :: - * A feature transformer than merge multiple columns into a vector column. + * :: Experimental :: + * A feature transformer that merges multiple columns into a vector column. */ -@AlphaComponent -class VectorAssembler extends Transformer with HasInputCols with HasOutputCol { +@Experimental +class VectorAssembler(override val uid: String) + extends Transformer with HasInputCols with HasOutputCol { + + def this() = this(Identifiable.randomUID("vecAssembler")) /** @group setParam */ def setInputCols(value: Array[String]): this.type = set(inputCols, value) @@ -42,27 +46,65 @@ class VectorAssembler extends Transformer with HasInputCols with HasOutputCol { /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - val map = extractParamMap(paramMap) + override def transform(dataset: DataFrame): DataFrame = { + // Schema transformation. + val schema = dataset.schema + lazy val first = dataset.first() + val attrs = $(inputCols).flatMap { c => + val field = schema(c) + val index = schema.fieldIndex(c) + field.dataType match { + case DoubleType => + val attr = Attribute.fromStructField(field) + // If the input column doesn't have ML attribute, assume numeric. + if (attr == UnresolvedAttribute) { + Some(NumericAttribute.defaultAttr.withName(c)) + } else { + Some(attr.withName(c)) + } + case _: NumericType | BooleanType => + // If the input column type is a compatible scalar type, assume numeric. + Some(NumericAttribute.defaultAttr.withName(c)) + case _: VectorUDT => + val group = AttributeGroup.fromStructField(field) + if (group.attributes.isDefined) { + // If attributes are defined, copy them with updated names. + group.attributes.get.map { attr => + if (attr.name.isDefined) { + // TODO: Define a rigorous naming scheme. + attr.withName(c + "_" + attr.name.get) + } else { + attr + } + } + } else { + // Otherwise, treat all attributes as numeric. If we cannot get the number of attributes + // from metadata, check the first row. + val numAttrs = group.numAttributes.getOrElse(first.getAs[Vector](index).size) + Array.fill(numAttrs)(NumericAttribute.defaultAttr) + } + } + } + val metadata = new AttributeGroup($(outputCol), attrs).toMetadata() + + // Data transformation. 
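Before the data-transformation half of `transform` below, a minimal usage sketch of the assembler itself (not part of the patch), assuming `sqlContext` is in scope; the column names and values are invented.

{{{
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.mllib.linalg.Vectors

// One row with numeric scalars and an existing vector column.
val dataset = sqlContext.createDataFrame(
  Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
).toDF("id", "hour", "mobile", "userFeatures", "clicked")

// Numeric, boolean, and vector input columns are concatenated, in order, into one vector.
val assembler = new VectorAssembler()
  .setInputCols(Array("hour", "mobile", "userFeatures"))
  .setOutputCol("features")

assembler.transform(dataset).select("features", "clicked").show()
}}}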
val assembleFunc = udf { r: Row => VectorAssembler.assemble(r.toSeq: _*) } - val schema = dataset.schema - val inputColNames = map(inputCols) - val args = inputColNames.map { c => + val args = $(inputCols).map { c => schema(c).dataType match { case DoubleType => dataset(c) case _: VectorUDT => dataset(c) case _: NumericType | BooleanType => dataset(c).cast(DoubleType).as(s"${c}_double_$uid") } } - dataset.select(col("*"), assembleFunc(struct(args : _*)).as(map(outputCol))) + + dataset.select(col("*"), assembleFunc(struct(args : _*)).as($(outputCol), metadata)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - val inputColNames = map(inputCols) - val outputColName = map(outputCol) + override def transformSchema(schema: StructType): StructType = { + val inputColNames = $(inputCols) + val outputColName = $(outputCol) val inputDataTypes = inputColNames.map(name => schema(name).dataType) inputDataTypes.foreach { case _: NumericType | BooleanType => @@ -77,8 +119,7 @@ class VectorAssembler extends Transformer with HasInputCols with HasOutputCol { } } -@AlphaComponent -object VectorAssembler { +private object VectorAssembler { private[feature] def assemble(vv: Any*): Vector = { val indices = ArrayBuilder.make[Int] @@ -105,6 +146,6 @@ object VectorAssembler { case o => throw new SparkException(s"$o of type ${o.getClass.getName} is not supported.") } - Vectors.sparse(cur, indices.result(), values.result()) + Vectors.sparse(cur, indices.result(), values.result()).compressed } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index ed833c63c7ef1..1d0f23b4fb3db 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -17,20 +17,23 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.util.SchemaUtils +import java.lang.{Double => JDouble, Integer => JInt} +import java.util.{Map => JMap} + +import scala.collection.JavaConverters._ + +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.{Estimator, Model} -import org.apache.spark.ml.attribute.{BinaryAttribute, NumericAttribute, NominalAttribute, - Attribute, AttributeGroup} -import org.apache.spark.ml.param.{ParamValidators, IntParam, ParamMap, Params} +import org.apache.spark.ml.attribute._ +import org.apache.spark.ml.param.{IntParam, ParamValidators, Params} import org.apache.spark.ml.param.shared._ -import org.apache.spark.mllib.linalg.{SparseVector, DenseVector, Vector, VectorUDT} -import org.apache.spark.sql.{Row, DataFrame} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, VectorUDT} +import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.callUDF import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.util.collection.OpenHashSet - /** Private trait for params for VectorIndexer and VectorIndexerModel */ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOutputCol { @@ -49,12 +52,11 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu setDefault(maxCategories -> 20) /** @group getParam */ - def getMaxCategories: Int = getOrDefault(maxCategories) + def getMaxCategories: Int = 
$(maxCategories) } /** - * :: AlphaComponent :: - * + * :: Experimental :: * Class for indexing categorical feature columns in a dataset of [[Vector]]. * * This has 2 usage modes: @@ -88,8 +90,11 @@ private[ml] trait VectorIndexerParams extends Params with HasInputCol with HasOu * - Add warning if a categorical feature has only 1 category. * - Add option for allowing unknown categories. */ -@AlphaComponent -class VectorIndexer extends Estimator[VectorIndexerModel] with VectorIndexerParams { +@Experimental +class VectorIndexer(override val uid: String) extends Estimator[VectorIndexerModel] + with VectorIndexerParams { + + def this() = this(Identifiable.randomUID("vecIdx")) /** @group setParam */ def setMaxCategories(value: Int): this.type = set(maxCategories, value) @@ -100,33 +105,31 @@ class VectorIndexer extends Estimator[VectorIndexerModel] with VectorIndexerPara /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def fit(dataset: DataFrame, paramMap: ParamMap): VectorIndexerModel = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val firstRow = dataset.select(map(inputCol)).take(1) + override def fit(dataset: DataFrame): VectorIndexerModel = { + transformSchema(dataset.schema, logging = true) + val firstRow = dataset.select($(inputCol)).take(1) require(firstRow.length == 1, s"VectorIndexer cannot be fit on an empty dataset.") val numFeatures = firstRow(0).getAs[Vector](0).size - val vectorDataset = dataset.select(map(inputCol)).map { case Row(v: Vector) => v } - val maxCats = map(maxCategories) + val vectorDataset = dataset.select($(inputCol)).map { case Row(v: Vector) => v } + val maxCats = $(maxCategories) val categoryStats: VectorIndexer.CategoryStats = vectorDataset.mapPartitions { iter => val localCatStats = new VectorIndexer.CategoryStats(numFeatures, maxCats) iter.foreach(localCatStats.addVector) Iterator(localCatStats) }.reduce((stats1, stats2) => stats1.merge(stats2)) - val model = new VectorIndexerModel(this, map, numFeatures, categoryStats.getCategoryMaps) - Params.inheritValues(map, this, model) - model + val model = new VectorIndexerModel(uid, numFeatures, categoryStats.getCategoryMaps) + .setParent(this) + copyValues(model) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { + override def transformSchema(schema: StructType): StructType = { // We do not transfer feature metadata since we do not know what types of features we will // produce in transform(). 
- val map = extractParamMap(paramMap) val dataType = new VectorUDT - require(map.contains(inputCol), s"VectorIndexer requires input column parameter: $inputCol") - require(map.contains(outputCol), s"VectorIndexer requires output column parameter: $outputCol") - SchemaUtils.checkColumnType(schema, map(inputCol), dataType) - SchemaUtils.appendColumn(schema, map(outputCol), dataType) + require(isDefined(inputCol), s"VectorIndexer requires input column parameter: $inputCol") + require(isDefined(outputCol), s"VectorIndexer requires output column parameter: $outputCol") + SchemaUtils.checkColumnType(schema, $(inputCol), dataType) + SchemaUtils.appendColumn(schema, $(outputCol), dataType) } } @@ -195,7 +198,8 @@ private object VectorIndexer { private def addDenseVector(dv: DenseVector): Unit = { var i = 0 - while (i < dv.size) { + val size = dv.size + while (i < size) { if (featureValueSets(i).size <= maxCategories) { featureValueSets(i).add(dv(i)) } @@ -207,7 +211,8 @@ private object VectorIndexer { // TODO: This might be able to handle 0's more efficiently. var vecIndex = 0 // index into vector var k = 0 // index into non-zero elements - while (vecIndex < sv.size) { + val size = sv.size + while (vecIndex < size) { val featureValue = if (k < sv.indices.length && vecIndex == sv.indices(k)) { k += 1 sv.values(k - 1) @@ -224,8 +229,7 @@ private object VectorIndexer { } /** - * :: AlphaComponent :: - * + * :: Experimental :: * Transform categorical features to use 0-based indices instead of their original values. * - Categorical features are mapped to indices. * - Continuous features (columns) are left unchanged. @@ -240,14 +244,18 @@ private object VectorIndexer { * Values are maps from original features values to 0-based category indices. * If a feature is not in this map, it is treated as continuous. */ -@AlphaComponent +@Experimental class VectorIndexerModel private[ml] ( - override val parent: VectorIndexer, - override val fittingParamMap: ParamMap, + override val uid: String, val numFeatures: Int, val categoryMaps: Map[Int, Map[Double, Int]]) extends Model[VectorIndexerModel] with VectorIndexerParams { + /** Java-friendly version of [[categoryMaps]] */ + def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = { + categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] + } + /** * Pre-computed feature attributes, with some missing info. * In transform(), set attribute name and other info, if available. 
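For orientation between the estimator above and the model below, a usage sketch of VectorIndexer on the new API (not part of the patch); it assumes `sc`, `sqlContext`, and the sample LIBSVM file that ships with Spark, all of which are illustrative choices.

{{{
import org.apache.spark.ml.feature.VectorIndexer
import org.apache.spark.mllib.util.MLUtils
import sqlContext.implicits._   // for RDD.toDF()

// Any DataFrame with a Vector column works; the path here is just the bundled sample data.
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()

val indexer = new VectorIndexer()
  .setInputCol("features")
  .setOutputCol("indexed")
  .setMaxCategories(10)

val indexerModel = indexer.fit(data)

// Features with at most 10 distinct values were treated as categorical.
val categoricalFeatures = indexerModel.categoryMaps.keys.toSeq.sorted
println(s"Chose ${categoricalFeatures.size} categorical features: " +
  categoricalFeatures.mkString(", "))

val indexedData = indexerModel.transform(data)
}}}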
@@ -326,35 +334,33 @@ class VectorIndexerModel private[ml] ( /** @group setParam */ def setOutputCol(value: String): this.type = set(outputCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val newField = prepOutputField(dataset.schema, map) - val newCol = callUDF(transformFunc, new VectorUDT, dataset(map(inputCol))) - dataset.withColumn(map(outputCol), newCol.as(map(outputCol), newField.metadata)) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + val newField = prepOutputField(dataset.schema) + val newCol = callUDF(transformFunc, new VectorUDT, dataset($(inputCol))) + dataset.withColumn($(outputCol), newCol.as($(outputCol), newField.metadata)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) + override def transformSchema(schema: StructType): StructType = { val dataType = new VectorUDT - require(map.contains(inputCol), + require(isDefined(inputCol), s"VectorIndexerModel requires input column parameter: $inputCol") - require(map.contains(outputCol), + require(isDefined(outputCol), s"VectorIndexerModel requires output column parameter: $outputCol") - SchemaUtils.checkColumnType(schema, map(inputCol), dataType) + SchemaUtils.checkColumnType(schema, $(inputCol), dataType) // If the input metadata specifies numFeatures, compare with expected numFeatures. - val origAttrGroup = AttributeGroup.fromStructField(schema(map(inputCol))) + val origAttrGroup = AttributeGroup.fromStructField(schema($(inputCol))) val origNumFeatures: Option[Int] = if (origAttrGroup.attributes.nonEmpty) { Some(origAttrGroup.attributes.get.length) } else { origAttrGroup.numAttributes } require(origNumFeatures.forall(_ == numFeatures), "VectorIndexerModel expected" + - s" $numFeatures features, but input column ${map(inputCol)} had metadata specifying" + + s" $numFeatures features, but input column ${$(inputCol)} had metadata specifying" + s" ${origAttrGroup.numAttributes.get} features.") - val newField = prepOutputField(schema, map) + val newField = prepOutputField(schema) val outputFields = schema.fields :+ newField StructType(outputFields) } @@ -362,11 +368,10 @@ class VectorIndexerModel private[ml] ( /** * Prepare the output column field, including per-feature metadata. * @param schema Input schema - * @param map Parameter map (with this class' embedded parameter map folded in) * @return Output column field. This field does not contain non-ML metadata. 
*/ - private def prepOutputField(schema: StructType, map: ParamMap): StructField = { - val origAttrGroup = AttributeGroup.fromStructField(schema(map(inputCol))) + private def prepOutputField(schema: StructType): StructField = { + val origAttrGroup = AttributeGroup.fromStructField(schema($(inputCol))) val featureAttributes: Array[Attribute] = if (origAttrGroup.attributes.nonEmpty) { // Convert original attributes to modified attributes val origAttrs: Array[Attribute] = origAttrGroup.attributes.get @@ -385,11 +390,13 @@ class VectorIndexerModel private[ml] ( } case (origAttr: Attribute, featAttr: NumericAttribute) => origAttr.withIndex(featAttr.index.get) + case (origAttr: Attribute, _) => + origAttr } } else { partialFeatureAttributes } - val newAttributeGroup = new AttributeGroup(map(outputCol), featureAttributes) + val newAttributeGroup = new AttributeGroup($(outputCol), featureAttributes) newAttributeGroup.toStructField() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 0163fa8bd8a5b..36f19509f0cfb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -17,17 +17,17 @@ package org.apache.spark.ml.feature -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.SchemaUtils -import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.feature -import org.apache.spark.mllib.linalg.BLAS._ import org.apache.spark.mllib.linalg.{VectorUDT, Vectors} +import org.apache.spark.mllib.linalg.BLAS._ +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ -import org.apache.spark.sql.{DataFrame, Row} /** * Params for [[Word2Vec]] and [[Word2VecModel]]. @@ -37,56 +37,59 @@ private[feature] trait Word2VecBase extends Params /** * The dimension of the code that you want to transform from words. + * @group param */ final val vectorSize = new IntParam( this, "vectorSize", "the dimension of codes after transforming from words") setDefault(vectorSize -> 100) /** @group getParam */ - def getVectorSize: Int = getOrDefault(vectorSize) + def getVectorSize: Int = $(vectorSize) /** * Number of partitions for sentences of words. + * @group param */ final val numPartitions = new IntParam( this, "numPartitions", "number of partitions for sentences of words") setDefault(numPartitions -> 1) /** @group getParam */ - def getNumPartitions: Int = getOrDefault(numPartitions) + def getNumPartitions: Int = $(numPartitions) /** * The minimum number of times a token must appear to be included in the word2vec model's * vocabulary. + * @group param */ final val minCount = new IntParam(this, "minCount", "the minimum number of times a token must " + "appear to be included in the word2vec model's vocabulary") setDefault(minCount -> 5) /** @group getParam */ - def getMinCount: Int = getOrDefault(minCount) + def getMinCount: Int = $(minCount) setDefault(stepSize -> 0.025) setDefault(maxIter -> 1) - setDefault(seed -> 42L) /** * Validate and transform the input schema. 
*/ - protected def validateAndTransformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - SchemaUtils.checkColumnType(schema, map(inputCol), new ArrayType(StringType, true)) - SchemaUtils.appendColumn(schema, map(outputCol), new VectorUDT) + protected def validateAndTransformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(inputCol), new ArrayType(StringType, true)) + SchemaUtils.appendColumn(schema, $(outputCol), new VectorUDT) } } /** - * :: AlphaComponent :: + * :: Experimental :: * Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further * natural language processing or machine learning process. */ -@AlphaComponent -final class Word2Vec extends Estimator[Word2VecModel] with Word2VecBase { +@Experimental +final class Word2Vec(override val uid: String) extends Estimator[Word2VecModel] with Word2VecBase { + + def this() = this(Identifiable.randomUID("w2v")) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) @@ -112,36 +115,32 @@ final class Word2Vec extends Estimator[Word2VecModel] with Word2VecBase { /** @group setParam */ def setMinCount(value: Int): this.type = set(minCount, value) - override def fit(dataset: DataFrame, paramMap: ParamMap): Word2VecModel = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) - val input = dataset.select(map(inputCol)).map { case Row(v: Seq[String]) => v } + override def fit(dataset: DataFrame): Word2VecModel = { + transformSchema(dataset.schema, logging = true) + val input = dataset.select($(inputCol)).map(_.getAs[Seq[String]](0)) val wordVectors = new feature.Word2Vec() - .setLearningRate(map(stepSize)) - .setMinCount(map(minCount)) - .setNumIterations(map(maxIter)) - .setNumPartitions(map(numPartitions)) - .setSeed(map(seed)) - .setVectorSize(map(vectorSize)) + .setLearningRate($(stepSize)) + .setMinCount($(minCount)) + .setNumIterations($(maxIter)) + .setNumPartitions($(numPartitions)) + .setSeed($(seed)) + .setVectorSize($(vectorSize)) .fit(input) - val model = new Word2VecModel(this, map, wordVectors) - Params.inheritValues(map, this, model) - model + copyValues(new Word2VecModel(uid, wordVectors).setParent(this)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } /** - * :: AlphaComponent :: + * :: Experimental :: * Model fitted by [[Word2Vec]]. */ -@AlphaComponent +@Experimental class Word2VecModel private[ml] ( - override val parent: Word2Vec, - override val fittingParamMap: ParamMap, + override val uid: String, wordVectors: feature.Word2VecModel) extends Model[Word2VecModel] with Word2VecBase { @@ -155,15 +154,14 @@ class Word2VecModel private[ml] ( * Transform a sentence column to a vector column to represent the whole sentence. The transform * is performed by averaging all word vectors it contains. 
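As a concrete illustration of the averaging just described, a short sketch (not part of the patch); the documents and parameter values are arbitrary and `sqlContext` is assumed to exist.

{{{
import org.apache.spark.ml.feature.Word2Vec

// Each row is a sentence that has already been split into words.
val documentDF = sqlContext.createDataFrame(Seq(
  "Hi I heard about Spark".split(" "),
  "I wish Java could use case classes".split(" "),
  "Logistic regression models are neat".split(" ")
).map(Tuple1.apply)).toDF("text")

val word2Vec = new Word2Vec()
  .setInputCol("text")
  .setOutputCol("result")
  .setVectorSize(3)
  .setMinCount(0)

val model = word2Vec.fit(documentDF)

// Each sentence becomes the average of the vectors of the words it contains.
model.transform(documentDF).select("result").show()
}}}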
*/ - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - transformSchema(dataset.schema, paramMap, logging = true) - val map = extractParamMap(paramMap) + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) val bWordVectors = dataset.sqlContext.sparkContext.broadcast(wordVectors) val word2Vec = udf { sentence: Seq[String] => if (sentence.size == 0) { - Vectors.sparse(map(vectorSize), Array.empty[Int], Array.empty[Double]) + Vectors.sparse($(vectorSize), Array.empty[Int], Array.empty[Double]) } else { - val cum = Vectors.zeros(map(vectorSize)) + val cum = Vectors.zeros($(vectorSize)) val model = bWordVectors.value.getVectors for (word <- sentence) { if (model.contains(word)) { @@ -176,10 +174,10 @@ class Word2VecModel private[ml] ( cum } } - dataset.withColumn(map(outputCol), word2Vec(col(map(inputCol)))) + dataset.withColumn($(outputCol), word2Vec(col($(inputCol)))) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/package-info.java b/mllib/src/main/scala/org/apache/spark/ml/package-info.java index 00d9c802e930d..87f4223964ada 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/package-info.java +++ b/mllib/src/main/scala/org/apache/spark/ml/package-info.java @@ -16,10 +16,10 @@ */ /** - * Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly + * Spark ML is a BETA component that adds a new set of machine learning APIs to let users quickly * assemble and configure practical machine learning pipelines. */ -@AlphaComponent +@Experimental package org.apache.spark.ml; -import org.apache.spark.annotation.AlphaComponent; +import org.apache.spark.annotation.Experimental; diff --git a/mllib/src/main/scala/org/apache/spark/ml/package.scala b/mllib/src/main/scala/org/apache/spark/ml/package.scala index ac75e9de1a8f2..c589d06d9f7e4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/package.scala @@ -18,7 +18,7 @@ package org.apache.spark /** - * Spark ML is an ALPHA component that adds a new set of machine learning APIs to let users quickly + * Spark ML is a BETA component that adds a new set of machine learning APIs to let users quickly * assemble and configure practical machine learning pipelines. * * @groupname param Parameters diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index df6360dce6013..473488dce9b0d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -22,12 +22,13 @@ import java.util.NoSuchElementException import scala.annotation.varargs import scala.collection.mutable +import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{AlphaComponent, DeveloperApi} +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.ml.util.Identifiable /** - * :: AlphaComponent :: + * :: DeveloperApi :: * A param with self-contained documentation and optionally default value. Primitive-typed param * should use the specialized versions, which are more friendly to Java users. 
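To illustrate what the switch from `parent: Params` to a `parent` uid string means in practice, a hypothetical snippet (not from the patch) showing that a param's identity is now its owner's uid plus its name.

{{{
import org.apache.spark.ml.feature.RegexTokenizer

val a = new RegexTokenizer()
val b = new RegexTokenizer()

// toString is now s"${parent}__$name", where parent is the owning instance's uid.
println(a.minTokenLength)                                   // e.g. regexTok_<random>__minTokenLength

// equals/hashCode follow the same rule: same owner uid and same name => equal params.
println(a.minTokenLength == a.getParam("minTokenLength"))   // true
println(a.minTokenLength == b.minTokenLength)               // false (different uids)
}}}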
* @@ -38,18 +39,23 @@ import org.apache.spark.ml.util.Identifiable * See [[ParamValidators]] for factory methods for common validation functions. * @tparam T param value type */ -@AlphaComponent -class Param[T] (val parent: Params, val name: String, val doc: String, val isValid: T => Boolean) +@DeveloperApi +class Param[T](val parent: String, val name: String, val doc: String, val isValid: T => Boolean) extends Serializable { - def this(parent: Params, name: String, doc: String) = + def this(parent: Identifiable, name: String, doc: String, isValid: T => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue[T]) + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + /** * Assert that the given value is valid for this parameter. * * Note: Parameter checks involving interactions between multiple parameters should be - * implemented in [[Params.validate()]]. Checks for input/output columns should be + * implemented in [[Params.validateParams()]]. Checks for input/output columns should be * implemented in [[org.apache.spark.ml.PipelineStage.transformSchema()]]. * * DEVELOPERS: This method is only called by [[ParamPair]], which means that all parameters @@ -59,8 +65,7 @@ class Param[T] (val parent: Params, val name: String, val doc: String, val isVal */ private[param] def validate(value: T): Unit = { if (!isValid(value)) { - throw new IllegalArgumentException(s"$parent parameter $name given invalid value $value." + - s" Parameter description: $toString") + throw new IllegalArgumentException(s"$parent parameter $name given invalid value $value.") } } @@ -74,26 +79,24 @@ class Param[T] (val parent: Params, val name: String, val doc: String, val isVal */ def ->(value: T): ParamPair[T] = ParamPair(this, value) - /** - * Converts this param's name, doc, and optionally its default value and the user-supplied - * value in its parent to string. - */ - override def toString: String = { - val valueStr = if (parent.isDefined(this)) { - val defaultValueStr = parent.getDefault(this).map("default: " + _) - val currentValueStr = parent.get(this).map("current: " + _) - (defaultValueStr ++ currentValueStr).mkString("(", ", ", ")") - } else { - "(undefined)" + override final def toString: String = s"${parent}__$name" + + override final def hashCode: Int = toString.## + + override final def equals(obj: Any): Boolean = { + obj match { + case p: Param[_] => (p.parent == parent) && (p.name == name) + case _ => false } - s"$name: $doc $valueStr" } } /** + * :: DeveloperApi :: * Factory methods for common validation functions for [[Param.isValid]]. * The numerical methods only support Int, Long, Float, and Double. */ +@DeveloperApi object ParamValidators { /** (private[param]) Default validation always return true */ @@ -171,56 +174,134 @@ object ParamValidators { // specialize primitive-typed params because Java doesn't recognize scala.Double, scala.Int, ... -/** Specialized version of [[Param[Double]]] for Java. */ -class DoubleParam(parent: Params, name: String, doc: String, isValid: Double => Boolean) +/** + * :: DeveloperApi :: + * Specialized version of [[Param[Double]]] for Java. 
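As a sketch of how a developer might use the specialized params and validators after this change (hypothetical, not from the patch): the `Smoother` class, its param, and its default are all invented.

{{{
import org.apache.spark.ml.param.{DoubleParam, ParamValidators, Params}
import org.apache.spark.ml.util.Identifiable

class Smoother(override val uid: String) extends Params {

  def this() = this(Identifiable.randomUID("smoother"))

  // DoubleParam avoids scala.Double in the signature, which keeps the API usable from Java.
  // The validator rejects values outside (0, 1]: exclusive lower bound, inclusive upper bound.
  val alpha: DoubleParam = new DoubleParam(this, "alpha", "smoothing factor, in range (0, 1]",
    ParamValidators.inRange(0.0, 1.0, false, true))
  setDefault(alpha -> 0.5)

  def setAlpha(value: Double): this.type = set(alpha, value)
  def getAlpha: Double = $(alpha)
}

// set() goes through ParamPair, which calls validate(): setAlpha(1.5) would throw here.
val smoother = new Smoother().setAlpha(0.9)
}}}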
+ */ +@DeveloperApi +class DoubleParam(parent: String, name: String, doc: String, isValid: Double => Boolean) extends Param[Double](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Double => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Double): ParamPair[Double] = super.w(value) } -/** Specialized version of [[Param[Int]]] for Java. */ -class IntParam(parent: Params, name: String, doc: String, isValid: Int => Boolean) +/** + * :: DeveloperApi :: + * Specialized version of [[Param[Int]]] for Java. + */ +@DeveloperApi +class IntParam(parent: String, name: String, doc: String, isValid: Int => Boolean) extends Param[Int](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Int => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Int): ParamPair[Int] = super.w(value) } -/** Specialized version of [[Param[Float]]] for Java. */ -class FloatParam(parent: Params, name: String, doc: String, isValid: Float => Boolean) +/** + * :: DeveloperApi :: + * Specialized version of [[Param[Float]]] for Java. + */ +@DeveloperApi +class FloatParam(parent: String, name: String, doc: String, isValid: Float => Boolean) extends Param[Float](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Float => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Float): ParamPair[Float] = super.w(value) } -/** Specialized version of [[Param[Long]]] for Java. */ -class LongParam(parent: Params, name: String, doc: String, isValid: Long => Boolean) +/** + * :: DeveloperApi :: + * Specialized version of [[Param[Long]]] for Java. + */ +@DeveloperApi +class LongParam(parent: String, name: String, doc: String, isValid: Long => Boolean) extends Param[Long](parent, name, doc, isValid) { - def this(parent: Params, name: String, doc: String) = + def this(parent: String, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) + def this(parent: Identifiable, name: String, doc: String, isValid: Long => Boolean) = + this(parent.uid, name, doc, isValid) + + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Long): ParamPair[Long] = super.w(value) } -/** Specialized version of [[Param[Boolean]]] for Java. */ -class BooleanParam(parent: Params, name: String, doc: String) // No need for isValid +/** + * :: DeveloperApi :: + * Specialized version of [[Param[Boolean]]] for Java. 
+ */ +@DeveloperApi +class BooleanParam(parent: String, name: String, doc: String) // No need for isValid extends Param[Boolean](parent, name, doc) { + def this(parent: Identifiable, name: String, doc: String) = this(parent.uid, name, doc) + override def w(value: Boolean): ParamPair[Boolean] = super.w(value) } /** + * :: DeveloperApi :: + * Specialized version of [[Param[Array[String]]]] for Java. + */ +@DeveloperApi +class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array[String] => Boolean) + extends Param[Array[String]](parent, name, doc, isValid) { + + def this(parent: Params, name: String, doc: String) = + this(parent, name, doc, ParamValidators.alwaysTrue) + + override def w(value: Array[String]): ParamPair[Array[String]] = super.w(value) + + /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + def w(value: java.util.List[String]): ParamPair[Array[String]] = w(value.asScala.toArray) +} + +/** + * :: DeveloperApi :: + * Specialized version of [[Param[Array[Double]]]] for Java. + */ +@DeveloperApi +class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array[Double] => Boolean) + extends Param[Array[Double]](parent, name, doc, isValid) { + + def this(parent: Params, name: String, doc: String) = + this(parent, name, doc, ParamValidators.alwaysTrue) + + override def w(value: Array[Double]): ParamPair[Array[Double]] = super.w(value) + + /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + def w(value: java.util.List[Double]): ParamPair[Array[Double]] = w(value.asScala.toArray) +} + +/** + * :: Experimental :: * A param amd its value. */ +@Experimental case class ParamPair[T](param: Param[T], value: T) { // This is *the* place Param.validate is called. Whenever a parameter is specified, we should // always construct a ParamPair so that validate is called. @@ -228,16 +309,19 @@ case class ParamPair[T](param: Param[T], value: T) { } /** - * :: AlphaComponent :: + * :: DeveloperApi :: * Trait for components that take parameters. This also provides an internal param map to store * parameter values attached to the instance. */ -@AlphaComponent +@DeveloperApi trait Params extends Identifiable with Serializable { /** * Returns all params sorted by their names. The default implementation uses Java reflection to * list all public methods that have no arguments and return [[Param]]. + * + * Note: Developer should not use this method in constructor because we cannot guarantee that + * this variable gets initialized before other params. */ lazy val params: Array[Param[_]] = { val methods = this.getClass.getMethods @@ -250,31 +334,45 @@ trait Params extends Identifiable with Serializable { } /** - * Validates parameter values stored internally plus the input parameter map. - * Raises an exception if any parameter is invalid. + * Validates parameter values stored internally. + * Raise an exception if any parameter value is invalid. * * This only needs to check for interactions between parameters. * Parameter value checks which do not depend on other parameters are handled by * [[Param.validate()]]. This method does not handle input/output column parameters; * those are checked during schema validation. */ - def validate(paramMap: ParamMap): Unit = { } + def validateParams(): Unit = { + params.filter(isDefined).foreach { param => + param.asInstanceOf[Param[Any]].validate($(param)) + } + } /** - * Validates parameter values stored internally. 
- * Raise an exception if any parameter value is invalid. - * - * This only needs to check for interactions between parameters. - * Parameter value checks which do not depend on other parameters are handled by - * [[Param.validate()]]. This method does not handle input/output column parameters; - * those are checked during schema validation. + * Explains a param. + * @param param input param, must belong to this instance. + * @return a string that contains the input param name, doc, and optionally its default value and + * the user-supplied value */ - def validate(): Unit = validate(ParamMap.empty) + def explainParam(param: Param[_]): String = { + shouldOwn(param) + val valueStr = if (isDefined(param)) { + val defaultValueStr = getDefault(param).map("default: " + _) + val currentValueStr = get(param).map("current: " + _) + (defaultValueStr ++ currentValueStr).mkString("(", ", ", ")") + } else { + "(undefined)" + } + s"${param.name}: ${param.doc} $valueStr" + } /** - * Returns the documentation of all params. + * Explains all params of this instance. + * @see [[explainParam()]] */ - def explainParams(): String = params.mkString("\n") + def explainParams(): String = { + params.map(explainParam).mkString("\n") + } /** Checks whether a param is explicitly set. */ final def isSet(param: Param[_]): Boolean = { @@ -288,6 +386,11 @@ trait Params extends Identifiable with Serializable { defaultParamMap.contains(param) || paramMap.contains(param) } + /** Tests whether this instance contains a param with a given name. */ + def hasParam(paramName: String): Boolean = { + params.exists(_.name == paramName) + } + /** Gets a param by its name. */ def getParam(paramName: String): Param[Any] = { params.find(_.name == paramName).getOrElse { @@ -299,9 +402,7 @@ trait Params extends Identifiable with Serializable { * Sets a parameter in the embedded param map. */ protected final def set[T](param: Param[T], value: T): this.type = { - shouldOwn(param) - paramMap.put(param.asInstanceOf[Param[Any]], value) - this + set(param -> value) } /** @@ -311,6 +412,15 @@ trait Params extends Identifiable with Serializable { set(getParam(param), value) } + /** + * Sets a parameter in the embedded param map. + */ + protected final def set(paramPair: ParamPair[_]): this.type = { + shouldOwn(paramPair.param) + paramMap.put(paramPair) + this + } + /** * Optionally returns the user-supplied value of a param. */ @@ -337,6 +447,9 @@ trait Params extends Identifiable with Serializable { get(param).orElse(getDefault(param)).get } + /** An alias for [[getOrDefault()]]. */ + protected final def $[T](param: Param[T]): T = getOrDefault(param) + /** * Sets a default value for a param. * @param param param to set the default value. Make sure that this param is initialized before @@ -344,21 +457,18 @@ trait Params extends Identifiable with Serializable { * @param value the default value */ protected final def setDefault[T](param: Param[T], value: T): this.type = { - shouldOwn(param) - defaultParamMap.put(param, value) + defaultParamMap.put(param -> value) this } /** * Sets default values for a list of params. * - * Note: Java developers should use the single-parameter [[setDefault()]]. - * Annotating this with varargs causes compilation failures. - * * @param paramPairs a list of param pairs that specify params and their default values to set * respectively. Make sure that the params are initialized before this method * gets called. 
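For reference, a small sketch of the new introspection methods on an existing stage (not part of the patch); `Word2Vec` and the parameter values are just convenient examples.

{{{
import org.apache.spark.ml.feature.Word2Vec

val w2v = new Word2Vec().setVectorSize(50)

// One line per param: name, doc, and "(default: ...)" / "(current: ...)" where defined.
println(w2v.explainParams())

// Params can also be looked up and explained individually by name.
println(w2v.hasParam("minCount"))                   // true
println(w2v.explainParam(w2v.getParam("minCount")))
}}}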
*/ + @varargs protected final def setDefault(paramPairs: ParamPair[_]*): this.type = { paramPairs.foreach { p => setDefault(p.param.asInstanceOf[Param[Any]], p.value) @@ -382,19 +492,30 @@ trait Params extends Identifiable with Serializable { defaultParamMap.contains(param) } + /** + * Creates a copy of this instance with the same UID and some extra params. + * The default implementation tries to create a new instance with the same UID. + * Then it copies the embedded and extra parameters over and returns the new instance. + * Subclasses should override this method if the default approach is not sufficient. + */ + def copy(extra: ParamMap): Params = { + val that = this.getClass.getConstructor(classOf[String]).newInstance(uid) + copyValues(that, extra) + } + /** * Extracts the embedded default param values and user-supplied values, and then merges them with * extra values from input into a flat param map, where the latter value is used if there exist - * conflicts, i.e., with ordering: default param values < user-supplied values < extraParamMap. + * conflicts, i.e., with ordering: default param values < user-supplied values < extra. */ - protected final def extractParamMap(extraParamMap: ParamMap): ParamMap = { - defaultParamMap ++ paramMap ++ extraParamMap + final def extractParamMap(extra: ParamMap): ParamMap = { + defaultParamMap ++ paramMap ++ extra } /** * [[extractParamMap]] with no extra values. */ - protected final def extractParamMap(): ParamMap = { + final def extractParamMap(): ParamMap = { extractParamMap(ParamMap.empty) } @@ -406,52 +527,41 @@ trait Params extends Identifiable with Serializable { /** Validates that the input param belongs to this instance. */ private def shouldOwn(param: Param[_]): Unit = { - require(param.parent.eq(this), s"Param $param does not belong to $this.") + require(param.parent == uid && hasParam(param.name), s"Param $param does not belong to $this.") } -} -/** - * :: DeveloperApi :: - * - * Helper functionality for developers. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. - */ -@DeveloperApi -private[spark] object Params { - - /** - * Copies parameter values from the parent estimator to the child model it produced. - * @param paramMap the param map that holds parameters of the parent - * @param parent the parent estimator - * @param child the child model - */ - def inheritValues[E <: Params, M <: E]( - paramMap: ParamMap, - parent: E, - child: M): Unit = { - val childParams = child.params.map(_.name).toSet - parent.params.foreach { param => - if (paramMap.contains(param) && childParams.contains(param.name)) { - child.set(child.getParam(param.name), paramMap(param)) + /** + * Copies param values from this instance to another instance for params shared by them. + * @param to the target instance + * @param extra extra params to be copied + * @return the target instance with param values copied + */ + protected def copyValues[T <: Params](to: T, extra: ParamMap = ParamMap.empty): T = { + val map = extractParamMap(extra) + params.foreach { param => + if (map.contains(param) && to.hasParam(param.name)) { + to.set(param.name, map(param)) } } + to } } /** + * :: DeveloperApi :: * Java-friendly wrapper for [[Params]]. * Java developers who need to extend [[Params]] should use this class instead. * If you need to extend a abstract class which already extends [[Params]], then that abstract * class should be Java-friendly as well. 
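To spell out the precedence rule stated above, a hypothetical snippet (not from the patch) using `RegexTokenizer` and an extra `ParamMap`.

{{{
import org.apache.spark.ml.feature.RegexTokenizer
import org.apache.spark.ml.param.ParamMap

val tok = new RegexTokenizer().setMinTokenLength(2)
val extra = ParamMap(tok.minTokenLength -> 3)

// Merge order: default values < user-supplied values < extra.
val merged = tok.extractParamMap(extra)
println(merged(tok.minTokenLength))   // 3, the value from `extra` wins
}}}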
*/ +@DeveloperApi abstract class JavaParams extends Params /** - * :: AlphaComponent :: + * :: Experimental :: * A param to value map. */ -@AlphaComponent +@Experimental final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) extends Serializable { @@ -469,7 +579,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) /** * Puts a (param, value) pair (overwrites if the input param exists). */ - def put[T](param: Param[T], value: T): this.type = put(ParamPair(param, value)) + def put[T](param: Param[T], value: T): this.type = put(param -> value) /** * Puts a list of param pairs (overwrites if the input params exists). @@ -535,7 +645,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) override def toString: String = { map.toSeq.sortBy(_._1.name).map { case (param, value) => - s"\t${param.parent.uid}-${param.name}: $value" + s"\t${param.parent}-${param.name}: $value" }.mkString("{\n", ",\n", "\n}") } @@ -572,6 +682,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) def size: Int = map.size } +@Experimental object ParamMap { /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 7da4bb4b4bf25..1ffb5eddc36bd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -21,8 +21,6 @@ import java.io.PrintWriter import scala.reflect.ClassTag -import org.apache.spark.ml.param.ParamValidators - /** * Code generator for shared params (sharedParams.scala). Run under the Spark folder with * {{{ @@ -42,18 +40,20 @@ private[shared] object SharedParamsCodeGen { ParamDesc[String]("predictionCol", "prediction column name", Some("\"prediction\"")), ParamDesc[String]("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", Some("\"rawPrediction\"")), - ParamDesc[String]("probabilityCol", - "column name for predicted class conditional probabilities", Some("\"probability\"")), + ParamDesc[String]("probabilityCol", "Column name for predicted class conditional" + + " probabilities. Note: Not all models output well-calibrated probability estimates!" + + " These probabilities should be treated as confidences, not precise probabilities.", + Some("\"probability\"")), ParamDesc[Double]("threshold", "threshold in binary classification prediction, in range [0, 1]", isValid = "ParamValidators.inRange(0, 1)"), ParamDesc[String]("inputCol", "input column name"), ParamDesc[Array[String]]("inputCols", "input column names"), - ParamDesc[String]("outputCol", "output column name"), + ParamDesc[String]("outputCol", "output column name", Some("uid + \"__output\"")), ParamDesc[Int]("checkpointInterval", "checkpoint interval (>= 1)", isValid = "ParamValidators.gtEq(1)"), ParamDesc[Boolean]("fitIntercept", "whether to fit an intercept term", Some("true")), - ParamDesc[Long]("seed", "random seed", Some("Utils.random.nextLong()")), + ParamDesc[Long]("seed", "random seed", Some("this.getClass.getName.hashCode.toLong")), ParamDesc[Double]("elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]." + " For alpha = 0, the penalty is an L2 penalty. 
For alpha = 1, it is an L1 penalty.", isValid = "ParamValidators.inRange(0, 1)"), @@ -85,6 +85,7 @@ private[shared] object SharedParamsCodeGen { case _ if c == classOf[Float] => "FloatParam" case _ if c == classOf[Double] => "DoubleParam" case _ if c == classOf[Boolean] => "BooleanParam" + case _ if c.isArray && c.getComponentType == classOf[String] => s"StringArrayParam" case _ => s"Param[${getTypeString(c)}]" } } @@ -142,7 +143,7 @@ private[shared] object SharedParamsCodeGen { | final val $name: $Param = new $Param(this, "$name", "$doc"$isValid) |$setDefault | /** @group getParam */ - | final def get$Name: $T = getOrDefault($name) + | final def get$Name: $T = $$($name) |} |""".stripMargin } @@ -169,7 +170,6 @@ private[shared] object SharedParamsCodeGen { | |package org.apache.spark.ml.param.shared | - |import org.apache.spark.annotation.DeveloperApi |import org.apache.spark.ml.param._ |import org.apache.spark.util.Utils | diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index e1549f46a68d4..ed08417bd4df8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.param.shared -import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param._ import org.apache.spark.util.Utils @@ -37,7 +36,7 @@ private[ml] trait HasRegParam extends Params { final val regParam: DoubleParam = new DoubleParam(this, "regParam", "regularization parameter (>= 0)", ParamValidators.gtEq(0)) /** @group getParam */ - final def getRegParam: Double = getOrDefault(regParam) + final def getRegParam: Double = $(regParam) } /** @@ -52,7 +51,7 @@ private[ml] trait HasMaxIter extends Params { final val maxIter: IntParam = new IntParam(this, "maxIter", "max number of iterations (>= 0)", ParamValidators.gtEq(0)) /** @group getParam */ - final def getMaxIter: Int = getOrDefault(maxIter) + final def getMaxIter: Int = $(maxIter) } /** @@ -69,7 +68,7 @@ private[ml] trait HasFeaturesCol extends Params { setDefault(featuresCol, "features") /** @group getParam */ - final def getFeaturesCol: String = getOrDefault(featuresCol) + final def getFeaturesCol: String = $(featuresCol) } /** @@ -86,7 +85,7 @@ private[ml] trait HasLabelCol extends Params { setDefault(labelCol, "label") /** @group getParam */ - final def getLabelCol: String = getOrDefault(labelCol) + final def getLabelCol: String = $(labelCol) } /** @@ -103,7 +102,7 @@ private[ml] trait HasPredictionCol extends Params { setDefault(predictionCol, "prediction") /** @group getParam */ - final def getPredictionCol: String = getOrDefault(predictionCol) + final def getPredictionCol: String = $(predictionCol) } /** @@ -120,7 +119,7 @@ private[ml] trait HasRawPredictionCol extends Params { setDefault(rawPredictionCol, "rawPrediction") /** @group getParam */ - final def getRawPredictionCol: String = getOrDefault(rawPredictionCol) + final def getRawPredictionCol: String = $(rawPredictionCol) } /** @@ -129,15 +128,15 @@ private[ml] trait HasRawPredictionCol extends Params { private[ml] trait HasProbabilityCol extends Params { /** - * Param for column name for predicted class conditional probabilities. + * Param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! 
These probabilities should be treated as confidences, not precise probabilities.. * @group param */ - final val probabilityCol: Param[String] = new Param[String](this, "probabilityCol", "column name for predicted class conditional probabilities") + final val probabilityCol: Param[String] = new Param[String](this, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") setDefault(probabilityCol, "probability") /** @group getParam */ - final def getProbabilityCol: String = getOrDefault(probabilityCol) + final def getProbabilityCol: String = $(probabilityCol) } /** @@ -152,7 +151,7 @@ private[ml] trait HasThreshold extends Params { final val threshold: DoubleParam = new DoubleParam(this, "threshold", "threshold in binary classification prediction, in range [0, 1]", ParamValidators.inRange(0, 1)) /** @group getParam */ - final def getThreshold: Double = getOrDefault(threshold) + final def getThreshold: Double = $(threshold) } /** @@ -167,7 +166,7 @@ private[ml] trait HasInputCol extends Params { final val inputCol: Param[String] = new Param[String](this, "inputCol", "input column name") /** @group getParam */ - final def getInputCol: String = getOrDefault(inputCol) + final def getInputCol: String = $(inputCol) } /** @@ -179,14 +178,14 @@ private[ml] trait HasInputCols extends Params { * Param for input column names. * @group param */ - final val inputCols: Param[Array[String]] = new Param[Array[String]](this, "inputCols", "input column names") + final val inputCols: StringArrayParam = new StringArrayParam(this, "inputCols", "input column names") /** @group getParam */ - final def getInputCols: Array[String] = getOrDefault(inputCols) + final def getInputCols: Array[String] = $(inputCols) } /** - * (private[ml]) Trait for shared param outputCol. + * (private[ml]) Trait for shared param outputCol (default: uid + "__output"). */ private[ml] trait HasOutputCol extends Params { @@ -196,8 +195,10 @@ private[ml] trait HasOutputCol extends Params { */ final val outputCol: Param[String] = new Param[String](this, "outputCol", "output column name") + setDefault(outputCol, uid + "__output") + /** @group getParam */ - final def getOutputCol: String = getOrDefault(outputCol) + final def getOutputCol: String = $(outputCol) } /** @@ -212,7 +213,7 @@ private[ml] trait HasCheckpointInterval extends Params { final val checkpointInterval: IntParam = new IntParam(this, "checkpointInterval", "checkpoint interval (>= 1)", ParamValidators.gtEq(1)) /** @group getParam */ - final def getCheckpointInterval: Int = getOrDefault(checkpointInterval) + final def getCheckpointInterval: Int = $(checkpointInterval) } /** @@ -229,11 +230,11 @@ private[ml] trait HasFitIntercept extends Params { setDefault(fitIntercept, true) /** @group getParam */ - final def getFitIntercept: Boolean = getOrDefault(fitIntercept) + final def getFitIntercept: Boolean = $(fitIntercept) } /** - * (private[ml]) Trait for shared param seed (default: Utils.random.nextLong()). + * (private[ml]) Trait for shared param seed (default: this.getClass.getName.hashCode.toLong). 
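Two of the regenerated defaults above are behavioural changes worth seeing in isolation; a hypothetical snippet (not from the patch), with the printed values shown only as examples.

{{{
import org.apache.spark.ml.feature.{Tokenizer, Word2Vec}

// outputCol now has a default of uid + "__output", so a stage can be used
// without calling setOutputCol at all.
val tok = new Tokenizer().setInputCol("sentence")
println(tok.getOutputCol)   // e.g. tok_<random>__output

// The seed default is derived from the class name rather than Utils.random,
// so it is stable across runs and instances (and can still be overridden via setSeed).
println(new Word2Vec().getSeed == new Word2Vec().getSeed)   // true
}}}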
*/ private[ml] trait HasSeed extends Params { @@ -243,10 +244,10 @@ private[ml] trait HasSeed extends Params { */ final val seed: LongParam = new LongParam(this, "seed", "random seed") - setDefault(seed, Utils.random.nextLong()) + setDefault(seed, this.getClass.getName.hashCode.toLong) /** @group getParam */ - final def getSeed: Long = getOrDefault(seed) + final def getSeed: Long = $(seed) } /** @@ -261,7 +262,7 @@ private[ml] trait HasElasticNetParam extends Params { final val elasticNetParam: DoubleParam = new DoubleParam(this, "elasticNetParam", "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.", ParamValidators.inRange(0, 1)) /** @group getParam */ - final def getElasticNetParam: Double = getOrDefault(elasticNetParam) + final def getElasticNetParam: Double = $(elasticNetParam) } /** @@ -276,7 +277,7 @@ private[ml] trait HasTol extends Params { final val tol: DoubleParam = new DoubleParam(this, "tol", "the convergence tolerance for iterative algorithms") /** @group getParam */ - final def getTol: Double = getOrDefault(tol) + final def getTol: Double = $(tol) } /** @@ -291,6 +292,6 @@ private[ml] trait HasStepSize extends Params { final val stepSize: DoubleParam = new DoubleParam(this, "stepSize", "Step size to be used for each iteration of optimization.") /** @group getParam */ - final def getStepSize: Double = getOrDefault(stepSize) + final def getStepSize: Double = $(stepSize) } // scalastyle:on diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index f9f2b2764ddb1..df009d855ecbb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -31,25 +31,51 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.netlib.util.intW import org.apache.spark.{Logging, Partitioner} -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.optimization.NNLS import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructType} import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter} import org.apache.spark.util.random.XORShiftRandom +/** + * Common params for ALS and ALSModel. + */ +private[recommendation] trait ALSModelParams extends Params with HasPredictionCol { + /** + * Param for the column name for user ids. + * Default: "user" + * @group param + */ + val userCol = new Param[String](this, "userCol", "column name for user ids") + + /** @group getParam */ + def getUserCol: String = $(userCol) + + /** + * Param for the column name for item ids. + * Default: "item" + * @group param + */ + val itemCol = new Param[String](this, "itemCol", "column name for item ids") + + /** @group getParam */ + def getItemCol: String = $(itemCol) +} + /** * Common params for ALS. 
*/ -private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam - with HasPredictionCol with HasCheckpointInterval { +private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam + with HasPredictionCol with HasCheckpointInterval with HasSeed { /** * Param for rank of the matrix factorization (>= 1). @@ -59,7 +85,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR val rank = new IntParam(this, "rank", "rank of the factorization", ParamValidators.gtEq(1)) /** @group getParam */ - def getRank: Int = getOrDefault(rank) + def getRank: Int = $(rank) /** * Param for number of user blocks (>= 1). @@ -70,7 +96,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR ParamValidators.gtEq(1)) /** @group getParam */ - def getNumUserBlocks: Int = getOrDefault(numUserBlocks) + def getNumUserBlocks: Int = $(numUserBlocks) /** * Param for number of item blocks (>= 1). @@ -81,7 +107,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR ParamValidators.gtEq(1)) /** @group getParam */ - def getNumItemBlocks: Int = getOrDefault(numItemBlocks) + def getNumItemBlocks: Int = $(numItemBlocks) /** * Param to decide whether to use implicit preference. @@ -91,7 +117,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR val implicitPrefs = new BooleanParam(this, "implicitPrefs", "whether to use implicit preference") /** @group getParam */ - def getImplicitPrefs: Boolean = getOrDefault(implicitPrefs) + def getImplicitPrefs: Boolean = $(implicitPrefs) /** * Param for the alpha parameter in the implicit preference formulation (>= 0). @@ -102,27 +128,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR ParamValidators.gtEq(0)) /** @group getParam */ - def getAlpha: Double = getOrDefault(alpha) - - /** - * Param for the column name for user ids. - * Default: "user" - * @group param - */ - val userCol = new Param[String](this, "userCol", "column name for user ids") - - /** @group getParam */ - def getUserCol: String = getOrDefault(userCol) - - /** - * Param for the column name for item ids. - * Default: "item" - * @group param - */ - val itemCol = new Param[String](this, "itemCol", "column name for item ids") - - /** @group getParam */ - def getItemCol: String = getOrDefault(itemCol) + def getAlpha: Double = $(alpha) /** * Param for the column name for ratings. @@ -132,7 +138,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR val ratingCol = new Param[String](this, "ratingCol", "column name for ratings") /** @group getParam */ - def getRatingCol: String = getOrDefault(ratingCol) + def getRatingCol: String = $(ratingCol) /** * Param for whether to apply nonnegativity constraints. @@ -143,7 +149,7 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR this, "nonnegative", "whether to use nonnegative constraint for least squares") /** @group getParam */ - def getNonnegative: Boolean = getOrDefault(nonnegative) + def getNonnegative: Boolean = $(nonnegative) setDefault(rank -> 10, maxIter -> 10, regParam -> 0.1, numUserBlocks -> 10, numItemBlocks -> 10, implicitPrefs -> false, alpha -> 1.0, userCol -> "user", itemCol -> "item", @@ -152,65 +158,69 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR /** * Validates and transforms the input schema. 
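The validateAndTransformSchema rewrite in the hunk that follows swaps bare assert calls for SchemaUtils helpers; in terms of the public StructType API it amounts to roughly the sketch below. The column names are hard-coded for illustration, and this is an approximation of the check, not the helper code itself:

    import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType}

    // Approximate, user-facing equivalent of the reworked ALS schema validation.
    def checkAlsSchema(schema: StructType): StructType = {
      require(schema("user").dataType == IntegerType, "user column must be IntegerType")
      require(schema("item").dataType == IntegerType, "item column must be IntegerType")
      val ratingType = schema("rating").dataType
      require(ratingType == FloatType || ratingType == DoubleType, "rating must be Float or Double")
      require(!schema.fieldNames.contains("prediction"), "prediction column already exists")
      StructType(schema.fields :+ StructField("prediction", FloatType, nullable = false))
    }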
* @param schema input schema - * @param paramMap extra params * @return output schema */ - protected def validateAndTransformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - assert(schema(map(userCol)).dataType == IntegerType) - assert(schema(map(itemCol)).dataType== IntegerType) - val ratingType = schema(map(ratingCol)).dataType - assert(ratingType == FloatType || ratingType == DoubleType) - val predictionColName = map(predictionCol) - assert(!schema.fieldNames.contains(predictionColName), - s"Prediction column $predictionColName already exists.") - val newFields = schema.fields :+ StructField(map(predictionCol), FloatType, nullable = false) - StructType(newFields) + protected def validateAndTransformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(userCol), IntegerType) + SchemaUtils.checkColumnType(schema, $(itemCol), IntegerType) + val ratingType = schema($(ratingCol)).dataType + require(ratingType == FloatType || ratingType == DoubleType) + SchemaUtils.appendColumn(schema, $(predictionCol), FloatType) } } /** + * :: Experimental :: * Model fitted by ALS. + * + * @param rank rank of the matrix factorization model + * @param userFactors a DataFrame that stores user factors in two columns: `id` and `features` + * @param itemFactors a DataFrame that stores item factors in two columns: `id` and `features` */ +@Experimental class ALSModel private[ml] ( - override val parent: ALS, - override val fittingParamMap: ParamMap, - k: Int, - userFactors: RDD[(Int, Array[Float])], - itemFactors: RDD[(Int, Array[Float])]) - extends Model[ALSModel] with ALSParams { + override val uid: String, + val rank: Int, + @transient val userFactors: DataFrame, + @transient val itemFactors: DataFrame) + extends Model[ALSModel] with ALSModelParams { /** @group setParam */ - def setPredictionCol(value: String): this.type = set(predictionCol, value) + def setUserCol(value: String): this.type = set(userCol, value) - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - import dataset.sqlContext.implicits._ - val map = extractParamMap(paramMap) - val users = userFactors.toDF("id", "features") - val items = itemFactors.toDF("id", "features") + /** @group setParam */ + def setItemCol(value: String): this.type = set(itemCol, value) + + /** @group setParam */ + def setPredictionCol(value: String): this.type = set(predictionCol, value) + override def transform(dataset: DataFrame): DataFrame = { // Register a UDF for DataFrame, and then // create a new column named map(predictionCol) by running the predict UDF. 
val predict = udf { (userFeatures: Seq[Float], itemFeatures: Seq[Float]) => if (userFeatures != null && itemFeatures != null) { - blas.sdot(k, userFeatures.toArray, 1, itemFeatures.toArray, 1) + blas.sdot(rank, userFeatures.toArray, 1, itemFeatures.toArray, 1) } else { Float.NaN } } dataset - .join(users, dataset(map(userCol)) === users("id"), "left") - .join(items, dataset(map(itemCol)) === items("id"), "left") - .select(dataset("*"), predict(users("features"), items("features")).as(map(predictionCol))) + .join(userFactors, dataset($(userCol)) === userFactors("id"), "left") + .join(itemFactors, dataset($(itemCol)) === itemFactors("id"), "left") + .select(dataset("*"), + predict(userFactors("features"), itemFactors("features")).as($(predictionCol))) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + SchemaUtils.checkColumnType(schema, $(userCol), IntegerType) + SchemaUtils.checkColumnType(schema, $(itemCol), IntegerType) + SchemaUtils.appendColumn(schema, $(predictionCol), FloatType) } } /** + * :: Experimental :: * Alternating Least Squares (ALS) matrix factorization. * * ALS attempts to estimate the ratings matrix `R` as the product of two lower-rank matrices, @@ -239,10 +249,13 @@ class ALSModel private[ml] ( * indicated user * preferences rather than explicit ratings given to items. */ -class ALS extends Estimator[ALSModel] with ALSParams { +@Experimental +class ALS(override val uid: String) extends Estimator[ALSModel] with ALSParams { import org.apache.spark.ml.recommendation.ALS.Rating + def this() = this(Identifiable.randomUID("als")) + /** @group setParam */ def setRank(value: Int): this.type = set(rank, value) @@ -282,6 +295,9 @@ class ALS extends Estimator[ALSModel] with ALSParams { /** @group setParam */ def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) + /** @group setParam */ + def setSeed(value: Long): this.type = set(seed, value) + /** * Sets both numUserBlocks and numItemBlocks to the specific value. 
* @group setParam @@ -292,25 +308,27 @@ class ALS extends Estimator[ALSModel] with ALSParams { this } - override def fit(dataset: DataFrame, paramMap: ParamMap): ALSModel = { - val map = extractParamMap(paramMap) + override def fit(dataset: DataFrame): ALSModel = { + import dataset.sqlContext.implicits._ val ratings = dataset - .select(col(map(userCol)), col(map(itemCol)), col(map(ratingCol)).cast(FloatType)) + .select(col($(userCol)).cast(IntegerType), col($(itemCol)).cast(IntegerType), + col($(ratingCol)).cast(FloatType)) .map { row => Rating(row.getInt(0), row.getInt(1), row.getFloat(2)) } - val (userFactors, itemFactors) = ALS.train(ratings, rank = map(rank), - numUserBlocks = map(numUserBlocks), numItemBlocks = map(numItemBlocks), - maxIter = map(maxIter), regParam = map(regParam), implicitPrefs = map(implicitPrefs), - alpha = map(alpha), nonnegative = map(nonnegative), - checkpointInterval = map(checkpointInterval)) - val model = new ALSModel(this, map, map(rank), userFactors, itemFactors) - Params.inheritValues(map, this, model) - model + val (userFactors, itemFactors) = ALS.train(ratings, rank = $(rank), + numUserBlocks = $(numUserBlocks), numItemBlocks = $(numItemBlocks), + maxIter = $(maxIter), regParam = $(regParam), implicitPrefs = $(implicitPrefs), + alpha = $(alpha), nonnegative = $(nonnegative), + checkpointInterval = $(checkpointInterval), seed = $(seed)) + val userDF = userFactors.toDF("id", "features") + val itemDF = itemFactors.toDF("id", "features") + val model = new ALSModel(uid, $(rank), userDF, itemDF).setParent(this) + copyValues(model) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - validateAndTransformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + validateAndTransformSchema(schema) } } @@ -325,7 +343,11 @@ class ALS extends Estimator[ALSModel] with ALSParams { @DeveloperApi object ALS extends Logging { - /** Rating class for better code readability. */ + /** + * :: DeveloperApi :: + * Rating class for better code readability. + */ + @DeveloperApi case class Rating[@specialized(Int, Long) ID](user: ID, item: ID, rating: Float) /** Trait for least squares solvers applied to the normal equation. */ @@ -486,8 +508,10 @@ object ALS extends Logging { } /** + * :: DeveloperApi :: * Implementation of the ALS algorithm. 
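Taken together, the ALS hunks above change the user-facing flow to single-argument fit/transform with parameters set through setters (including the new setSeed). A hedged usage sketch follows; the sqlContext value and the toy ratings data are assumptions for a spark-shell-style session, not part of the patch:

    import org.apache.spark.ml.recommendation.ALS
    import sqlContext.implicits._   // assumes a SQLContext named sqlContext is in scope

    val ratings = Seq((0, 0, 4.0f), (0, 1, 2.0f), (1, 1, 3.0f), (1, 0, 1.0f))
      .toDF("user", "item", "rating")

    val als = new ALS()
      .setUserCol("user")
      .setItemCol("item")
      .setRatingCol("rating")
      .setRank(10)
      .setMaxIter(10)
      .setRegParam(0.1)
      .setSeed(42L)                        // seed support added in this patch via HasSeed
    val model = als.fit(ratings)           // no ParamMap argument any more
    val scored = model.transform(ratings)  // appends the "prediction" column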
*/ + @DeveloperApi def train[ID: ClassTag]( // scalastyle:ignore ratings: RDD[Rating[ID]], rank: Int = 10, diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 756725a64b0f3..43b68e7bb20fa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -17,12 +17,11 @@ package org.apache.spark.ml.regression -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ -import org.apache.spark.ml.param.{Params, ParamMap} -import org.apache.spark.ml.tree.{DecisionTreeModel, Node} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{PredictionModel, Predictor} +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeRegressorParams} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{DecisionTree => OldDecisionTree} @@ -31,19 +30,19 @@ import org.apache.spark.mllib.tree.model.{DecisionTreeModel => OldDecisionTreeMo import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame - /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm * for regression. * It supports both continuous and categorical features. */ -@AlphaComponent -final class DecisionTreeRegressor +@Experimental +final class DecisionTreeRegressor(override val uid: String) extends Predictor[Vector, DecisionTreeRegressor, DecisionTreeRegressionModel] with DecisionTreeParams with TreeRegressorParams { + def this() = this(Identifiable.randomUID("dtr")) + // Override parameter setters from parent trait for Java API compatibility. override def setMaxDepth(value: Int): this.type = super.setMaxDepth(value) @@ -63,15 +62,13 @@ final class DecisionTreeRegressor override def setImpurity(value: String): this.type = super.setImpurity(value) - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): DecisionTreeRegressionModel = { + override protected def train(dataset: DataFrame): DecisionTreeRegressionModel = { val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol))) - val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap) + MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val strategy = getOldStrategy(categoricalFeatures) val oldModel = OldDecisionTree.train(oldDataset, strategy) - DecisionTreeRegressionModel.fromOld(oldModel, this, paramMap, categoricalFeatures) + DecisionTreeRegressionModel.fromOld(oldModel, this, categoricalFeatures) } /** (private[ml]) Create a Strategy instance to use with the old API. 
*/ @@ -81,22 +78,21 @@ final class DecisionTreeRegressor } } +@Experimental object DecisionTreeRegressor { /** Accessor for supported impurities: variance */ final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities } /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for regression. * It supports both continuous and categorical features. * @param rootNode Root of the decision tree */ -@AlphaComponent +@Experimental final class DecisionTreeRegressionModel private[ml] ( - override val parent: DecisionTreeRegressor, - override val fittingParamMap: ParamMap, + override val uid: String, override val rootNode: Node) extends PredictionModel[Vector, DecisionTreeRegressionModel] with DecisionTreeModel with Serializable { @@ -108,10 +104,8 @@ final class DecisionTreeRegressionModel private[ml] ( rootNode.predict(features) } - override protected def copy(): DecisionTreeRegressionModel = { - val m = new DecisionTreeRegressionModel(parent, fittingParamMap, rootNode) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): DecisionTreeRegressionModel = { + copyValues(new DecisionTreeRegressionModel(uid, rootNode), extra) } override def toString: String = { @@ -130,12 +124,12 @@ private[ml] object DecisionTreeRegressionModel { def fromOld( oldModel: OldDecisionTreeModel, parent: DecisionTreeRegressor, - fittingParamMap: ParamMap, categoricalFeatures: Map[Int, Int]): DecisionTreeRegressionModel = { require(oldModel.algo == OldAlgo.Regression, s"Cannot convert non-regression DecisionTreeModel (old API) to" + s" DecisionTreeRegressionModel (new API). Algo is: ${oldModel.algo}") val rootNode = Node.fromOld(oldModel.topNode, categoricalFeatures) - new DecisionTreeRegressionModel(parent, fittingParamMap, rootNode) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("dtr") + new DecisionTreeRegressionModel(uid, rootNode) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 76c98376930c5..b7e374bb6cb49 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -20,35 +20,33 @@ package org.apache.spark.ml.regression import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.Logging -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree._ -import org.apache.spark.ml.param.{Params, ParamMap, Param} -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{PredictionModel, Predictor} +import org.apache.spark.ml.param.{Param, ParamMap} +import org.apache.spark.ml.tree.{DecisionTreeModel, GBTParams, TreeEnsembleModel, TreeRegressorParams} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{GradientBoostedTrees => OldGBT} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} -import org.apache.spark.mllib.tree.loss.{AbsoluteError => OldAbsoluteError, Loss => OldLoss, - SquaredError => OldSquaredError} +import 
org.apache.spark.mllib.tree.loss.{AbsoluteError => OldAbsoluteError, Loss => OldLoss, SquaredError => OldSquaredError} import org.apache.spark.mllib.tree.model.{GradientBoostedTreesModel => OldGBTModel} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame - /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * learning algorithm for regression. * It supports both continuous and categorical features. */ -@AlphaComponent -final class GBTRegressor +@Experimental +final class GBTRegressor(override val uid: String) extends Predictor[Vector, GBTRegressor, GBTRegressionModel] with GBTParams with TreeRegressorParams with Logging { + def this() = this(Identifiable.randomUID("gbtr")) + // Override parameter setters from parent trait for Java API compatibility. // Parameters from TreeRegressorParams: @@ -111,7 +109,7 @@ final class GBTRegressor def setLossType(value: String): this.type = set(lossType, value) /** @group getParam */ - def getLossType: String = getOrDefault(lossType).toLowerCase + def getLossType: String = $(lossType).toLowerCase /** (private[ml]) Convert new loss to old loss. */ override private[ml] def getOldLossType: OldLoss = { @@ -124,19 +122,18 @@ final class GBTRegressor } } - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): GBTRegressionModel = { + override protected def train(dataset: DataFrame): GBTRegressionModel = { val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol))) - val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap) + MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val boostingStrategy = super.getOldBoostingStrategy(categoricalFeatures, OldAlgo.Regression) val oldGBT = new OldGBT(boostingStrategy) val oldModel = oldGBT.run(oldDataset) - GBTRegressionModel.fromOld(oldModel, this, paramMap, categoricalFeatures) + GBTRegressionModel.fromOld(oldModel, this, categoricalFeatures) } } +@Experimental object GBTRegressor { // The losses below should be lowercase. /** Accessor for supported loss settings: squared (L2), absolute (L1) */ @@ -144,7 +141,7 @@ object GBTRegressor { } /** - * :: AlphaComponent :: + * :: Experimental :: * * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]] * model for regression. @@ -152,10 +149,9 @@ object GBTRegressor { * @param _trees Decision trees in the ensemble. * @param _treeWeights Weights for the decision trees in the ensemble. 
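The GBTRegressor changes above follow the same pattern (uid-carrying constructor, single-argument train). A usage sketch with toy data; sqlContext is again an assumed spark-shell-style SQLContext, and the numbers are purely illustrative:

    import org.apache.spark.ml.regression.GBTRegressor
    import org.apache.spark.mllib.linalg.Vectors
    import sqlContext.implicits._   // assumed to be in scope

    val training = Seq(
      (1.0, Vectors.dense(0.0, 1.1)),
      (0.0, Vectors.dense(2.0, 1.0)),
      (1.5, Vectors.dense(0.5, 0.5)),
      (0.2, Vectors.dense(1.8, 0.9))
    ).toDF("label", "features")

    val gbt = new GBTRegressor()
      .setMaxIter(10)
      .setMaxDepth(3)
      .setLossType("squared")   // "squared" or "absolute"; compared case-insensitively
    val gbtModel = gbt.fit(training)
    gbtModel.transform(training).show()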
*/ -@AlphaComponent +@Experimental final class GBTRegressionModel( - override val parent: GBTRegressor, - override val fittingParamMap: ParamMap, + override val uid: String, private val _trees: Array[DecisionTreeRegressionModel], private val _treeWeights: Array[Double]) extends PredictionModel[Vector, GBTRegressionModel] @@ -178,10 +174,8 @@ final class GBTRegressionModel( if (prediction > 0.0) 1.0 else 0.0 } - override protected def copy(): GBTRegressionModel = { - val m = new GBTRegressionModel(parent, fittingParamMap, _trees, _treeWeights) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): GBTRegressionModel = { + copyValues(new GBTRegressionModel(uid, _trees, _treeWeights), extra) } override def toString: String = { @@ -200,14 +194,14 @@ private[ml] object GBTRegressionModel { def fromOld( oldModel: OldGBTModel, parent: GBTRegressor, - fittingParamMap: ParamMap, categoricalFeatures: Map[Int, Int]): GBTRegressionModel = { require(oldModel.algo == OldAlgo.Regression, "Cannot convert GradientBoostedTreesModel" + s" with algo=${oldModel.algo} (old API) to GBTRegressionModel (new API).") val newTrees = oldModel.trees.map { tree => - // parent for each tree is null since there is no good way to set this. - DecisionTreeRegressionModel.fromOld(tree, null, null, categoricalFeatures) + // parent for each tree is null since there is no good way to set this. + DecisionTreeRegressionModel.fromOld(tree, null, categoricalFeatures) } - new GBTRegressionModel(parent, fittingParamMap, newTrees, oldModel.treeWeights) + val uid = if (parent != null) parent.uid else Identifiable.randomUID("gbtr") + new GBTRegressionModel(uid, newTrees, oldModel.treeWeights) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index 0b81c48466be9..fe2a71a331694 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -19,32 +19,32 @@ package org.apache.spark.ml.regression import scala.collection.mutable -import breeze.linalg.{norm => brzNorm, DenseVector => BDV} -import breeze.optimize.{LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} -import breeze.optimize.{CachedDiffFunction, DiffFunction} +import breeze.linalg.{DenseVector => BDV, norm => brzNorm} +import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS, OWLQN => BreezeOWLQN} -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.param.{Params, ParamMap} -import org.apache.spark.ml.param.shared.{HasTol, HasElasticNetParam, HasMaxIter, HasRegParam} -import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer +import org.apache.spark.Logging +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.PredictorParams +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.param.shared.{HasElasticNetParam, HasMaxIter, HasRegParam, HasTol} +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.linalg.BLAS._ import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.storage.StorageLevel import org.apache.spark.util.StatCounter -import org.apache.spark.Logging /** * Params
for linear regression. */ -private[regression] trait LinearRegressionParams extends RegressorParams +private[regression] trait LinearRegressionParams extends PredictorParams with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol /** - * :: AlphaComponent :: - * + * :: Experimental :: * Linear regression. * * The learning objective is to minimize the squared error, with regularization. @@ -57,10 +57,13 @@ private[regression] trait LinearRegressionParams extends RegressorParams * - L1 (Lasso) * - L2 + L1 (elastic net) */ -@AlphaComponent -class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegressionModel] +@Experimental +class LinearRegression(override val uid: String) + extends Regressor[Vector, LinearRegression, LinearRegressionModel] with LinearRegressionParams with Logging { + def this() = this(Identifiable.randomUID("linReg")) + /** * Set the regularization parameter. * Default is 0.0. @@ -96,23 +99,25 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress def setTol(value: Double): this.type = set(tol, value) setDefault(tol -> 1E-6) - override protected def train(dataset: DataFrame, paramMap: ParamMap): LinearRegressionModel = { + override protected def train(dataset: DataFrame): LinearRegressionModel = { // Extract columns from data. If dataset is persisted, do not persist instances. - val instances = extractLabeledPoints(dataset, paramMap).map { + val instances = extractLabeledPoints(dataset).map { case LabeledPoint(label: Double, features: Vector) => (label, features) } val handlePersistence = dataset.rdd.getStorageLevel == StorageLevel.NONE if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) val (summarizer, statCounter) = instances.treeAggregate( - (new MultivariateOnlineSummarizer, new StatCounter))( { - case ((summarizer: MultivariateOnlineSummarizer, statCounter: StatCounter), - (label: Double, features: Vector)) => - (summarizer.add(features), statCounter.merge(label)) - }, { - case ((summarizer1: MultivariateOnlineSummarizer, statCounter1: StatCounter), - (summarizer2: MultivariateOnlineSummarizer, statCounter2: StatCounter)) => - (summarizer1.merge(summarizer2), statCounter1.merge(statCounter2)) + (new MultivariateOnlineSummarizer, new StatCounter))( + seqOp = (c, v) => (c, v) match { + case ((summarizer: MultivariateOnlineSummarizer, statCounter: StatCounter), + (label: Double, features: Vector)) => + (summarizer.add(features), statCounter.merge(label)) + }, + combOp = (c1, c2) => (c1, c2) match { + case ((summarizer1: MultivariateOnlineSummarizer, statCounter1: StatCounter), + (summarizer2: MultivariateOnlineSummarizer, statCounter2: StatCounter)) => + (summarizer1.merge(summarizer2), statCounter1.merge(statCounter2)) }) val numFeatures = summarizer.mean.size @@ -125,7 +130,7 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress logWarning(s"The standard deviation of the label is zero, so the weights will be zeros " + s"and the intercept will be the mean of the label; as a result, training is not needed.") if (handlePersistence) instances.unpersist() - return new LinearRegressionModel(this, paramMap, Vectors.sparse(numFeatures, Seq()), yMean) + return new LinearRegressionModel(uid, Vectors.sparse(numFeatures, Seq()), yMean) } val featuresMean = summarizer.mean.toArray @@ -133,17 +138,17 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress // Since we implicitly do the feature scaling when we compute the cost function // to improve 
the convergence, the effective regParam will be changed. - val effectiveRegParam = paramMap(regParam) / yStd - val effectiveL1RegParam = paramMap(elasticNetParam) * effectiveRegParam - val effectiveL2RegParam = (1.0 - paramMap(elasticNetParam)) * effectiveRegParam + val effectiveRegParam = $(regParam) / yStd + val effectiveL1RegParam = $(elasticNetParam) * effectiveRegParam + val effectiveL2RegParam = (1.0 - $(elasticNetParam)) * effectiveRegParam val costFun = new LeastSquaresCostFun(instances, yStd, yMean, featuresStd, featuresMean, effectiveL2RegParam) - val optimizer = if (paramMap(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { - new BreezeLBFGS[BDV[Double]](paramMap(maxIter), 10, paramMap(tol)) + val optimizer = if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) } else { - new BreezeOWLQN[Int, BDV[Double]](paramMap(maxIter), 10, effectiveL1RegParam, paramMap(tol)) + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegParam, $(tol)) } val initialWeights = Vectors.zeros(numFeatures) @@ -164,7 +169,8 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress val weights = { val rawWeights = state.x.toArray.clone() var i = 0 - while (i < rawWeights.length) { + val len = rawWeights.length + while (i < len) { rawWeights(i) *= { if (featuresStd(i) != 0.0) yStd / featuresStd(i) else 0.0 } i += 1 } @@ -178,19 +184,17 @@ class LinearRegression extends Regressor[Vector, LinearRegression, LinearRegress if (handlePersistence) instances.unpersist() // TODO: Converts to sparse format based on the storage, but may base on the scoring speed. - new LinearRegressionModel(this, paramMap, weights.compressed, intercept) + copyValues(new LinearRegressionModel(uid, weights.compressed, intercept)) } } /** - * :: AlphaComponent :: - * + * :: Experimental :: * Model produced by [[LinearRegression]]. */ -@AlphaComponent +@Experimental class LinearRegressionModel private[ml] ( - override val parent: LinearRegression, - override val fittingParamMap: ParamMap, + override val uid: String, val weights: Vector, val intercept: Double) extends RegressionModel[Vector, LinearRegressionModel] @@ -200,10 +204,8 @@ class LinearRegressionModel private[ml] ( dot(features, weights) + intercept } - override protected def copy(): LinearRegressionModel = { - val m = new LinearRegressionModel(parent, fittingParamMap, weights, intercept) - Params.inheritValues(extractParamMap(), this, m) - m + override def copy(extra: ParamMap): LinearRegressionModel = { + copyValues(new LinearRegressionModel(uid, weights, intercept), extra) } } @@ -243,7 +245,7 @@ class LinearRegressionModel private[ml] ( * + \bar{y} / \hat{y}||^2 * = 1/2n ||\sum_i w_i^\prime x_i - y / \hat{y} + offset||^2 = 1/2n diff^2 * }}} - * where w_i^\prime is the effective weights defined by w_i/\hat{x_i}, offset is + * where w_i^\prime^ is the effective weights defined by w_i/\hat{x_i}, offset is * {{{ * - \sum_i (w_i/\hat{x_i})\bar{x_i} + \bar{y} / \hat{y}. 
* }}}, and diff is @@ -307,7 +309,8 @@ private class LeastSquaresAggregator( val weightsArray = weights.toArray.clone() var sum = 0.0 var i = 0 - while (i < weightsArray.length) { + val len = weightsArray.length + while (i < len) { if (featuresStd(i) != 0.0) { weightsArray(i) /= featuresStd(i) sum += weightsArray(i) * featuresMean(i) @@ -318,7 +321,7 @@ private class LeastSquaresAggregator( } (weightsArray, -sum + labelMean / labelStd, weightsArray.length) } - + private val effectiveWeightsVector = Vectors.dense(effectiveWeightsArray) private val gradientSumArray = Array.ofDim[Double](dim) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index 2171ef3d32c26..49a1f7ce8c995 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -17,32 +17,31 @@ package org.apache.spark.ml.regression -import org.apache.spark.annotation.AlphaComponent -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor} -import org.apache.spark.ml.impl.tree.{RandomForestParams, TreeRegressorParams} -import org.apache.spark.ml.param.{Params, ParamMap} -import org.apache.spark.ml.tree.{DecisionTreeModel, TreeEnsembleModel} -import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.annotation.Experimental +import org.apache.spark.ml.{PredictionModel, Predictor} +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeEnsembleModel, TreeRegressorParams} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.{RandomForest => OldRandomForest} -import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy} +import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.tree.model.{RandomForestModel => OldRandomForestModel} import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame - /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] learning algorithm for regression. * It supports both continuous and categorical features. */ -@AlphaComponent -final class RandomForestRegressor +@Experimental +final class RandomForestRegressor(override val uid: String) extends Predictor[Vector, RandomForestRegressor, RandomForestRegressionModel] with RandomForestParams with TreeRegressorParams { + def this() = this(Identifiable.randomUID("rfr")) + // Override parameter setters from parent trait for Java API compatibility. 
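Before the random forest changes, a quick numeric reading of the elastic-net scaling introduced in the LinearRegression hunk above; the input values are invented purely to show the arithmetic:

    // With regParam = 0.3, elasticNetParam = 0.4 and label standard deviation yStd = 2.0:
    val regParam = 0.3
    val elasticNetParam = 0.4
    val yStd = 2.0
    val effectiveRegParam = regParam / yStd                               // 0.15
    val effectiveL1RegParam = elasticNetParam * effectiveRegParam         // 0.06, handled by OWLQN
    val effectiveL2RegParam = (1.0 - elasticNetParam) * effectiveRegParam // 0.09, folded into the cost function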
// Parameters from TreeRegressorParams: @@ -77,20 +76,19 @@ final class RandomForestRegressor override def setFeatureSubsetStrategy(value: String): this.type = super.setFeatureSubsetStrategy(value) - override protected def train( - dataset: DataFrame, - paramMap: ParamMap): RandomForestRegressionModel = { + override protected def train(dataset: DataFrame): RandomForestRegressionModel = { val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema(paramMap(featuresCol))) - val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset, paramMap) + MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val oldDataset: RDD[LabeledPoint] = extractLabeledPoints(dataset) val strategy = super.getOldStrategy(categoricalFeatures, numClasses = 0, OldAlgo.Regression, getOldImpurity) val oldModel = OldRandomForest.trainRegressor( oldDataset, strategy, getNumTrees, getFeatureSubsetStrategy, getSeed.toInt) - RandomForestRegressionModel.fromOld(oldModel, this, paramMap, categoricalFeatures) + RandomForestRegressionModel.fromOld(oldModel, this, categoricalFeatures) } } +@Experimental object RandomForestRegressor { /** Accessor for supported impurity settings: variance */ final val supportedImpurities: Array[String] = TreeRegressorParams.supportedImpurities @@ -101,16 +99,14 @@ object RandomForestRegressor { } /** - * :: AlphaComponent :: - * + * :: Experimental :: * [[http://en.wikipedia.org/wiki/Random_forest Random Forest]] model for regression. * It supports both continuous and categorical features. * @param _trees Decision trees in the ensemble. */ -@AlphaComponent +@Experimental final class RandomForestRegressionModel private[ml] ( - override val parent: RandomForestRegressor, - override val fittingParamMap: ParamMap, + override val uid: String, private val _trees: Array[DecisionTreeRegressionModel]) extends PredictionModel[Vector, RandomForestRegressionModel] with TreeEnsembleModel with Serializable { @@ -132,10 +128,8 @@ final class RandomForestRegressionModel private[ml] ( _trees.map(_.rootNode.predict(features)).sum / numTrees } - override protected def copy(): RandomForestRegressionModel = { - val m = new RandomForestRegressionModel(parent, fittingParamMap, _trees) - Params.inheritValues(this.extractParamMap(), this, m) - m + override def copy(extra: ParamMap): RandomForestRegressionModel = { + copyValues(new RandomForestRegressionModel(uid, _trees), extra) } override def toString: String = { @@ -154,14 +148,13 @@ private[ml] object RandomForestRegressionModel { def fromOld( oldModel: OldRandomForestModel, parent: RandomForestRegressor, - fittingParamMap: ParamMap, categoricalFeatures: Map[Int, Int]): RandomForestRegressionModel = { require(oldModel.algo == OldAlgo.Regression, "Cannot convert RandomForestModel" + s" with algo=${oldModel.algo} (old API) to RandomForestRegressionModel (new API).") val newTrees = oldModel.trees.map { tree => - // parent, fittingParamMap for each tree is null since there are no good ways to set these. - DecisionTreeRegressionModel.fromOld(tree, null, null, categoricalFeatures) + // parent for each tree is null since there is no good way to set this. 
+ DecisionTreeRegressionModel.fromOld(tree, null, categoricalFeatures) } - new RandomForestRegressionModel(parent, fittingParamMap, newTrees) + new RandomForestRegressionModel(parent.uid, newTrees) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala index d679085eeafe1..c72ef29680329 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala @@ -17,62 +17,40 @@ package org.apache.spark.ml.regression -import org.apache.spark.annotation.{DeveloperApi, AlphaComponent} -import org.apache.spark.ml.impl.estimator.{PredictionModel, Predictor, PredictorParams} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor} -/** - * :: DeveloperApi :: - * Params for regression. - * Currently empty, but may add functionality later. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. - */ -@DeveloperApi -private[spark] trait RegressorParams extends PredictorParams /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Single-label regression * * @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]] * @tparam Learner Concrete Estimator type * @tparam M Concrete Model type - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent +@DeveloperApi private[spark] abstract class Regressor[ FeaturesType, Learner <: Regressor[FeaturesType, Learner, M], M <: RegressionModel[FeaturesType, M]] - extends Predictor[FeaturesType, Learner, M] - with RegressorParams { + extends Predictor[FeaturesType, Learner, M] with PredictorParams { // TODO: defaultEvaluator (follow-up PR) } /** - * :: AlphaComponent :: + * :: DeveloperApi :: * * Model produced by a [[Regressor]]. * * @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]] * @tparam M Concrete Model type. - * - * NOTE: This is currently private[spark] but will be made public later once it is stabilized. */ -@AlphaComponent -private[spark] abstract class RegressionModel[FeaturesType, M <: RegressionModel[FeaturesType, M]] - extends PredictionModel[FeaturesType, M] with RegressorParams { - - /** - * :: DeveloperApi :: - * - * Predict real-valued label for the given features. - * This internal method is used to implement [[transform()]] and output [[predictionCol]]. - */ - @DeveloperApi - protected def predict(features: FeaturesType): Double +@DeveloperApi +abstract class RegressionModel[FeaturesType, M <: RegressionModel[FeaturesType, M]] + extends PredictionModel[FeaturesType, M] with PredictorParams { + // TODO: defaultEvaluator (follow-up PR) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala index d2dec0c76cb12..4242154be14ce 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala @@ -17,14 +17,16 @@ package org.apache.spark.ml.tree +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.model.{InformationGainStats => OldInformationGainStats, Node => OldNode, Predict => OldPredict} - /** + * :: DeveloperApi :: * Decision tree node interface. 
*/ +@DeveloperApi sealed abstract class Node extends Serializable { // TODO: Add aggregate stats (once available). This will happen after we move the DecisionTree @@ -89,10 +91,12 @@ private[ml] object Node { } /** + * :: DeveloperApi :: * Decision tree leaf node. * @param prediction Prediction this node makes * @param impurity Impurity measure at this node (for training data) */ +@DeveloperApi final class LeafNode private[ml] ( override val prediction: Double, override val impurity: Double) extends Node { @@ -118,6 +122,7 @@ final class LeafNode private[ml] ( } /** + * :: DeveloperApi :: * Internal Decision Tree node. * @param prediction Prediction this node would make if it were a leaf node * @param impurity Impurity measure at this node (for training data) @@ -127,6 +132,7 @@ final class LeafNode private[ml] ( * @param rightChild Right-hand child node * @param split Information about the test used to split to the left or right child. */ +@DeveloperApi final class InternalNode private[ml] ( override val prediction: Double, override val impurity: Double, @@ -153,9 +159,9 @@ final class InternalNode private[ml] ( override private[tree] def subtreeToString(indentFactor: Int = 0): String = { val prefix: String = " " * indentFactor - prefix + s"If (${InternalNode.splitToString(split, left=true)})\n" + + prefix + s"If (${InternalNode.splitToString(split, left = true)})\n" + leftChild.subtreeToString(indentFactor + 1) + - prefix + s"Else (${InternalNode.splitToString(split, left=false)})\n" + + prefix + s"Else (${InternalNode.splitToString(split, left = false)})\n" + rightChild.subtreeToString(indentFactor + 1) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala index 90f1d052764d3..7acdeeee72d23 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala @@ -17,15 +17,18 @@ package org.apache.spark.ml.tree +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.configuration.{FeatureType => OldFeatureType} import org.apache.spark.mllib.tree.model.{Split => OldSplit} /** + * :: DeveloperApi :: * Interface for a "Split," which specifies a test made at a decision tree node * to choose the left or right path. */ +@DeveloperApi sealed trait Split extends Serializable { /** Index of feature which this split tests */ @@ -52,12 +55,14 @@ private[tree] object Split { } /** + * :: DeveloperApi :: * Split which tests a categorical feature. * @param featureIndex Index of the feature to test * @param _leftCategories If the feature value is in this set of categories, then the split goes * left. Otherwise, it goes right. * @param numCategories Number of categories for this feature. */ +@DeveloperApi final class CategoricalSplit private[ml] ( override val featureIndex: Int, _leftCategories: Array[Double], @@ -125,11 +130,13 @@ final class CategoricalSplit private[ml] ( } /** + * :: DeveloperApi :: * Split which tests a continuous feature. * @param featureIndex Index of the feature to test * @param threshold If the feature value is <= this threshold, then the split goes left. * Otherwise, it goes right. 
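With Node, LeafNode and InternalNode now exposed as DeveloperApi, a trained tree can be walked directly. A small sketch; dtModel stands for an assumed, already-fitted DecisionTreeRegressionModel and is not defined here:

    import org.apache.spark.ml.tree.{InternalNode, LeafNode, Node}

    // Counts leaves by recursing over the sealed Node hierarchy.
    def countLeaves(node: Node): Int = node match {
      case _: LeafNode     => 1
      case n: InternalNode => countLeaves(n.leftChild) + countLeaves(n.rightChild)
    }
    // e.g. countLeaves(dtModel.rootNode) for an assumed trained DecisionTreeRegressionModel dtModel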
*/ +@DeveloperApi final class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double) extends Split { diff --git a/mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala similarity index 91% rename from mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala rename to mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala index fb770622e71f0..a0c5238d966bf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/impl/tree/treeParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala @@ -15,26 +15,20 @@ * limitations under the License. */ -package org.apache.spark.ml.impl.tree +package org.apache.spark.ml.tree -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.ml.impl.estimator.PredictorParams +import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.param._ -import org.apache.spark.ml.param.shared.{HasSeed, HasMaxIter} -import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, - BoostingStrategy => OldBoostingStrategy, Strategy => OldStrategy} -import org.apache.spark.mllib.tree.impurity.{Gini => OldGini, Entropy => OldEntropy, - Impurity => OldImpurity, Variance => OldVariance} +import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed} +import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, BoostingStrategy => OldBoostingStrategy, Strategy => OldStrategy} +import org.apache.spark.mllib.tree.impurity.{Entropy => OldEntropy, Gini => OldGini, Impurity => OldImpurity, Variance => OldVariance} import org.apache.spark.mllib.tree.loss.{Loss => OldLoss} - /** - * :: DeveloperApi :: * Parameters for Decision Tree-based algorithms. * * Note: Marked as private and DeveloperApi since this may be made public in the future. 
*/ -@DeveloperApi private[ml] trait DecisionTreeParams extends PredictorParams { /** @@ -123,43 +117,43 @@ private[ml] trait DecisionTreeParams extends PredictorParams { def setMaxDepth(value: Int): this.type = set(maxDepth, value) /** @group getParam */ - final def getMaxDepth: Int = getOrDefault(maxDepth) + final def getMaxDepth: Int = $(maxDepth) /** @group setParam */ def setMaxBins(value: Int): this.type = set(maxBins, value) /** @group getParam */ - final def getMaxBins: Int = getOrDefault(maxBins) + final def getMaxBins: Int = $(maxBins) /** @group setParam */ def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value) /** @group getParam */ - final def getMinInstancesPerNode: Int = getOrDefault(minInstancesPerNode) + final def getMinInstancesPerNode: Int = $(minInstancesPerNode) /** @group setParam */ def setMinInfoGain(value: Double): this.type = set(minInfoGain, value) /** @group getParam */ - final def getMinInfoGain: Double = getOrDefault(minInfoGain) + final def getMinInfoGain: Double = $(minInfoGain) /** @group expertSetParam */ def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value) /** @group expertGetParam */ - final def getMaxMemoryInMB: Int = getOrDefault(maxMemoryInMB) + final def getMaxMemoryInMB: Int = $(maxMemoryInMB) /** @group expertSetParam */ def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value) /** @group expertGetParam */ - final def getCacheNodeIds: Boolean = getOrDefault(cacheNodeIds) + final def getCacheNodeIds: Boolean = $(cacheNodeIds) /** @group expertSetParam */ def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) /** @group expertGetParam */ - final def getCheckpointInterval: Int = getOrDefault(checkpointInterval) + final def getCheckpointInterval: Int = $(checkpointInterval) /** (private[ml]) Create a Strategy instance to use with the old API. */ private[ml] def getOldStrategy( @@ -206,7 +200,7 @@ private[ml] trait TreeClassifierParams extends Params { def setImpurity(value: String): this.type = set(impurity, value) /** @group getParam */ - final def getImpurity: String = getOrDefault(impurity).toLowerCase + final def getImpurity: String = $(impurity).toLowerCase /** Convert new impurity to old impurity. */ private[ml] def getOldImpurity: OldImpurity = { @@ -248,7 +242,7 @@ private[ml] trait TreeRegressorParams extends Params { def setImpurity(value: String): this.type = set(impurity, value) /** @group getParam */ - final def getImpurity: String = getOrDefault(impurity).toLowerCase + final def getImpurity: String = $(impurity).toLowerCase /** Convert new impurity to old impurity. */ private[ml] def getOldImpurity: OldImpurity = { @@ -268,12 +262,10 @@ private[ml] object TreeRegressorParams { } /** - * :: DeveloperApi :: * Parameters for Decision Tree-based ensemble algorithms. * * Note: Marked as private and DeveloperApi since this may be made public in the future. 
*/ -@DeveloperApi private[ml] trait TreeEnsembleParams extends DecisionTreeParams with HasSeed { /** @@ -291,7 +283,7 @@ private[ml] trait TreeEnsembleParams extends DecisionTreeParams with HasSeed { def setSubsamplingRate(value: Double): this.type = set(subsamplingRate, value) /** @group getParam */ - final def getSubsamplingRate: Double = getOrDefault(subsamplingRate) + final def getSubsamplingRate: Double = $(subsamplingRate) /** @group setParam */ def setSeed(value: Long): this.type = set(seed, value) @@ -310,12 +302,10 @@ private[ml] trait TreeEnsembleParams extends DecisionTreeParams with HasSeed { } /** - * :: DeveloperApi :: * Parameters for Random Forest algorithms. * * Note: Marked as private and DeveloperApi since this may be made public in the future. */ -@DeveloperApi private[ml] trait RandomForestParams extends TreeEnsembleParams { /** @@ -364,13 +354,13 @@ private[ml] trait RandomForestParams extends TreeEnsembleParams { def setNumTrees(value: Int): this.type = set(numTrees, value) /** @group getParam */ - final def getNumTrees: Int = getOrDefault(numTrees) + final def getNumTrees: Int = $(numTrees) /** @group setParam */ def setFeatureSubsetStrategy(value: String): this.type = set(featureSubsetStrategy, value) /** @group getParam */ - final def getFeatureSubsetStrategy: String = getOrDefault(featureSubsetStrategy).toLowerCase + final def getFeatureSubsetStrategy: String = $(featureSubsetStrategy).toLowerCase } private[ml] object RandomForestParams { @@ -380,12 +370,10 @@ private[ml] object RandomForestParams { } /** - * :: DeveloperApi :: * Parameters for Gradient-Boosted Tree algorithms. * * Note: Marked as private and DeveloperApi since this may be made public in the future. */ -@DeveloperApi private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter { /** @@ -418,7 +406,7 @@ private[ml] trait GBTParams extends TreeEnsembleParams with HasMaxIter { def setStepSize(value: Double): this.type = set(stepSize, value) /** @group getParam */ - final def getStepSize: Double = getOrDefault(stepSize) + final def getStepSize: Double = $(stepSize) /** (private[ml]) Create a BoostingStrategy instance to use with the old API. 
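Several of these traits normalize string params with toLowerCase (getImpurity, getFeatureSubsetStrategy, and getLossType earlier). A hypothetical trait showing that pattern in isolation; the param name and allowed values are invented for illustration:

    import org.apache.spark.ml.param.{Param, Params}

    trait HasMode extends Params {
      private val supported = Array("fast", "exact")
      // Validation and comparison are both case-insensitive.
      final val mode: Param[String] = new Param[String](this, "mode",
        s"computation mode, one of: ${supported.mkString(", ")}",
        (value: String) => supported.contains(value.toLowerCase))
      setDefault(mode, "fast")
      final def getMode: String = $(mode).toLowerCase
    }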
*/ private[ml] def getOldBoostingStrategy( diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index d1ad0893cd044..6434b64aed15d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -20,9 +20,11 @@ package org.apache.spark.ml.tuning import com.github.fommil.netlib.F2jBLAS import org.apache.spark.Logging -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml._ +import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param._ +import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.util.MLUtils import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType @@ -39,7 +41,7 @@ private[ml] trait CrossValidatorParams extends Params { val estimator: Param[Estimator[_]] = new Param(this, "estimator", "estimator for selection") /** @group getParam */ - def getEstimator: Estimator[_] = getOrDefault(estimator) + def getEstimator: Estimator[_] = $(estimator) /** * param for estimator param maps @@ -49,16 +51,18 @@ private[ml] trait CrossValidatorParams extends Params { new Param(this, "estimatorParamMaps", "param maps for the estimator") /** @group getParam */ - def getEstimatorParamMaps: Array[ParamMap] = getOrDefault(estimatorParamMaps) + def getEstimatorParamMaps: Array[ParamMap] = $(estimatorParamMaps) /** - * param for the evaluator for selection + * param for the evaluator used to select hyper-parameters that maximize the cross-validated + * metric * @group param */ - val evaluator: Param[Evaluator] = new Param(this, "evaluator", "evaluator for selection") + val evaluator: Param[Evaluator] = new Param(this, "evaluator", + "evaluator used to select hyper-parameters that maximize the cross-validated metric") /** @group getParam */ - def getEvaluator: Evaluator = getOrDefault(evaluator) + def getEvaluator: Evaluator = $(evaluator) /** * Param for number of folds for cross validation. Must be >= 2. @@ -69,17 +73,20 @@ private[ml] trait CrossValidatorParams extends Params { "number of folds for cross validation (>= 2)", ParamValidators.gtEq(2)) /** @group getParam */ - def getNumFolds: Int = getOrDefault(numFolds) + def getNumFolds: Int = $(numFolds) setDefault(numFolds -> 3) } /** - * :: AlphaComponent :: + * :: Experimental :: * K-fold cross validation. 
*/ -@AlphaComponent -class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorParams with Logging { +@Experimental +class CrossValidator(override val uid: String) extends Estimator[CrossValidatorModel] + with CrossValidatorParams with Logging { + + def this() = this(Identifiable.randomUID("cv")) private val f2jBLAS = new F2jBLAS @@ -95,23 +102,16 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP /** @group setParam */ def setNumFolds(value: Int): this.type = set(numFolds, value) - override def validate(paramMap: ParamMap): Unit = { - getEstimatorParamMaps.foreach { eMap => - getEstimator.validate(eMap ++ paramMap) - } - } - - override def fit(dataset: DataFrame, paramMap: ParamMap): CrossValidatorModel = { - val map = extractParamMap(paramMap) + override def fit(dataset: DataFrame): CrossValidatorModel = { val schema = dataset.schema - transformSchema(dataset.schema, paramMap, logging = true) + transformSchema(schema, logging = true) val sqlCtx = dataset.sqlContext - val est = map(estimator) - val eval = map(evaluator) - val epm = map(estimatorParamMaps) + val est = $(estimator) + val eval = $(evaluator) + val epm = $(estimatorParamMaps) val numModels = epm.length val metrics = new Array[Double](epm.length) - val splits = MLUtils.kFold(dataset.rdd, map(numFolds), 0) + val splits = MLUtils.kFold(dataset.rdd, $(numFolds), 0) splits.zipWithIndex.foreach { case ((training, validation), splitIndex) => val trainingDataset = sqlCtx.createDataFrame(training, schema).cache() val validationDataset = sqlCtx.createDataFrame(validation, schema).cache() @@ -121,50 +121,61 @@ class CrossValidator extends Estimator[CrossValidatorModel] with CrossValidatorP trainingDataset.unpersist() var i = 0 while (i < numModels) { - val metric = eval.evaluate(models(i).transform(validationDataset, epm(i)), map) + // TODO: duplicate evaluator to take extra params from input + val metric = eval.evaluate(models(i).transform(validationDataset, epm(i))) logDebug(s"Got metric $metric for model trained with ${epm(i)}.") metrics(i) += metric i += 1 } validationDataset.unpersist() } - f2jBLAS.dscal(numModels, 1.0 / map(numFolds), metrics, 1) + f2jBLAS.dscal(numModels, 1.0 / $(numFolds), metrics, 1) logInfo(s"Average cross-validation metrics: ${metrics.toSeq}") val (bestMetric, bestIndex) = metrics.zipWithIndex.maxBy(_._1) logInfo(s"Best set of parameters:\n${epm(bestIndex)}") logInfo(s"Best cross-validation metric: $bestMetric.") val bestModel = est.fit(dataset, epm(bestIndex)).asInstanceOf[Model[_]] - val cvModel = new CrossValidatorModel(this, map, bestModel) - Params.inheritValues(map, this, cvModel) - cvModel + copyValues(new CrossValidatorModel(uid, bestModel).setParent(this)) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - val map = extractParamMap(paramMap) - map(estimator).transformSchema(schema, paramMap) + override def transformSchema(schema: StructType): StructType = { + $(estimator).transformSchema(schema) + } + + override def validateParams(): Unit = { + super.validateParams() + val est = $(estimator) + for (paramMap <- $(estimatorParamMaps)) { + est.copy(paramMap).validateParams() + } } } /** - * :: AlphaComponent :: + * :: Experimental :: * Model from k-fold cross validation. 
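The CrossValidator rewrite above keeps the same selection logic but drives it through the new single-argument fit and the copy/validateParams machinery. A hedged end-to-end sketch; training is an assumed DataFrame with a binary "label" column and a vector "features" column and is not defined here:

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator
    import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}

    val lr = new LogisticRegression()
    val grid = new ParamGridBuilder()
      .addGrid(lr.regParam, Array(0.01, 0.1))
      .addGrid(lr.maxIter, Array(10, 50))
      .build()
    val cv = new CrossValidator()
      .setEstimator(lr)
      .setEstimatorParamMaps(grid)
      .setEvaluator(new BinaryClassificationEvaluator())  // selects params maximizing the CV metric
      .setNumFolds(3)
    val cvModel = cv.fit(training)   // training: assumed labeled DataFrame
    val best = cvModel.bestModel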
*/ -@AlphaComponent +@Experimental class CrossValidatorModel private[ml] ( - override val parent: CrossValidator, - override val fittingParamMap: ParamMap, + override val uid: String, val bestModel: Model[_]) extends Model[CrossValidatorModel] with CrossValidatorParams { - override def validate(paramMap: ParamMap): Unit = { - bestModel.validate(paramMap) + override def validateParams(): Unit = { + bestModel.validateParams() + } + + override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) + bestModel.transform(dataset) } - override def transform(dataset: DataFrame, paramMap: ParamMap): DataFrame = { - bestModel.transform(dataset, paramMap) + override def transformSchema(schema: StructType): StructType = { + bestModel.transformSchema(schema) } - override def transformSchema(schema: StructType, paramMap: ParamMap): StructType = { - bestModel.transformSchema(schema, paramMap) + override def copy(extra: ParamMap): CrossValidatorModel = { + val copied = new CrossValidatorModel(uid, bestModel.copy(extra).asInstanceOf[Model[_]]) + copyValues(copied, extra) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala index dafe73d82c00a..98a8f0330ca45 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala @@ -20,14 +20,14 @@ package org.apache.spark.ml.tuning import scala.annotation.varargs import scala.collection.mutable -import org.apache.spark.annotation.AlphaComponent +import org.apache.spark.annotation.Experimental import org.apache.spark.ml.param._ /** - * :: AlphaComponent :: + * :: Experimental :: * Builder for a param grid used in grid search-based model selection. */ -@AlphaComponent +@Experimental class ParamGridBuilder { private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]] diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala index 8a56748ab0a02..ddd34a54503a6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Identifiable.scala @@ -19,15 +19,26 @@ package org.apache.spark.ml.util import java.util.UUID + /** - * Object with a unique id. + * Trait for an object with an immutable unique ID that identifies itself and its derivatives. */ -private[ml] trait Identifiable extends Serializable { +private[spark] trait Identifiable { + + /** + * An immutable unique ID for the object and its derivatives. + */ + val uid: String + + override def toString: String = uid +} + +private[spark] object Identifiable { /** - * A unique id for the object. The default implementation concatenates the class name, "_", and 8 - * random hex chars. + * Returns a random UID that concatenates the given prefix, "_", and 12 random hex chars. 
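From the outside, the reworked Identifiable contract means every pipeline stage carries an immutable uid of the form "<prefix>_<12 hex chars>", and toString now returns it. The printed value below is illustrative, not an actual run:

    import org.apache.spark.ml.recommendation.ALS

    val als = new ALS()
    println(als.uid)        // e.g. "als_4c2a9b1e7d30" (random suffix)
    println(als.toString)   // same string: toString is defined as uid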
*/ - private[ml] val uid: String = - this.getClass.getSimpleName + "_" + UUID.randomUUID().toString.take(8) + def randomUID(prefix: String): String = { + prefix + "_" + UUID.randomUUID().toString.takeRight(12) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala index c84c8b4eb744f..2a1db90f2ca2b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala @@ -19,19 +19,14 @@ package org.apache.spark.ml.util import scala.collection.immutable.HashMap -import org.apache.spark.annotation.Experimental -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, BinaryAttribute, NominalAttribute, - NumericAttribute} +import org.apache.spark.ml.attribute._ import org.apache.spark.sql.types.StructField /** - * :: Experimental :: - * * Helper utilities for tree-based algorithms */ -@Experimental -object MetadataUtils { +private[spark] object MetadataUtils { /** * Examine a schema to identify the number of classes in a label column. @@ -39,9 +34,9 @@ object MetadataUtils { */ def getNumClasses(labelSchema: StructField): Option[Int] = { Attribute.fromStructField(labelSchema) match { - case numAttr: NumericAttribute => None case binAttr: BinaryAttribute => Some(2) case nomAttr: NominalAttribute => nomAttr.getNumValues + case _: NumericAttribute | UnresolvedAttribute => None } } @@ -65,7 +60,7 @@ object MetadataUtils { Iterator() } else { attr match { - case numAttr: NumericAttribute => Iterator() + case _: NumericAttribute | UnresolvedAttribute => Iterator() case binAttr: BinaryAttribute => Iterator(idx -> 2) case nomAttr: NominalAttribute => nomAttr.getNumValues match { diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala index 0383bf0b382b7..7cd53c6d7ef79 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/SchemaUtils.scala @@ -17,15 +17,13 @@ package org.apache.spark.ml.util -import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.types.{DataType, StructField, StructType} + /** - * :: DeveloperApi :: * Utils for handling schemas. */ -@DeveloperApi -object SchemaUtils { +private[spark] object SchemaUtils { // TODO: Move the utility methods to SQL. @@ -58,4 +56,15 @@ object SchemaUtils { val outputFields = schema.fields :+ StructField(colName, dataType, nullable = false) StructType(outputFields) } + + /** + * Appends a new column to the input schema. This fails if the given output column already exists. 
+ * @param schema input schema + * @param col New column schema + * @return new schema with the input column appended + */ + def appendColumn(schema: StructType, col: StructField): StructType = { + require(!schema.fieldNames.contains(col.name), s"Column ${col.name} already exists.") + StructType(schema.fields :+ col) + } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 6237b64c8f984..16f3131796709 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -32,6 +32,7 @@ import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.python.SerDeUtil import org.apache.spark.mllib.classification._ import org.apache.spark.mllib.clustering._ +import org.apache.spark.mllib.evaluation.RankingMetrics import org.apache.spark.mllib.feature._ import org.apache.spark.mllib.fpm.{FPGrowth, FPGrowthModel} import org.apache.spark.mllib.linalg._ @@ -50,6 +51,7 @@ import org.apache.spark.mllib.tree.model.{DecisionTreeModel, GradientBoostedTree import org.apache.spark.mllib.tree.{DecisionTree, GradientBoostedTrees, RandomForest} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -282,6 +284,24 @@ private[python] class PythonMLLibAPI extends Serializable { map(_.asInstanceOf[Object]).asJava } + /** + * Java stub for Python mllib IsotonicRegression.run() + */ + def trainIsotonicRegressionModel( + data: JavaRDD[Vector], + isotonic: Boolean): JList[Object] = { + val isotonicRegressionAlg = new IsotonicRegression().setIsotonic(isotonic) + val input = data.rdd.map { x => + (x(0), x(1), x(2)) + }.persist(StorageLevel.MEMORY_AND_DISK) + try { + val model = isotonicRegressionAlg.run(input) + List[AnyRef](model.boundaryVector, model.predictionVector).asJava + } finally { + data.rdd.unpersist(blocking = false) + } + } + /** * Java stub for Python mllib KMeans.run() */ @@ -291,12 +311,16 @@ private[python] class PythonMLLibAPI extends Serializable { maxIterations: Int, runs: Int, initializationMode: String, - seed: java.lang.Long): KMeansModel = { + seed: java.lang.Long, + initializationSteps: Int, + epsilon: Double): KMeansModel = { val kMeansAlg = new KMeans() .setK(k) .setMaxIterations(maxIterations) .setRuns(runs) .setInitializationMode(initializationMode) + .setInitializationSteps(initializationSteps) + .setEpsilon(epsilon) if (seed != null) kMeansAlg.setSeed(seed) @@ -307,33 +331,54 @@ private[python] class PythonMLLibAPI extends Serializable { } } + /** + * Java stub for Python mllib KMeansModel.computeCost() + */ + def computeCostKmeansModel( + data: JavaRDD[Vector], + centers: java.util.ArrayList[Vector]): Double = { + new KMeansModel(centers).computeCost(data) + } + /** * Java stub for Python mllib GaussianMixture.run() * Returns a list containing weights, mean and covariance of each mixture component. 
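The new stub arguments map onto existing setters on the Scala side; roughly, assuming `vectors: RDD[Vector]` of training points:

  import org.apache.spark.mllib.clustering.KMeans

  val model = new KMeans()
    .setK(3)
    .setMaxIterations(20)
    .setInitializationMode(KMeans.K_MEANS_PARALLEL)
    .setInitializationSteps(5)   // newly reachable from Python via the extended stub
    .setEpsilon(1e-4)            // likewise
    .run(vectors)
  // Scala counterpart of the new computeCostKmeansModel stub.
  val cost = model.computeCost(vectors)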
*/ def trainGaussianMixture( - data: JavaRDD[Vector], - k: Int, - convergenceTol: Double, + data: JavaRDD[Vector], + k: Int, + convergenceTol: Double, maxIterations: Int, - seed: java.lang.Long): JList[Object] = { + seed: java.lang.Long, + initialModelWeights: java.util.ArrayList[Double], + initialModelMu: java.util.ArrayList[Vector], + initialModelSigma: java.util.ArrayList[Matrix]): JList[Object] = { val gmmAlg = new GaussianMixture() .setK(k) .setConvergenceTol(convergenceTol) .setMaxIterations(maxIterations) + if (initialModelWeights != null && initialModelMu != null && initialModelSigma != null) { + val gaussians = initialModelMu.asScala.toSeq.zip(initialModelSigma.asScala.toSeq).map { + case (x, y) => new MultivariateGaussian(x.asInstanceOf[Vector], y.asInstanceOf[Matrix]) + } + val initialModel = new GaussianMixtureModel( + initialModelWeights.asScala.toArray, gaussians.toArray) + gmmAlg.setInitialModel(initialModel) + } + if (seed != null) gmmAlg.setSeed(seed) try { val model = gmmAlg.run(data.rdd.persist(StorageLevel.MEMORY_AND_DISK)) var wt = ArrayBuffer.empty[Double] - var mu = ArrayBuffer.empty[Vector] + var mu = ArrayBuffer.empty[Vector] var sigma = ArrayBuffer.empty[Matrix] for (i <- 0 until model.k) { wt += model.weights(i) mu += model.gaussians(i).mu sigma += model.gaussians(i).sigma - } + } List(Vectors.dense(wt.toArray), mu.toArray, sigma.toArray).map(_.asInstanceOf[Object]).asJava } finally { data.rdd.unpersist(blocking = false) @@ -347,14 +392,14 @@ private[python] class PythonMLLibAPI extends Serializable { data: JavaRDD[Vector], wt: Vector, mu: Array[Object], - si: Array[Object]): RDD[Vector] = { + si: Array[Object]): RDD[Vector] = { val weight = wt.toArray val mean = mu.map(_.asInstanceOf[DenseVector]) val sigma = si.map(_.asInstanceOf[DenseMatrix]) val gaussians = Array.tabulate(weight.length){ i => new MultivariateGaussian(mean(i), sigma(i)) - } + } val model = new GaussianMixtureModel(weight, gaussians) model.predictSoft(data).map(Vectors.dense) } @@ -383,7 +428,7 @@ private[python] class PythonMLLibAPI extends Serializable { if (seed != null) als.setSeed(seed) - val model = als.run(ratingsJRDD.rdd) + val model = als.run(ratingsJRDD.rdd) new MatrixFactorizationModelWrapper(model) } @@ -414,7 +459,7 @@ private[python] class PythonMLLibAPI extends Serializable { if (seed != null) als.setSeed(seed) - val model = als.run(ratingsJRDD.rdd) + val model = als.run(ratingsJRDD.rdd) new MatrixFactorizationModelWrapper(model) } @@ -449,7 +494,7 @@ private[python] class PythonMLLibAPI extends Serializable { def normalizeVector(p: Double, rdd: JavaRDD[Vector]): JavaRDD[Vector] = { new Normalizer(p).transform(rdd) } - + /** * Java stub for StandardScaler.fit(). This stub returns a * handle to the Java object instead of the content of the Java object. @@ -463,6 +508,16 @@ private[python] class PythonMLLibAPI extends Serializable { new StandardScaler(withMean, withStd).fit(data.rdd) } + /** + * Java stub for ChiSqSelector.fit(). This stub returns a + * handle to the Java object instead of the content of the Java object. + * Extra care needs to be taken in the Python code to ensure it gets freed on + * exit; see the Py4J documentation. + */ + def fitChiSqSelector(numTopFeatures: Int, data: JavaRDD[LabeledPoint]): ChiSqSelectorModel = { + new ChiSqSelector(numTopFeatures).fit(data.rdd) + } + /** * Java stub for IDF.fit(). This stub returns a * handle to the Java object instead of the content of the Java object. 
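The new fitChiSqSelector stub wraps the existing Scala feature selector; a sketch of that API, assuming `points: RDD[LabeledPoint]`:

  import org.apache.spark.mllib.feature.ChiSqSelector
  import org.apache.spark.mllib.regression.LabeledPoint

  // Keep the 50 features with the highest chi-squared statistic w.r.t. the label.
  val selectorModel = new ChiSqSelector(numTopFeatures = 50).fit(points)
  val reduced = points.map(lp => LabeledPoint(lp.label, selectorModel.transform(lp.features)))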
@@ -882,6 +937,14 @@ private[python] class PythonMLLibAPI extends Serializable { RG.gammaVectorRDD(jsc.sc, shape, scale, numRows, numCols, parts, s) } + /** + * Java stub for the constructor of Python mllib RankingMetrics + */ + def newRankingMetrics(predictionAndLabels: DataFrame): RankingMetrics[Any] = { + new RankingMetrics(predictionAndLabels.map( + r => (r.getSeq(0).toArray[Any], r.getSeq(1).toArray[Any]))) + } + } @@ -1015,6 +1078,61 @@ private[spark] object SerDe extends Serializable { } } + // Pickler for SparseMatrix + private[python] class SparseMatrixPickler extends BasePickler[SparseMatrix] { + + def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit = { + val s = obj.asInstanceOf[SparseMatrix] + val order = ByteOrder.nativeOrder() + + val colPtrsBytes = new Array[Byte](4 * s.colPtrs.length) + val indicesBytes = new Array[Byte](4 * s.rowIndices.length) + val valuesBytes = new Array[Byte](8 * s.values.length) + val isTransposed = if (s.isTransposed) 1 else 0 + ByteBuffer.wrap(colPtrsBytes).order(order).asIntBuffer().put(s.colPtrs) + ByteBuffer.wrap(indicesBytes).order(order).asIntBuffer().put(s.rowIndices) + ByteBuffer.wrap(valuesBytes).order(order).asDoubleBuffer().put(s.values) + + out.write(Opcodes.MARK) + out.write(Opcodes.BININT) + out.write(PickleUtils.integer_to_bytes(s.numRows)) + out.write(Opcodes.BININT) + out.write(PickleUtils.integer_to_bytes(s.numCols)) + out.write(Opcodes.BINSTRING) + out.write(PickleUtils.integer_to_bytes(colPtrsBytes.length)) + out.write(colPtrsBytes) + out.write(Opcodes.BINSTRING) + out.write(PickleUtils.integer_to_bytes(indicesBytes.length)) + out.write(indicesBytes) + out.write(Opcodes.BINSTRING) + out.write(PickleUtils.integer_to_bytes(valuesBytes.length)) + out.write(valuesBytes) + out.write(Opcodes.BININT) + out.write(PickleUtils.integer_to_bytes(isTransposed)) + out.write(Opcodes.TUPLE) + } + + def construct(args: Array[Object]): Object = { + if (args.length != 6) { + throw new PickleException("should be 6") + } + val order = ByteOrder.nativeOrder() + val colPtrsBytes = getBytes(args(2)) + val indicesBytes = getBytes(args(3)) + val valuesBytes = getBytes(args(4)) + val colPtrs = new Array[Int](colPtrsBytes.length / 4) + val rowIndices = new Array[Int](indicesBytes.length / 4) + val values = new Array[Double](valuesBytes.length / 8) + ByteBuffer.wrap(colPtrsBytes).order(order).asIntBuffer().get(colPtrs) + ByteBuffer.wrap(indicesBytes).order(order).asIntBuffer().get(rowIndices) + ByteBuffer.wrap(valuesBytes).order(order).asDoubleBuffer().get(values) + val isTransposed = args(5).asInstanceOf[Int] == 1 + new SparseMatrix( + args(0).asInstanceOf[Int], args(1).asInstanceOf[Int], colPtrs, rowIndices, values, + isTransposed) + } + } + // Pickler for SparseVector private[python] class SparseVectorPickler extends BasePickler[SparseVector] { @@ -1099,6 +1217,7 @@ private[spark] object SerDe extends Serializable { if (!initialized) { new DenseVectorPickler().register() new DenseMatrixPickler().register() + new SparseMatrixPickler().register() new SparseVectorPickler().register() new LabeledPointPickler().register() new RatingPickler().register() @@ -1123,7 +1242,7 @@ private[spark] object SerDe extends Serializable { } /* convert RDD[Tuple2[,]] to RDD[Array[Any]] */ - def fromTuple2RDD(rdd: RDD[(Any, Any)]): RDD[Array[Any]] = { + def fromTuple2RDD(rdd: RDD[(Any, Any)]): RDD[Array[Any]] = { rdd.map(x => Array(x._1, x._2)) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index bd2e9079ce1ae..2df4d21e8cd55 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -163,7 +163,7 @@ class LogisticRegressionModel ( override protected def formatVersion: String = "1.0" override def toString: String = { - s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.get}" + s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}" } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index c9b3ff0172e2e..f51ee36d0dfcb 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -21,21 +21,16 @@ import java.lang.{Iterable => JIterable} import scala.collection.JavaConverters._ -import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum, Axis} -import breeze.numerics.{exp => brzExp, log => brzLog} - import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ -import org.json4s.{DefaultFormats, JValue} import org.apache.spark.{Logging, SparkContext, SparkException} -import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector} +import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, DenseVector, SparseVector, Vector} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SQLContext} - /** * Model for Naive Bayes Classifiers. * @@ -43,7 +38,7 @@ import org.apache.spark.sql.{DataFrame, SQLContext} * @param pi log of class priors, whose dimension is C, number of labels * @param theta log of class conditional probabilities, whose dimension is C-by-D, * where D is number of features - * @param modelType The type of NB model to fit can be "Multinomial" or "Bernoulli" + * @param modelType The type of NB model to fit can be "multinomial" or "bernoulli" */ class NaiveBayesModel private[mllib] ( val labels: Array[Double], @@ -52,8 +47,13 @@ class NaiveBayesModel private[mllib] ( val modelType: String) extends ClassificationModel with Serializable with Saveable { + import NaiveBayes.{Bernoulli, Multinomial, supportedModelTypes} + + private val piVector = new DenseVector(pi) + private val thetaMatrix = new DenseMatrix(labels.length, theta(0).length, theta.flatten, true) + private[mllib] def this(labels: Array[Double], pi: Array[Double], theta: Array[Array[Double]]) = - this(labels, pi, theta, "Multinomial") + this(labels, pi, theta, NaiveBayes.Multinomial) /** A Java-friendly constructor that takes three Iterable parameters. */ private[mllib] def this( @@ -62,20 +62,24 @@ class NaiveBayesModel private[mllib] ( theta: JIterable[JIterable[Double]]) = this(labels.asScala.toArray, pi.asScala.toArray, theta.asScala.toArray.map(_.asScala.toArray)) - private val brzPi = new BDV[Double](pi) - private val brzTheta = new BDM(theta(0).length, theta.length, theta.flatten).t + require(supportedModelTypes.contains(modelType), + s"Invalid modelType $modelType. Supported modelTypes are $supportedModelTypes.") // Bernoulli scoring requires log(condprob) if 1, log(1-condprob) if 0. 
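The LogisticRegressionModel.toString change above (SVMModel gets the same fix later in this patch) only matters once the threshold has been cleared; a minimal sketch, assuming `training: RDD[LabeledPoint]`:

  import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS

  val model = new LogisticRegressionWithLBFGS().run(training)
  model.clearThreshold()
  // Previously threshold.get threw on the cleared Option; now this prints "threshold = None".
  println(model.toString)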
- // This precomputes log(1.0 - exp(theta)) and its sum which are used for the linear algebra + // This precomputes log(1.0 - exp(theta)) and its sum which are used for the linear algebra // application of this condition (in predict function). - private val (brzNegTheta, brzNegThetaSum) = modelType match { - case "Multinomial" => (None, None) - case "Bernoulli" => - val negTheta = brzLog((brzExp(brzTheta.copy) :*= (-1.0)) :+= 1.0) // log(1.0 - exp(x)) - (Option(negTheta), Option(brzSum(negTheta, Axis._1))) + private val (thetaMinusNegTheta, negThetaSum) = modelType match { + case Multinomial => (None, None) + case Bernoulli => + val negTheta = thetaMatrix.map(value => math.log(1.0 - math.exp(value))) + val ones = new DenseVector(Array.fill(thetaMatrix.numCols){1.0}) + val thetaMinusNegTheta = thetaMatrix.map { value => + value - math.log(1.0 - math.exp(value)) + } + (Option(thetaMinusNegTheta), Option(negTheta.multiply(ones))) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } override def predict(testData: RDD[Vector]): RDD[Double] = { @@ -88,14 +92,24 @@ class NaiveBayesModel private[mllib] ( override def predict(testData: Vector): Double = { modelType match { - case "Multinomial" => - labels (brzArgmax (brzPi + brzTheta * testData.toBreeze) ) - case "Bernoulli" => - labels (brzArgmax (brzPi + - (brzTheta - brzNegTheta.get) * testData.toBreeze + brzNegThetaSum.get)) + case Multinomial => + val prob = thetaMatrix.multiply(testData) + BLAS.axpy(1.0, piVector, prob) + labels(prob.argmax) + case Bernoulli => + testData.foreachActive { (index, value) => + if (value != 0.0 && value != 1.0) { + throw new SparkException( + s"Bernoulli naive Bayes requires 0 or 1 feature values but found $testData.") + } + } + val prob = thetaMinusNegTheta.get.multiply(testData) + BLAS.axpy(1.0, piVector, prob) + BLAS.axpy(1.0, negThetaSum.get, prob) + labels(prob.argmax) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayesModel was created with an unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } } @@ -137,17 +151,17 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { // Create Parquet data. val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() - dataRDD.saveAsParquetFile(dataPath(path)) + dataRDD.write.parquet(dataPath(path)) } def load(sc: SparkContext, path: String): NaiveBayesModel = { val sqlContext = new SQLContext(sc) // Load Parquet data. - val dataRDD = sqlContext.parquetFile(dataPath(path)) + val dataRDD = sqlContext.read.parquet(dataPath(path)) // Check schema explicitly since erasure makes it hard to use match-case for checking. checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("labels", "pi", "theta", "modelType").take(1) - assert(dataArray.size == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") + assert(dataArray.length == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray @@ -183,17 +197,17 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { // Create Parquet data. 
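Here and in the other model save/load implementations below, persistence moves from the deprecated saveAsParquetFile/parquetFile calls to the 1.4 DataFrameWriter/DataFrameReader API. In user code the same migration looks roughly like this, assuming an existing DataFrame `df` and an SQLContext `sqlContext`:

  df.write.parquet("/tmp/model-data")                      // was: df.saveAsParquetFile(...)
  val loaded = sqlContext.read.parquet("/tmp/model-data")  // was: sqlContext.parquetFile(...)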
val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() - dataRDD.saveAsParquetFile(dataPath(path)) + dataRDD.write.parquet(dataPath(path)) } def load(sc: SparkContext, path: String): NaiveBayesModel = { val sqlContext = new SQLContext(sc) // Load Parquet data. - val dataRDD = sqlContext.parquetFile(dataPath(path)) + val dataRDD = sqlContext.read.parquet(dataPath(path)) // Check schema explicitly since erasure makes it hard to use match-case for checking. checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("labels", "pi", "theta").take(1) - assert(dataArray.size == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") + assert(dataArray.length == 1, s"Unable to load NaiveBayesModel data from: ${dataPath(path)}") val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray @@ -220,16 +234,16 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { s"($loadedClassName, $version). Supported:\n" + s" ($classNameV1_0, 1.0)") } - assert(model.pi.size == numClasses, + assert(model.pi.length == numClasses, s"NaiveBayesModel.load expected $numClasses classes," + - s" but class priors vector pi had ${model.pi.size} elements") - assert(model.theta.size == numClasses, + s" but class priors vector pi had ${model.pi.length} elements") + assert(model.theta.length == numClasses, s"NaiveBayesModel.load expected $numClasses classes," + - s" but class conditionals array theta had ${model.theta.size} elements") - assert(model.theta.forall(_.size == numFeatures), + s" but class conditionals array theta had ${model.theta.length} elements") + assert(model.theta.forall(_.length == numFeatures), s"NaiveBayesModel.load expected $numFeatures features," + s" but class conditionals array theta had elements of size:" + - s" ${model.theta.map(_.size).mkString(",")}") + s" ${model.theta.map(_.length).mkString(",")}") model } } @@ -247,9 +261,11 @@ class NaiveBayes private ( private var lambda: Double, private var modelType: String) extends Serializable with Logging { - def this(lambda: Double) = this(lambda, "Multinomial") + import NaiveBayes.{Bernoulli, Multinomial} + + def this(lambda: Double) = this(lambda, NaiveBayes.Multinomial) - def this() = this(1.0, "Multinomial") + def this() = this(1.0, NaiveBayes.Multinomial) /** Set the smoothing parameter. Default: 1.0. */ def setLambda(lambda: Double): NaiveBayes = { @@ -262,12 +278,11 @@ class NaiveBayes private ( /** * Set the model type using a string (case-sensitive). - * Supported options: "Multinomial" and "Bernoulli". - * (default: Multinomial) + * Supported options: "multinomial" (default) and "bernoulli". 
*/ - def setModelType(modelType:String): NaiveBayes = { + def setModelType(modelType: String): NaiveBayes = { require(NaiveBayes.supportedModelTypes.contains(modelType), - s"NaiveBayes was created with an unknown ModelType: $modelType") + s"NaiveBayes was created with an unknown modelType: $modelType.") this.modelType = modelType this } @@ -283,30 +298,46 @@ class NaiveBayes private ( def run(data: RDD[LabeledPoint]): NaiveBayesModel = { val requireNonnegativeValues: Vector => Unit = (v: Vector) => { val values = v match { - case SparseVector(size, indices, values) => - values - case DenseVector(values) => - values + case sv: SparseVector => sv.values + case dv: DenseVector => dv.values } if (!values.forall(_ >= 0.0)) { throw new SparkException(s"Naive Bayes requires nonnegative feature values but found $v.") } } + val requireZeroOneBernoulliValues: Vector => Unit = (v: Vector) => { + val values = v match { + case sv: SparseVector => sv.values + case dv: DenseVector => dv.values + } + if (!values.forall(v => v == 0.0 || v == 1.0)) { + throw new SparkException( + s"Bernoulli naive Bayes requires 0 or 1 feature values but found $v.") + } + } + // Aggregates term frequencies per label. // TODO: Calling combineByKey and collect creates two stages, we can implement something // TODO: similar to reduceByKeyLocally to save one stage. - val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, BDV[Double])]( + val aggregated = data.map(p => (p.label, p.features)).combineByKey[(Long, DenseVector)]( createCombiner = (v: Vector) => { - requireNonnegativeValues(v) - (1L, v.toBreeze.toDenseVector) + if (modelType == Bernoulli) { + requireZeroOneBernoulliValues(v) + } else { + requireNonnegativeValues(v) + } + (1L, v.copy.toDense) }, - mergeValue = (c: (Long, BDV[Double]), v: Vector) => { + mergeValue = (c: (Long, DenseVector), v: Vector) => { requireNonnegativeValues(v) - (c._1 + 1L, c._2 += v.toBreeze) + BLAS.axpy(1.0, v, c._2) + (c._1 + 1L, c._2) }, - mergeCombiners = (c1: (Long, BDV[Double]), c2: (Long, BDV[Double])) => - (c1._1 + c2._1, c1._2 += c2._2) + mergeCombiners = (c1: (Long, DenseVector), c2: (Long, DenseVector)) => { + BLAS.axpy(1.0, c2._2, c1._2) + (c1._1 + c2._1, c1._2) + } ).collect() val numLabels = aggregated.length @@ -326,11 +357,11 @@ class NaiveBayes private ( labels(i) = label pi(i) = math.log(n + lambda) - piLogDenom val thetaLogDenom = modelType match { - case "Multinomial" => math.log(brzSum(sumTermFreqs) + numFeatures * lambda) - case "Bernoulli" => math.log(n + 2.0 * lambda) + case Multinomial => math.log(sumTermFreqs.values.sum + numFeatures * lambda) + case Bernoulli => math.log(n + 2.0 * lambda) case _ => // This should never happen. - throw new UnknownError(s"NaiveBayes was created with an unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } var j = 0 while (j < numFeatures) { @@ -349,8 +380,14 @@ class NaiveBayes private ( */ object NaiveBayes { + /** String name for multinomial model type. */ + private[classification] val Multinomial: String = "multinomial" + + /** String name for Bernoulli model type. */ + private[classification] val Bernoulli: String = "bernoulli" + /* Set of modelTypes that NaiveBayes supports */ - private[mllib] val supportedModelTypes = Set("Multinomial", "Bernoulli") + private[classification] val supportedModelTypes = Set(Multinomial, Bernoulli) /** * Trains a Naive Bayes model given an RDD of `(label, features)` pairs. 
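With the constants above, the model type strings become lowercase and are validated against supportedModelTypes; typical training calls, assuming `training: RDD[LabeledPoint]` (Bernoulli input must be 0/1-valued):

  import org.apache.spark.mllib.classification.NaiveBayes

  val multinomialModel = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
  val bernoulliModel = NaiveBayes.train(training, lambda = 1.0, modelType = "bernoulli")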
@@ -380,7 +417,7 @@ object NaiveBayes { * @param lambda The smoothing parameter */ def train(input: RDD[LabeledPoint], lambda: Double): NaiveBayesModel = { - new NaiveBayes(lambda, "Multinomial").run(input) + new NaiveBayes(lambda, Multinomial).run(input) } /** @@ -403,7 +440,7 @@ object NaiveBayes { */ def train(input: RDD[LabeledPoint], lambda: Double, modelType: String): NaiveBayesModel = { require(supportedModelTypes.contains(modelType), - s"NaiveBayes was created with an unknown ModelType: $modelType") + s"NaiveBayes was created with an unknown modelType: $modelType.") new NaiveBayes(lambda, modelType).run(input) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 33104cf06c6ea..348485560713e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -89,7 +89,7 @@ class SVMModel ( override protected def formatVersion: String = "1.0" override def toString: String = { - s"${super.toString}, numClasses = 2, threshold = ${threshold.get}" + s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}" } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala index 3b6790cce47c6..fe09f6b75d28b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/impl/GLMClassificationModel.scala @@ -62,7 +62,7 @@ private[classification] object GLMClassificationModel { // Create Parquet data. val data = Data(weights, intercept, threshold) - sc.parallelize(Seq(data), 1).toDF().saveAsParquetFile(Loader.dataPath(path)) + sc.parallelize(Seq(data), 1).toDF().write.parquet(Loader.dataPath(path)) } /** @@ -75,7 +75,7 @@ private[classification] object GLMClassificationModel { def loadData(sc: SparkContext, path: String, modelClass: String): Data = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataRDD = sqlContext.parquetFile(datapath) + val dataRDD = sqlContext.read.parquet(datapath) val dataArray = dataRDD.select("weights", "intercept", "threshold").take(1) assert(dataArray.size == 1, s"Unable to load $modelClass data from: $datapath") val data = dataArray(0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala index 568b65305649f..70b0e40948e51 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala @@ -36,11 +36,11 @@ import org.apache.spark.util.Utils * independent Gaussian distributions with associated "mixing" weights * specifying each's contribution to the composite. * - * Given a set of sample points, this class will maximize the log-likelihood - * for a mixture of k Gaussians, iterating until the log-likelihood changes by + * Given a set of sample points, this class will maximize the log-likelihood + * for a mixture of k Gaussians, iterating until the log-likelihood changes by * less than convergenceTol, or until it has reached the max number of iterations. 
* While this process is generally guaranteed to converge, it is not guaranteed - * to find a global optimum. + * to find a global optimum. * * Note: For high-dimensional data (with many features), this algorithm may perform poorly. * This is due to high-dimensional data (a) making it difficult to cluster at all (based @@ -53,24 +53,24 @@ import org.apache.spark.util.Utils */ @Experimental class GaussianMixture private ( - private var k: Int, - private var convergenceTol: Double, + private var k: Int, + private var convergenceTol: Double, private var maxIterations: Int, private var seed: Long) extends Serializable { - + /** * Constructs a default instance. The default parameters are {k: 2, convergenceTol: 0.01, * maxIterations: 100, seed: random}. */ def this() = this(2, 0.01, 100, Utils.random.nextLong()) - + // number of samples per cluster to use when initializing Gaussians private val nSamples = 5 - - // an initializing GMM can be provided rather than using the + + // an initializing GMM can be provided rather than using the // default random starting point private var initialModel: Option[GaussianMixtureModel] = None - + /** Set the initial GMM starting point, bypassing the random initialization. * You must call setK() prior to calling this method, and the condition * (model.k == this.k) must be met; failure will result in an IllegalArgumentException @@ -83,37 +83,37 @@ class GaussianMixture private ( } this } - + /** Return the user supplied initial GMM, if supplied */ def getInitialModel: Option[GaussianMixtureModel] = initialModel - + /** Set the number of Gaussians in the mixture model. Default: 2 */ def setK(k: Int): this.type = { this.k = k this } - + /** Return the number of Gaussians in the mixture model */ def getK: Int = k - + /** Set the maximum number of iterations to run. Default: 100 */ def setMaxIterations(maxIterations: Int): this.type = { this.maxIterations = maxIterations this } - + /** Return the maximum number of iterations to run */ def getMaxIterations: Int = maxIterations - + /** - * Set the largest change in log-likelihood at which convergence is + * Set the largest change in log-likelihood at which convergence is * considered to have occurred. */ def setConvergenceTol(convergenceTol: Double): this.type = { this.convergenceTol = convergenceTol this } - + /** * Return the largest change in log-likelihood at which convergence is * considered to have occurred. @@ -132,41 +132,41 @@ class GaussianMixture private ( /** Perform expectation maximization */ def run(data: RDD[Vector]): GaussianMixtureModel = { val sc = data.sparkContext - + // we will operate on the data as breeze data val breezeData = data.map(_.toBreeze).cache() - + // Get length of the input vectors val d = breezeData.first().length - + // Determine initial weights and corresponding Gaussians. 
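The initialModel plumbing added to the Python stub earlier corresponds to the warm start handled here; a sketch of the Scala usage, where `data: RDD[Vector]` and `previousModel` (a GaussianMixtureModel with matching k) are assumed:

  import org.apache.spark.mllib.clustering.GaussianMixture

  val gmm = new GaussianMixture()
    .setK(2)
    .setConvergenceTol(0.01)
    .setMaxIterations(100)
    .setInitialModel(previousModel)  // bypasses the random initialization below
  val model = gmm.run(data)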
// If the user supplied an initial GMM, we use those values, otherwise // we start with uniform weights, a random mean from the data, and // diagonal covariance matrices using component variances - // derived from the samples + // derived from the samples val (weights, gaussians) = initialModel match { case Some(gmm) => (gmm.weights, gmm.gaussians) - + case None => { val samples = breezeData.takeSample(withReplacement = true, k * nSamples, seed) - (Array.fill(k)(1.0 / k), Array.tabulate(k) { i => + (Array.fill(k)(1.0 / k), Array.tabulate(k) { i => val slice = samples.view(i * nSamples, (i + 1) * nSamples) - new MultivariateGaussian(vectorMean(slice), initCovariance(slice)) + new MultivariateGaussian(vectorMean(slice), initCovariance(slice)) }) } } - - var llh = Double.MinValue // current log-likelihood + + var llh = Double.MinValue // current log-likelihood var llhp = 0.0 // previous log-likelihood - + var iter = 0 - while(iter < maxIterations && Math.abs(llh-llhp) > convergenceTol) { + while (iter < maxIterations && math.abs(llh-llhp) > convergenceTol) { // create and broadcast curried cluster contribution function val compute = sc.broadcast(ExpectationSum.add(weights, gaussians)_) - + // aggregate the cluster contribution for all sample points val sums = breezeData.aggregate(ExpectationSum.zero(k, d))(compute.value, _ += _) - + // Create new distributions based on the partial assignments // (often referred to as the "M" step in literature) val sumWeights = sums.weights.sum @@ -179,22 +179,22 @@ class GaussianMixture private ( gaussians(i) = new MultivariateGaussian(mu, sums.sigmas(i) / sums.weights(i)) i = i + 1 } - + llhp = llh // current becomes previous llh = sums.logLikelihood // this is the freshly computed log-likelihood iter += 1 - } - + } + new GaussianMixtureModel(weights, gaussians) } - + /** Average of dense breeze vectors */ private def vectorMean(x: IndexedSeq[BV[Double]]): BDV[Double] = { val v = BDV.zeros[Double](x(0).length) x.foreach(xi => v += xi) - v / x.length.toDouble + v / x.length.toDouble } - + /** * Construct matrix where diagonal entries are element-wise * variance of input vectors (computes biased variance) @@ -210,14 +210,14 @@ class GaussianMixture private ( // companion class to provide zero constructor for ExpectationSum private object ExpectationSum { def zero(k: Int, d: Int): ExpectationSum = { - new ExpectationSum(0.0, Array.fill(k)(0.0), - Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d,d))) + new ExpectationSum(0.0, Array.fill(k)(0.0), + Array.fill(k)(BDV.zeros(d)), Array.fill(k)(BreezeMatrix.zeros(d, d))) } - + // compute cluster contributions for each input point // (U, T) => U for aggregation def add( - weights: Array[Double], + weights: Array[Double], dists: Array[MultivariateGaussian]) (sums: ExpectationSum, x: BV[Double]): ExpectationSum = { val p = weights.zip(dists).map { @@ -235,7 +235,7 @@ private object ExpectationSum { i = i + 1 } sums - } + } } // Aggregation class for partial expectation results @@ -244,9 +244,9 @@ private class ExpectationSum( val weights: Array[Double], val means: Array[BDV[Double]], val sigmas: Array[BreezeMatrix[Double]]) extends Serializable { - + val k = weights.length - + def +=(x: ExpectationSum): ExpectationSum = { var i = 0 while (i < k) { @@ -257,5 +257,5 @@ private class ExpectationSum( } logLikelihood += x.logLikelihood this - } + } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index ec65a3da689de..5fc2cb1b62d33 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -34,21 +34,20 @@ import org.apache.spark.sql.{SQLContext, Row} /** * :: Experimental :: * - * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points - * are drawn from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are - * the respective mean and covariance for each Gaussian distribution i=1..k. - * - * @param weight Weights for each Gaussian distribution in the mixture, where weight(i) is - * the weight for Gaussian i, and weight.sum == 1 - * @param mu Means for each Gaussian in the mixture, where mu(i) is the mean for Gaussian i - * @param sigma Covariance maxtrix for each Gaussian in the mixture, where sigma(i) is the - * covariance matrix for Gaussian i + * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points + * are drawn from each Gaussian i=1..k with probability w(i); mu(i) and sigma(i) are + * the respective mean and covariance for each Gaussian distribution i=1..k. + * + * @param weights Weights for each Gaussian distribution in the mixture, where weights(i) is + * the weight for Gaussian i, and weights.sum == 1 + * @param gaussians Array of MultivariateGaussian where gaussians(i) represents + * the Multivariate Gaussian (Normal) Distribution for Gaussian i */ @Experimental class GaussianMixtureModel( - val weights: Array[Double], + val weights: Array[Double], val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable{ - + require(weights.length == gaussians.length, "Length of weight and Gaussian arrays must match") override protected def formatVersion = "1.0" @@ -65,20 +64,20 @@ class GaussianMixtureModel( val responsibilityMatrix = predictSoft(points) responsibilityMatrix.map(r => r.indexOf(r.max)) } - + /** * Given the input vectors, return the membership value of each vector - * to all mixture components. + * to all mixture components. */ def predictSoft(points: RDD[Vector]): RDD[Array[Double]] = { val sc = points.sparkContext val bcDists = sc.broadcast(gaussians) val bcWeights = sc.broadcast(weights) - points.map { x => + points.map { x => computeSoftAssignments(x.toBreeze.toDenseVector, bcDists.value, bcWeights.value, k) } } - + /** * Compute the partial assignments for each vector */ @@ -90,7 +89,7 @@ class GaussianMixtureModel( val p = weights.zip(dists).map { case (weight, dist) => MLUtils.EPSILON + weight * dist.pdf(pt) } - val pSum = p.sum + val pSum = p.sum for (i <- 0 until k) { p(i) /= pSum } @@ -127,13 +126,13 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] { val dataArray = Array.tabulate(weights.length) { i => Data(weights(i), gaussians(i).mu, gaussians(i).sigma) } - sc.parallelize(dataArray, 1).toDF().saveAsParquetFile(Loader.dataPath(path)) + sc.parallelize(dataArray, 1).toDF().write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): GaussianMixtureModel = { val dataPath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataFrame = sqlContext.parquetFile(dataPath) + val dataFrame = sqlContext.read.parquet(dataPath) val dataArray = dataFrame.select("weight", "mu", "sigma").collect() // Check schema explicitly since erasure makes it hard to use match-case for checking. 
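For reference, the two prediction paths this model exposes, assuming `gmmModel: GaussianMixtureModel` and `points: RDD[Vector]`:

  val hard = gmmModel.predict(points)      // RDD[Int]: index of the most likely component
  val soft = gmmModel.predictSoft(points)  // RDD[Array[Double]]: per-component memberships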
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index ba228b11fcec3..8ecb3df11d95e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -110,7 +110,7 @@ object KMeansModel extends Loader[KMeansModel] { val dataRDD = sc.parallelize(model.clusterCenters.zipWithIndex).map { case (point, id) => Cluster(id, point) }.toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): KMeansModel = { @@ -120,7 +120,7 @@ object KMeansModel extends Loader[KMeansModel] { assert(className == thisClassName) assert(formatVersion == thisFormatVersion) val k = (metadata \ "k").extract[Int] - val centriods = sqlContext.parquetFile(Loader.dataPath(path)) + val centriods = sqlContext.read.parquet(Loader.dataPath(path)) Loader.checkSchema[Cluster](centriods.schema) val localCentriods = centriods.map(Cluster.apply).collect() assert(k == localCentriods.size) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala index c8daa2388e868..a410547a72fda 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala @@ -18,8 +18,9 @@ package org.apache.spark.mllib.clustering import breeze.linalg.{DenseVector => BDV} + import org.apache.spark.Logging -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaPairRDD import org.apache.spark.graphx._ import org.apache.spark.mllib.linalg.Vector @@ -197,12 +198,20 @@ class LDA private ( } - /** LDAOptimizer used to perform the actual calculation */ + /** + * :: DeveloperApi :: + * + * LDAOptimizer used to perform the actual calculation + */ + @DeveloperApi def getOptimizer: LDAOptimizer = ldaOptimizer /** + * :: DeveloperApi :: + * * LDAOptimizer used to perform the actual calculation (default = EMLDAOptimizer) */ + @DeveloperApi def setOptimizer(optimizer: LDAOptimizer): this.type = { this.ldaOptimizer = optimizer this @@ -210,7 +219,7 @@ class LDA private ( /** * Set the LDAOptimizer used to perform the actual calculation by algorithm name. - * Currently "em", "online" is supported. + * Currently "em", "online" are supported. 
*/ def setOptimizer(optimizerName: String): this.type = { this.ldaOptimizer = diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index 093aa0f315ab2..8e5154b902d1d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -23,7 +23,7 @@ import breeze.linalg.{DenseVector => BDV, DenseMatrix => BDM, sum, normalize, kr import breeze.numerics.{digamma, exp, abs} import breeze.stats.distributions.{Gamma, RandBasis} -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.graphx._ import org.apache.spark.graphx.impl.GraphImpl import org.apache.spark.mllib.impl.PeriodicGraphCheckpointer @@ -31,13 +31,13 @@ import org.apache.spark.mllib.linalg.{Matrices, SparseVector, DenseVector, Vecto import org.apache.spark.rdd.RDD /** - * :: Experimental :: + * :: DeveloperApi :: * * An LDAOptimizer specifies which optimization/learning/inference algorithm to use, and it can * hold optimizer-specific parameters for users to set. */ -@Experimental -trait LDAOptimizer { +@DeveloperApi +sealed trait LDAOptimizer { /* DEVELOPERS NOTE: @@ -59,7 +59,7 @@ trait LDAOptimizer { } /** - * :: Experimental :: + * :: DeveloperApi :: * * Optimizer for EM algorithm which stores data + parameter graph, plus algorithm parameters. * @@ -75,8 +75,8 @@ trait LDAOptimizer { * "On Smoothing and Inference for Topic Models." UAI, 2009. * */ -@Experimental -class EMLDAOptimizer extends LDAOptimizer { +@DeveloperApi +final class EMLDAOptimizer extends LDAOptimizer { import LDA._ @@ -211,7 +211,7 @@ class EMLDAOptimizer extends LDAOptimizer { /** - * :: Experimental :: + * :: DeveloperApi :: * * An online optimizer for LDA. The Optimizer implements the Online variational Bayes LDA * algorithm, which processes a subset of the corpus on each iteration, and updates the term-topic @@ -220,8 +220,8 @@ class EMLDAOptimizer extends LDAOptimizer { * Original Online LDA paper: * Hoffman, Blei and Bach, "Online Learning for Latent Dirichlet Allocation." NIPS, 2010. */ -@Experimental -class OnlineLDAOptimizer extends LDAOptimizer { +@DeveloperApi +final class OnlineLDAOptimizer extends LDAOptimizer { // LDA common parameters private var k: Int = 0 @@ -243,8 +243,8 @@ class OnlineLDAOptimizer extends LDAOptimizer { private var randomGenerator: java.util.Random = null // Online LDA specific parameters - // Learning rate is: (tau_0 + t)^{-kappa} - private var tau_0: Double = 1024 + // Learning rate is: (tau0 + t)^{-kappa} + private var tau0: Double = 1024 private var kappa: Double = 0.51 private var miniBatchFraction: Double = 0.05 @@ -265,16 +265,16 @@ class OnlineLDAOptimizer extends LDAOptimizer { * A (positive) learning parameter that downweights early iterations. Larger values make early * iterations count less. */ - def getTau_0: Double = this.tau_0 + def getTau0: Double = this.tau0 /** * A (positive) learning parameter that downweights early iterations. Larger values make early * iterations count less. * Default: 1024, following the original Online LDA paper. 
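With the tau_0 parameter renamed to tau0 (setter just below), online LDA is configured roughly as follows; `corpus: RDD[(Long, Vector)]` of (document id, term-count vector) pairs is assumed:

  import org.apache.spark.mllib.clustering.{LDA, OnlineLDAOptimizer}

  val optimizer = new OnlineLDAOptimizer()
    .setTau0(1024)               // was setTau_0
    .setKappa(0.51)
    .setMiniBatchFraction(0.05)
  val ldaModel = new LDA()
    .setK(10)
    .setOptimizer(optimizer)     // or .setOptimizer("online")
    .run(corpus)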
*/ - def setTau_0(tau_0: Double): this.type = { - require(tau_0 > 0, s"LDA tau_0 must be positive, but was set to $tau_0") - this.tau_0 = tau_0 + def setTau0(tau0: Double): this.type = { + require(tau0 > 0, s"LDA tau0 must be positive, but was set to $tau0") + this.tau0 = tau0 this } @@ -339,7 +339,7 @@ class OnlineLDAOptimizer extends LDAOptimizer { override private[clustering] def initialize( docs: RDD[(Long, Vector)], - lda: LDA): OnlineLDAOptimizer = { + lda: LDA): OnlineLDAOptimizer = { this.k = lda.getK this.corpusSize = docs.count() this.vocabSize = docs.first()._2.size @@ -434,11 +434,8 @@ class OnlineLDAOptimizer extends LDAOptimizer { * Update lambda based on the batch submitted. batchSize can be different for each iteration. */ private[clustering] def update(stat: BDM[Double], iter: Int, batchSize: Int): Unit = { - val tau_0 = this.getTau_0 - val kappa = this.getKappa - // weight of the mini-batch. - val weight = math.pow(tau_0 + iter, -kappa) + val weight = math.pow(getTau0 + iter, -getKappa) // Update lambda based on documents. lambda = lambda * (1 - weight) + @@ -461,7 +458,7 @@ class OnlineLDAOptimizer extends LDAOptimizer { * uses digamma which is accurate but expensive. */ private def dirichletExpectation(alpha: BDM[Double]): BDM[Double] = { - val rowSum = sum(alpha(breeze.linalg.*, ::)) + val rowSum = sum(alpha(breeze.linalg.*, ::)) val digAlpha = digamma(alpha) val digRowSum = digamma(rowSum) val result = digAlpha(::, breeze.linalg.*) - digRowSum diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala index aa53e88d59856..e7a243f854e33 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala @@ -74,7 +74,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) val dataRDD = model.assignments.toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String): PowerIterationClusteringModel = { @@ -86,7 +86,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringMode assert(formatVersion == thisFormatVersion) val k = (metadata \ "k").extract[Int] - val assignments = sqlContext.parquetFile(Loader.dataPath(path)) + val assignments = sqlContext.read.parquet(Loader.dataPath(path)) Loader.checkSchema[PowerIterationClustering.Assignment](assignments.schema) val assignmentsRDD = assignments.map { @@ -121,7 +121,7 @@ class PowerIterationClustering private[clustering] ( import org.apache.spark.mllib.clustering.PowerIterationClustering._ /** Constructs a PIC instance with default parameters: {k: 2, maxIterations: 100, - * initMode: "random"}. + * initMode: "random"}. */ def this() = this(k = 2, maxIterations = 100, initMode = "random") @@ -243,7 +243,7 @@ object PowerIterationClustering extends Logging { /** * Generates random vertex properties (v0) to start power iteration. - * + * * @param g a graph representing the normalized affinity matrix (W) * @return a graph with edges representing W and vertices representing a random vector * with unit 1-norm @@ -266,7 +266,7 @@ object PowerIterationClustering extends Logging { * Generates the degree vector as the vertex properties (v0) to start power iteration. 
* It is not exactly the node degrees but just the normalized sum similarities. Call it * as degree vector because it is used in the PIC paper. - * + * * @param g a graph representing the normalized affinity matrix (W) * @return a graph with edges representing W and vertices representing the degree vector */ @@ -276,7 +276,7 @@ object PowerIterationClustering extends Logging { val v0 = g.vertices.mapValues(_ / sum) GraphImpl.fromExistingRDDs(VertexRDD(v0), g.edges) } - + /** * Runs power iteration. * @param g input graph with edges representing the normalized affinity matrix (W) and vertices diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index 812014a041719..c21e4fe7dc9b6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -178,7 +178,7 @@ class StreamingKMeans( /** Set the decay factor directly (for forgetful algorithms). */ def setDecayFactor(a: Double): this.type = { - this.decayFactor = decayFactor + this.decayFactor = a this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala index 666362ae6739a..4628dc5690913 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MulticlassMetrics.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental import org.apache.spark.mllib.linalg.{Matrices, Matrix} import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame /** * ::Experimental:: @@ -33,6 +34,13 @@ import org.apache.spark.rdd.RDD @Experimental class MulticlassMetrics(predictionAndLabels: RDD[(Double, Double)]) { + /** + * An auxiliary constructor taking a DataFrame. + * @param predictionAndLabels a DataFrame with two double columns: prediction and label + */ + private[mllib] def this(predictionAndLabels: DataFrame) = + this(predictionAndLabels.map(r => (r.getDouble(0), r.getDouble(1)))) + private lazy val labelCountByClass: Map[Double, Long] = predictionAndLabels.values.countByValue() private lazy val labelCount: Long = labelCountByClass.values.sum private lazy val tpByClass: Map[Double, Int] = predictionAndLabels diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala index a8378a76d20ae..bf6eb1d5bd2ab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/MultilabelMetrics.scala @@ -19,6 +19,7 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ +import org.apache.spark.sql.DataFrame /** * Evaluator for multilabel classification. @@ -27,6 +28,13 @@ import org.apache.spark.SparkContext._ */ class MultilabelMetrics(predictionAndLabels: RDD[(Array[Double], Array[Double])]) { + /** + * An auxiliary constructor taking a DataFrame. 
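The setDecayFactor fix above matters because the old body assigned the field to itself and silently ignored the argument; a sketch of a configuration that now actually takes effect:

  import org.apache.spark.mllib.clustering.StreamingKMeans

  val skm = new StreamingKMeans()
    .setK(3)
    .setDecayFactor(0.5)       // was a no-op before this fix
    .setRandomCenters(2, 0.0)
  // skm.trainOn(trainingStream)  // trainingStream: DStream[Vector], assumed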
+ * @param predictionAndLabels a DataFrame with two double array columns: prediction and label + */ + private[mllib] def this(predictionAndLabels: DataFrame) = + this(predictionAndLabels.map(r => (r.getSeq[Double](0).toArray, r.getSeq[Double](1).toArray))) + private lazy val numDocs: Long = predictionAndLabels.count() private lazy val numLabels: Long = predictionAndLabels.flatMap { case (_, labels) => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index 93a7353e2c070..5b5a2a1450f7f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -17,17 +17,22 @@ package org.apache.spark.mllib.evaluation +import java.{lang => jl} + +import scala.collection.JavaConverters._ import scala.reflect.ClassTag import org.apache.spark.Logging -import org.apache.spark.SparkContext._ import org.apache.spark.annotation.Experimental +import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} import org.apache.spark.rdd.RDD /** * ::Experimental:: * Evaluator for ranking algorithms. * + * Java users should use [[RankingMetrics$.of]] to create a [[RankingMetrics]] instance. + * * @param predictionAndLabels an RDD of (predicted ranking, ground truth set) pairs. */ @Experimental @@ -71,7 +76,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] logWarning("Empty ground truth set, check input data") 0.0 } - }.mean + }.mean() } /** @@ -100,7 +105,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] logWarning("Empty ground truth set, check input data") 0.0 } - }.mean + }.mean() } /** @@ -146,7 +151,23 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])] logWarning("Empty ground truth set, check input data") 0.0 } - }.mean + }.mean() } } + +@Experimental +object RankingMetrics { + + /** + * Creates a [[RankingMetrics]] instance (for Java users). + * @param predictionAndLabels a JavaRDD of (predicted ranking, ground truth set) pairs + */ + def of[E, T <: jl.Iterable[E]](predictionAndLabels: JavaRDD[(T, T)]): RankingMetrics[E] = { + implicit val tag = JavaSparkContext.fakeClassTag[E] + val rdd = predictionAndLabels.rdd.map { case (predictions, labels) => + (predictions.asScala.toArray, labels.asScala.toArray) + } + new RankingMetrics(rdd) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 693117d820580..e577bf87f885e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -22,6 +22,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer} +import org.apache.spark.sql.DataFrame /** * :: Experimental :: @@ -32,6 +33,14 @@ import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Multivariate @Experimental class RegressionMetrics(predictionAndObservations: RDD[(Double, Double)]) extends Logging { + /** + * An auxiliary constructor taking a DataFrame. 
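RankingMetrics itself is unchanged in behaviour; the additions above give Java callers RankingMetrics.of and tidy the mean() calls. Typical Scala usage, assuming `predictionAndLabels: RDD[(Array[Int], Array[Int])]` of (ranked predictions, relevant items) pairs:

  import org.apache.spark.mllib.evaluation.RankingMetrics

  val metrics = new RankingMetrics(predictionAndLabels)
  val p5 = metrics.precisionAt(5)
  val map = metrics.meanAveragePrecision
  val ndcg5 = metrics.ndcgAt(5)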
+ * @param predictionAndObservations a DataFrame with two double columns: + * prediction and observation + */ + private[mllib] def this(predictionAndObservations: DataFrame) = + this(predictionAndObservations.map(r => (r.getDouble(0), r.getDouble(1)))) + /** * Use MultivariateOnlineSummarizer to calculate summary statistics of observations and errors. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index c6057c7f837b1..5f8c1dea237b4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -38,7 +38,8 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf protected def isSorted(array: Array[Int]): Boolean = { var i = 1 - while (i < array.length) { + val len = array.length + while (i < len) { if (array(i) < array(i-1)) return false i += 1 } @@ -107,7 +108,7 @@ class ChiSqSelectorModel (val selectedFeatures: Array[Int]) extends VectorTransf * (ordered by statistic value descending) */ @Experimental -class ChiSqSelector (val numTopFeatures: Int) { +class ChiSqSelector (val numTopFeatures: Int) extends Serializable { /** * Returns a ChiSquared feature selector. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala new file mode 100644 index 0000000000000..b0985baf9b278 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ElementwiseProduct.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.annotation.Experimental +import org.apache.spark.mllib.linalg._ + +/** + * :: Experimental :: + * Outputs the Hadamard product (i.e., the element-wise product) of each input vector with a + * provided "weight" vector. In other words, it scales each column of the dataset by a scalar + * multiplier. + * @param scalingVector The values used to scale the reference vector's individual components. + */ +@Experimental +class ElementwiseProduct(val scalingVector: Vector) extends VectorTransformer { + + /** + * Does the hadamard product transformation. + * + * @param vector vector to be transformed. + * @return transformed vector. 
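The new ElementwiseProduct transformer (its transform implementation follows below) simply scales each vector component by the corresponding weight; a small sketch:

  import org.apache.spark.mllib.feature.ElementwiseProduct
  import org.apache.spark.mllib.linalg.Vectors

  val transformer = new ElementwiseProduct(Vectors.dense(0.0, 1.0, 2.0))
  val v = transformer.transform(Vectors.dense(4.0, 5.0, 6.0))
  // v == Vectors.dense(0.0, 5.0, 12.0)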
+ */ + override def transform(vector: Vector): Vector = { + require(vector.size == scalingVector.size, + s"vector sizes do not match: Expected ${scalingVector.size} but found ${vector.size}") + vector match { + case dv: DenseVector => + val values: Array[Double] = dv.values.clone() + val dim = scalingVector.size + var i = 0 + while (i < dim) { + values(i) *= scalingVector(i) + i += 1 + } + Vectors.dense(values) + case SparseVector(size, indices, vs) => + val values = vs.clone() + val dim = values.length + var i = 0 + while (i < dim) { + values(i) *= scalingVector(indices(i)) + i += 1 + } + Vectors.sparse(size, indices, values) + case v => throw new IllegalArgumentException("Does not support vector type " + v.getClass) + } + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala index a89eea0e21be2..efbfeb4059f5a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/IDF.scala @@ -144,7 +144,7 @@ private object IDF { * Since arrays are initialized to 0 by default, * we just omit changing those entries. */ - if(df(j) >= minDocFreq) { + if (df(j) >= minDocFreq) { inv(j) = math.log((m + 1.0) / (df(j) + 1.0)) } j += 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala new file mode 100644 index 0000000000000..4e01e402b4283 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.linalg.distributed.RowMatrix +import org.apache.spark.rdd.RDD + +/** + * A feature transformer that projects vectors to a low-dimensional space using PCA. + * + * @param k number of principal components + */ +class PCA(val k: Int) { + require(k >= 1, s"PCA requires a number of principal components k >= 1 but was given $k") + + /** + * Computes a [[PCAModel]] that contains the principal components of the input vectors. + * + * @param sources source vectors + */ + def fit(sources: RDD[Vector]): PCAModel = { + require(k <= sources.first().size, + s"source vector size is ${sources.first().size} must be greater than k=$k") + + val mat = new RowMatrix(sources) + val pc = mat.computePrincipalComponents(k) match { + case dm: DenseMatrix => + dm + case sm: SparseMatrix => + /* Convert a sparse matrix to dense. + * + * RowMatrix.computePrincipalComponents always returns a dense matrix. + * The following code is a safeguard. 
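// Usage sketch for the new PCA transformer (illustrative, assuming a live SparkContext `sc`):
// fit k principal components on an RDD[Vector], then project vectors with the resulting PCAModel.
import org.apache.spark.mllib.feature.PCA
import org.apache.spark.mllib.linalg.Vectors

val vectors = sc.parallelize(Seq(
  Vectors.dense(1.0, 2.0, 3.0, 4.0),
  Vectors.dense(2.0, 3.0, 4.0, 5.0),
  Vectors.dense(4.0, 3.0, 2.0, 1.0)))
val pcaModel = new PCA(k = 2).fit(vectors)
val projected = vectors.map(pcaModel.transform)  // each output vector has length k = 2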
+ */ + sm.toDense + case m => + throw new IllegalArgumentException("Unsupported matrix format. Expected " + + s"SparseMatrix or DenseMatrix. Instead got: ${m.getClass}") + + } + new PCAModel(k, pc) + } + + /** Java-friendly version of [[fit()]] */ + def fit(sources: JavaRDD[Vector]): PCAModel = fit(sources.rdd) +} + +/** + * Model fitted by [[PCA]] that can project vectors to a low-dimensional space using PCA. + * + * @param k number of principal components. + * @param pc a principal components Matrix. Each column is one principal component. + */ +class PCAModel private[mllib] (val k: Int, val pc: DenseMatrix) extends VectorTransformer { + /** + * Transform a vector by computed Principal Components. + * + * @param vector vector to be transformed. + * Vector must be the same length as the source vectors given to [[PCA.fit()]]. + * @return transformed vector. Vector will be of length k. + */ + override def transform(vector: Vector): Vector = { + vector match { + case dv: DenseVector => + pc.transpose.multiply(dv) + case SparseVector(size, indices, values) => + /* SparseVector -> single row SparseMatrix */ + val sm = Matrices.sparse(size, 1, Array(0, indices.length), indices, values).transpose + val projection = sm.multiply(pc) + Vectors.dense(projection.values) + case _ => + throw new IllegalArgumentException("Unsupported vector format. Expected " + + s"SparseVector or DenseVector. Instead got: ${vector.getClass}") + } + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 6ae6917eae595..c73b8f258060d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -90,7 +90,7 @@ class StandardScalerModel ( @DeveloperApi def setWithMean(withMean: Boolean): this.type = { - require(!(withMean && this.mean == null),"cannot set withMean to true while mean is null") + require(!(withMean && this.mean == null), "cannot set withMean to true while mean is null") this.withMean = withMean this } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 98e83112f52ae..375deccf18e21 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -42,32 +42,32 @@ import org.apache.spark.util.random.XORShiftRandom import org.apache.spark.sql.{SQLContext, Row} /** - * Entry in vocabulary + * Entry in vocabulary */ private case class VocabWord( var word: String, var cn: Int, var point: Array[Int], var code: Array[Int], - var codeLen:Int + var codeLen: Int ) /** * :: Experimental :: * Word2Vec creates vector representation of words in a text corpus. * The algorithm first constructs a vocabulary from the corpus - * and then learns vector representation of words in the vocabulary. - * The vector representation can be used as features in + * and then learns vector representation of words in the vocabulary. + * The vector representation can be used as features in * natural language processing and machine learning algorithms. - * - * We used skip-gram model in our implementation and hierarchical softmax + * + * We used skip-gram model in our implementation and hierarchical softmax * method to train the model. The variable names in the implementation * matches the original C implementation. 
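// Sketch (illustrative; the corpus path and query word are placeholders, `sc` is an existing
// SparkContext): training Word2Vec on an RDD of tokenized sentences and querying synonyms, per the
// skip-gram / hierarchical-softmax model described above.
import org.apache.spark.mllib.feature.Word2Vec
import org.apache.spark.rdd.RDD

val sentences: RDD[Seq[String]] = sc.textFile("data/text8").map(_.split(" ").toSeq)
val w2vModel = new Word2Vec().setVectorSize(100).setMinCount(5).fit(sentences)
w2vModel.findSynonyms("spark", 5).foreach { case (word, cosine) => println(s"$word $cosine") }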
* - * For original C implementation, see https://code.google.com/p/word2vec/ - * For research papers, see + * For original C implementation, see https://code.google.com/p/word2vec/ + * For research papers, see * Efficient Estimation of Word Representations in Vector Space - * and + * and * Distributed Representations of Words and Phrases and their Compositionality. */ @Experimental @@ -79,7 +79,7 @@ class Word2Vec extends Serializable with Logging { private var numIterations = 1 private var seed = Utils.random.nextLong() private var minCount = 5 - + /** * Sets vector size (default: 100). */ @@ -122,15 +122,15 @@ class Word2Vec extends Serializable with Logging { this } - /** - * Sets minCount, the minimum number of times a token must appear to be included in the word2vec + /** + * Sets minCount, the minimum number of times a token must appear to be included in the word2vec * model's vocabulary (default: 5). */ def setMinCount(minCount: Int): this.type = { this.minCount = minCount this } - + private val EXP_TABLE_SIZE = 1000 private val MAX_EXP = 6 private val MAX_CODE_LENGTH = 40 @@ -150,13 +150,13 @@ class Word2Vec extends Serializable with Logging { .map(x => VocabWord( x._1, x._2, - new Array[Int](MAX_CODE_LENGTH), - new Array[Int](MAX_CODE_LENGTH), + new Array[Int](MAX_CODE_LENGTH), + new Array[Int](MAX_CODE_LENGTH), 0)) .filter(_.cn >= minCount) .collect() .sortWith((a, b) => a.cn > b.cn) - + vocabSize = vocab.length var a = 0 while (a < vocabSize) { @@ -195,8 +195,8 @@ class Word2Vec extends Serializable with Logging { } var pos1 = vocabSize - 1 var pos2 = vocabSize - - var min1i = 0 + + var min1i = 0 var min2i = 0 a = 0 @@ -265,15 +265,15 @@ class Word2Vec extends Serializable with Logging { val words = dataset.flatMap(x => x) learnVocab(words) - + createBinaryTree() - + val sc = dataset.context val expTable = sc.broadcast(createExpTable()) val bcVocab = sc.broadcast(vocab) val bcVocabHash = sc.broadcast(vocabHash) - + val sentences: RDD[Array[Int]] = words.mapPartitions { iter => new Iterator[Array[Int]] { def hasNext: Boolean = iter.hasNext @@ -294,7 +294,7 @@ class Word2Vec extends Serializable with Logging { } } } - + val newSentences = sentences.repartition(numPartitions).cache() val initRandom = new XORShiftRandom(seed) @@ -399,7 +399,7 @@ class Word2Vec extends Serializable with Logging { } } newSentences.unpersist() - + val word2VecMap = mutable.HashMap.empty[String, Array[Float]] var i = 0 while (i < vocabSize) { @@ -466,7 +466,7 @@ class Word2VecModel private[mllib] ( val norm1 = blas.snrm2(n, v1, 1) val norm2 = blas.snrm2(n, v2, 1) if (norm1 == 0 || norm2 == 0) return 0.0 - blas.sdot(n, v1, 1, v2,1) / norm1 / norm2 + blas.sdot(n, v1, 1, v2, 1) / norm1 / norm2 } override protected def formatVersion = "1.0" @@ -477,7 +477,7 @@ class Word2VecModel private[mllib] ( /** * Transforms a word to its vector representation - * @param word a word + * @param word a word * @return vector representation of word */ def transform(word: String): Vector = { @@ -492,18 +492,18 @@ class Word2VecModel private[mllib] ( /** * Find synonyms of a word * @param word a word - * @param num number of synonyms to find + * @param num number of synonyms to find * @return array of (word, cosineSimilarity) */ def findSynonyms(word: String, num: Int): Array[(String, Double)] = { val vector = transform(word) - findSynonyms(vector,num) + findSynonyms(vector, num) } /** * Find synonyms of the vector representation of a word * @param vector vector representation of a word - * @param num number of synonyms to 
find + * @param num number of synonyms to find * @return array of (word, cosineSimilarity) */ def findSynonyms(vector: Vector, num: Int): Array[(String, Double)] = { @@ -556,7 +556,7 @@ object Word2VecModel extends Loader[Word2VecModel] { def load(sc: SparkContext, path: String): Word2VecModel = { val dataPath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataFrame = sqlContext.parquetFile(dataPath) + val dataFrame = sqlContext.read.parquet(dataPath) val dataArray = dataFrame.select("word", "vector").collect() @@ -580,7 +580,7 @@ object Word2VecModel extends Loader[Word2VecModel] { sc.parallelize(Seq(metadata), 1).saveAsTextFile(Loader.metadataPath(path)) val dataArray = model.toSeq.map { case (w, v) => Data(w, v) } - sc.parallelize(dataArray.toSeq, 1).toDF().saveAsParquetFile(Loader.dataPath(path)) + sc.parallelize(dataArray.toSeq, 1).toDF().write.parquet(Loader.dataPath(path)) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index 87052e1ba8539..557119f7b1cd1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -228,7 +228,7 @@ private[spark] object BLAS extends Serializable with Logging { } _nativeBLAS } - + /** * A := alpha * x * x^T^ + A * @param alpha a real scalar that will be multiplied to x * x^T^. @@ -264,7 +264,7 @@ private[spark] object BLAS extends Serializable with Logging { j += 1 } i += 1 - } + } } private def syr(alpha: Double, x: SparseVector, A: DenseMatrix) { @@ -463,7 +463,7 @@ private[spark] object BLAS extends Serializable with Logging { def gemv( alpha: Double, A: Matrix, - x: DenseVector, + x: Vector, beta: Double, y: DenseVector): Unit = { require(A.numCols == x.size, @@ -473,27 +473,32 @@ private[spark] object BLAS extends Serializable with Logging { if (alpha == 0.0) { logDebug("gemv: alpha is equal to 0. Returning y.") } else { - A match { - case sparse: SparseMatrix => - gemv(alpha, sparse, x, beta, y) - case dense: DenseMatrix => - gemv(alpha, dense, x, beta, y) + (A, x) match { + case (smA: SparseMatrix, dvx: DenseVector) => + gemv(alpha, smA, dvx, beta, y) + case (smA: SparseMatrix, svx: SparseVector) => + gemv(alpha, smA, svx, beta, y) + case (dmA: DenseMatrix, dvx: DenseVector) => + gemv(alpha, dmA, dvx, beta, y) + case (dmA: DenseMatrix, svx: SparseVector) => + gemv(alpha, dmA, svx, beta, y) case _ => - throw new IllegalArgumentException(s"gemv doesn't support matrix type ${A.getClass}.") + throw new IllegalArgumentException(s"gemv doesn't support running on matrix type " + + s"${A.getClass} and vector type ${x.getClass}.") } } } /** * y := alpha * A * x + beta * y - * For `DenseMatrix` A. + * For `DenseMatrix` A and `DenseVector` x. */ private def gemv( alpha: Double, A: DenseMatrix, x: DenseVector, beta: Double, - y: DenseVector): Unit = { + y: DenseVector): Unit = { val tStrA = if (A.isTransposed) "T" else "N" val mA = if (!A.isTransposed) A.numRows else A.numCols val nA = if (!A.isTransposed) A.numCols else A.numRows @@ -503,14 +508,134 @@ private[spark] object BLAS extends Serializable with Logging { /** * y := alpha * A * x + beta * y - * For `SparseMatrix` A. + * For `DenseMatrix` A and `SparseVector` x. 
+ */ + private def gemv( + alpha: Double, + A: DenseMatrix, + x: SparseVector, + beta: Double, + y: DenseVector): Unit = { + val mA: Int = A.numRows + val nA: Int = A.numCols + + val Avals = A.values + + val xIndices = x.indices + val xNnz = xIndices.length + val xValues = x.values + val yValues = y.values + + if (alpha == 0.0) { + scal(beta, y) + return + } + + if (A.isTransposed) { + var rowCounterForA = 0 + while (rowCounterForA < mA) { + var sum = 0.0 + var k = 0 + while (k < xNnz) { + sum += xValues(k) * Avals(xIndices(k) + rowCounterForA * nA) + k += 1 + } + yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA) + rowCounterForA += 1 + } + } else { + var rowCounterForA = 0 + while (rowCounterForA < mA) { + var sum = 0.0 + var k = 0 + while (k < xNnz) { + sum += xValues(k) * Avals(xIndices(k) * mA + rowCounterForA) + k += 1 + } + yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA) + rowCounterForA += 1 + } + } + } + + /** + * y := alpha * A * x + beta * y + * For `SparseMatrix` A and `SparseVector` x. + */ + private def gemv( + alpha: Double, + A: SparseMatrix, + x: SparseVector, + beta: Double, + y: DenseVector): Unit = { + val xValues = x.values + val xIndices = x.indices + val xNnz = xIndices.length + + val yValues = y.values + + val mA: Int = A.numRows + val nA: Int = A.numCols + + val Avals = A.values + val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs + val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices + + if (alpha == 0.0) { + scal(beta, y) + return + } + + if (A.isTransposed) { + var rowCounter = 0 + while (rowCounter < mA) { + var i = Arows(rowCounter) + val indEnd = Arows(rowCounter + 1) + var sum = 0.0 + var k = 0 + while (k < xNnz && i < indEnd) { + if (xIndices(k) == Acols(i)) { + sum += Avals(i) * xValues(k) + i += 1 + } + k += 1 + } + yValues(rowCounter) = sum * alpha + beta * yValues(rowCounter) + rowCounter += 1 + } + } else { + scal(beta, y) + + var colCounterForA = 0 + var k = 0 + while (colCounterForA < nA && k < xNnz) { + if (xIndices(k) == colCounterForA) { + var i = Acols(colCounterForA) + val indEnd = Acols(colCounterForA + 1) + + val xTemp = xValues(k) * alpha + while (i < indEnd) { + val rowIndex = Arows(i) + yValues(Arows(i)) += Avals(i) * xTemp + i += 1 + } + k += 1 + } + colCounterForA += 1 + } + } + } + + /** + * y := alpha * A * x + beta * y + * For `SparseMatrix` A and `DenseVector` x. 
*/ private def gemv( alpha: Double, A: SparseMatrix, x: DenseVector, beta: Double, - y: DenseVector): Unit = { + y: DenseVector): Unit = { val xValues = x.values val yValues = y.values val mA: Int = A.numRows @@ -534,10 +659,7 @@ private[spark] object BLAS extends Serializable with Logging { rowCounter += 1 } } else { - // Scale vector first if `beta` is not equal to 0.0 - if (beta != 0.0) { - scal(beta, y) - } + scal(beta, y) // Perform matrix-vector multiplication and add to y var colCounterForA = 0 while (colCounterForA < nA) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index 866936aa4f118..ae3ba3099c878 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -81,7 +81,7 @@ private[mllib] object EigenValueDecomposition { require(n * ncv.toLong <= Integer.MAX_VALUE && ncv * (ncv.toLong + 8) <= Integer.MAX_VALUE, s"k = $k and/or n = $n are too large to compute an eigendecomposition") - + var ido = new intW(0) var info = new intW(0) var resid = new Array[Double](n) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 3fa5e068d16d4..9584da8e3a0f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -77,8 +77,13 @@ sealed trait Matrix extends Serializable { C } - /** Convenience method for `Matrix`-`DenseVector` multiplication. */ + /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */ def multiply(y: DenseVector): DenseVector = { + multiply(y.asInstanceOf[Vector]) + } + + /** Convenience method for `Matrix`-`Vector` multiplication. 
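// Sketch of the widened overload above (not part of the patch): with gemv now dispatching on the
// vector type, a Matrix can be multiplied by either a DenseVector or a SparseVector.
import org.apache.spark.mllib.linalg.{Matrices, Vectors}

val A = Matrices.dense(2, 3, Array(1.0, 4.0, 2.0, 5.0, 3.0, 6.0))               // column-major 2x3
val denseResult = A.multiply(Vectors.dense(1.0, 1.0, 1.0))                       // [6.0, 15.0]
val sparseResult = A.multiply(Vectors.sparse(3, Array(0, 2), Array(1.0, 1.0)))   // [4.0, 10.0]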
*/ + def multiply(y: Vector): DenseVector = { val output = new DenseVector(new Array[Double](numRows)) BLAS.gemv(1.0, this, y, 0.0, output) output @@ -273,7 +278,8 @@ class DenseMatrix( override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone()) - private[mllib] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f)) + private[mllib] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f), + isTransposed) private[mllib] def update(f: Double => Double): DenseMatrix = { val len = values.length @@ -535,7 +541,7 @@ class SparseMatrix( } private[mllib] def map(f: Double => Double) = - new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f)) + new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f), isTransposed) private[mllib] def update(f: Double => Double): SparseMatrix = { val len = values.length diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 188d1e542b5b5..2ffa497a99d93 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -176,27 +176,31 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { } override def serialize(obj: Any): Row = { - val row = new GenericMutableRow(4) obj match { case SparseVector(size, indices, values) => + val row = new GenericMutableRow(4) row.setByte(0, 0) row.setInt(1, size) row.update(2, indices.toSeq) row.update(3, values.toSeq) + row case DenseVector(values) => + val row = new GenericMutableRow(4) row.setByte(0, 1) row.setNullAt(1) row.setNullAt(2) row.update(3, values.toSeq) + row + // TODO: There are bugs in UDT serialization because we don't have a clear separation between + // TODO: internal SQL types and language specific types (including UDT). UDT serialize and + // TODO: deserialize may get called twice. See SPARK-7186. + case row: Row => + row } - row } override def deserialize(datum: Any): Vector = { datum match { - // TODO: something wrong with UDT serialization - case v: Vector => - v case row: Row => require(row.length == 4, s"VectorUDT.deserialize given row with length ${row.length} but requires length == 4") @@ -211,6 +215,11 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { val values = row.getAs[Iterable[Double]](3).toArray new DenseVector(values) } + // TODO: There are bugs in UDT serialization because we don't have a clear separation between + // TODO: internal SQL types and language specific types (including UDT). UDT serialize and + // TODO: deserialize may get called twice. See SPARK-7186. + case v: Vector => + v } } @@ -587,6 +596,28 @@ class DenseVector(val values: Array[Double]) extends Vector { } new SparseVector(size, ii, vv) } + + /** + * Find the index of a maximal element. Returns the first maximal element in case of a tie. + * Returns -1 if vector has length 0. 
+ */ + private[spark] def argmax: Int = { + if (size == 0) { + -1 + } else { + var maxIdx = 0 + var maxValue = values(0) + var i = 1 + while (i < size) { + if (values(i) > maxValue) { + maxIdx = i + maxValue = values(i) + } + i += 1 + } + maxIdx + } + } } object DenseVector { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 9a89a6f3a515f..1626da9c3d2ee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -219,7 +219,7 @@ class RowMatrix( val computeMode = mode match { case "auto" => - if(k > 5000) { + if (k > 5000) { logWarning(s"computing svd with k=$k and n=$n, please check necessity") } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala index 4766f7708295d..64d52bae00907 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/NNLS.scala @@ -91,7 +91,7 @@ private[spark] object NNLS { val dir = ws.dir val lastDir = ws.lastDir val res = ws.res - val iterMax = Math.max(400, 20 * n) + val iterMax = math.max(400, 20 * n) var lastNorm = 0.0 var iterno = 0 var lastWall = 0 // Last iteration when we hit a bound constraint. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index 3ed3a5b9b3843..9f463e0cafb6f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -116,7 +116,8 @@ class L1Updater extends Updater { // Apply proximal operator (soft thresholding) val shrinkageVal = regParam * thisIterStepSize var i = 0 - while (i < brzWeights.length) { + val len = brzWeights.length + while (i < len) { val wi = brzWeights(i) brzWeights(i) = signum(wi) * max(0.0, abs(wi) - shrinkageVal) i += 1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala index 354e90f3eeaa6..5e882d4ebb10b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/PMMLExportable.scala @@ -23,13 +23,16 @@ import javax.xml.transform.stream.StreamResult import org.jpmml.model.JAXBUtil import org.apache.spark.SparkContext +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.mllib.pmml.export.PMMLModelExportFactory /** + * :: DeveloperApi :: * Export model to the PMML format * Predictive Model Markup Language (PMML) is an XML-based file format * developed by the Data Mining Group (www.dmg.org). 
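// Sketch (illustrative; the output path is a placeholder and `sc` is an existing SparkContext):
// models that mix in PMMLExportable, such as KMeansModel and the linear models, expose the
// newly annotated toPMML variants.
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

val points = sc.parallelize(Seq(
  Vectors.dense(0.0, 0.0), Vectors.dense(0.1, 0.1),
  Vectors.dense(9.0, 9.0), Vectors.dense(9.1, 9.1)))
val kmeansModel = KMeans.train(points, k = 2, maxIterations = 10)
val pmmlString = kmeansModel.toPMML()        // in-memory String
kmeansModel.toPMML(sc, "/tmp/kmeans-pmml")   // or save to a (distributed) path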
*/ +@DeveloperApi trait PMMLExportable { /** @@ -41,30 +44,38 @@ trait PMMLExportable { } /** + * :: Experimental :: * Export the model to a local file in PMML format */ + @Experimental def toPMML(localPath: String): Unit = { toPMML(new StreamResult(new File(localPath))) } /** + * :: Experimental :: * Export the model to a directory on a distributed file system in PMML format */ + @Experimental def toPMML(sc: SparkContext, path: String): Unit = { val pmml = toPMML() sc.parallelize(Array(pmml), 1).saveAsTextFile(path) } /** + * :: Experimental :: * Export the model to the OutputStream in PMML format */ + @Experimental def toPMML(outputStream: OutputStream): Unit = { toPMML(new StreamResult(outputStream)) } /** + * :: Experimental :: * Export the model to a String in PMML format */ + @Experimental def toPMML(): String = { val writer = new StringWriter toPMML(new StreamResult(writer)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala index 34b447584e521..622b53a252ac5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExport.scala @@ -27,10 +27,10 @@ import org.apache.spark.mllib.regression.GeneralizedLinearModel * PMML Model Export for GeneralizedLinearModel class with binary ClassificationModel */ private[mllib] class BinaryClassificationPMMLModelExport( - model : GeneralizedLinearModel, + model : GeneralizedLinearModel, description : String, normalizationMethod : RegressionNormalizationMethodType, - threshold: Double) + threshold: Double) extends PMMLModelExport { populateBinaryClassificationPMML() @@ -72,7 +72,7 @@ private[mllib] class BinaryClassificationPMMLModelExport( .withUsageType(FieldUsageType.ACTIVE)) regressionTableYES.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i))) } - + // add target field val targetField = FieldName.create("target") dataDictionary @@ -80,9 +80,9 @@ private[mllib] class BinaryClassificationPMMLModelExport( miningSchema .withMiningFields(new MiningField(targetField) .withUsageType(FieldUsageType.TARGET)) - + dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size) - + pmml.setDataDictionary(dataDictionary) pmml.withModels(regressionModel) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala index ebdeae50bb32f..c5fdecd3ca17f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExport.scala @@ -25,7 +25,7 @@ import scala.beans.BeanProperty import org.dmg.pmml.{Application, Header, PMML, Timestamp} private[mllib] trait PMMLModelExport { - + /** * Holder of the exported model in PMML format */ @@ -33,7 +33,7 @@ private[mllib] trait PMMLModelExport { val pmml: PMML = new PMML setHeader(pmml) - + private def setHeader(pmml: PMML): Unit = { val version = getClass.getPackage.getImplementationVersion val app = new Application().withName("Apache Spark MLlib").withVersion(version) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactory.scala b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactory.scala index c16e83d6a067d..29bd689e1185a 100644 
--- a/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactory.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactory.scala @@ -27,9 +27,9 @@ import org.apache.spark.mllib.regression.LinearRegressionModel import org.apache.spark.mllib.regression.RidgeRegressionModel private[mllib] object PMMLModelExportFactory { - + /** - * Factory object to help creating the necessary PMMLModelExport implementation + * Factory object to help creating the necessary PMMLModelExport implementation * taking as input the machine learning model (for example KMeansModel). */ def createPMMLModelExport(model: Any): PMMLModelExport = { @@ -44,7 +44,7 @@ private[mllib] object PMMLModelExportFactory { new GeneralizedLinearPMMLModelExport(lasso, "lasso regression") case svm: SVMModel => new BinaryClassificationPMMLModelExport( - svm, "linear SVM", RegressionNormalizationMethodType.NONE, + svm, "linear SVM", RegressionNormalizationMethodType.NONE, svm.getThreshold.getOrElse(0.0)) case logistic: LogisticRegressionModel => if (logistic.numClasses == 2) { @@ -60,5 +60,5 @@ private[mllib] object PMMLModelExportFactory { "PMML Export not supported for model: " + model.getClass.getName) } } - + } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala index 8341bb86afd71..174d5e0f6c9f0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomRDDs.scala @@ -52,7 +52,7 @@ object RandomRDDs { numPartitions: Int = 0, seed: Long = Utils.random.nextLong()): RDD[Double] = { val uniform = new UniformGenerator() - randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed) + randomRDD(sc, uniform, size, numPartitionsOrDefault(sc, numPartitions), seed) } /** @@ -234,7 +234,7 @@ object RandomRDDs { * * @param sc SparkContext used to create the RDD. * @param shape shape parameter (> 0) for the gamma distribution - * @param scale scale parameter (> 0) for the gamma distribution + * @param scale scale parameter (> 0) for the gamma distribution * @param size Size of the RDD. * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`). * @param seed Random seed (default: a random long integer). @@ -293,7 +293,7 @@ object RandomRDDs { * * @param sc SparkContext used to create the RDD. * @param mean mean for the log normal distribution - * @param std standard deviation for the log normal distribution + * @param std standard deviation for the log normal distribution * @param size Size of the RDD. * @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`). * @param seed Random seed (default: a random long integer). @@ -671,7 +671,7 @@ object RandomRDDs { * * @param sc SparkContext used to create the RDD. * @param shape shape parameter (> 0) for the gamma distribution. - * @param scale scale parameter (> 0) for the gamma distribution. + * @param scale scale parameter (> 0) for the gamma distribution. * @param numRows Number of Vectors in the RDD. * @param numCols Number of elements in each Vector. 
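// Sketch (illustrative parameters, assuming a live SparkContext `sc`): generating gamma-distributed
// data with the RandomRDDs generators whose docs are touched above, in scalar and vector form.
import org.apache.spark.mllib.random.RandomRDDs

val gammaValues = RandomRDDs.gammaRDD(sc, shape = 2.0, scale = 1.5, size = 10000L)
val gammaVectors = RandomRDDs.gammaVectorRDD(sc, shape = 2.0, scale = 1.5, numRows = 1000L, numCols = 10)
println(gammaValues.mean())   // should land near shape * scale = 3.0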
* @param numPartitions Number of partitions in the RDD (default: `sc.defaultParallelism`) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala index 5af55aaf84802..1b93e2d764c69 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala @@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Se combOp = (queue1, queue2) => { queue1 ++= queue2 } - ).mapValues(_.toArray.reverse) // This is an min-heap, so we reverse the order. + ).mapValues(_.toArray.sorted(ord.reverse)) // This is an min-heap, so we reverse the order. } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index dddefe1944e9d..93290e6508529 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -175,7 +175,7 @@ class ALS private ( /** * :: DeveloperApi :: * Sets storage level for final RDDs (user/product used in MatrixFactorizationModel). The default - * value is `MEMORY_AND_DISK`. Users can change it to a serialized storage, e.g. + * value is `MEMORY_AND_DISK`. Users can change it to a serialized storage, e.g. * `MEMORY_AND_DISK_SER` and set `spark.rdd.compress` to `true` to reduce the space requirement, * at the cost of speed. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 88c2148403313..93aa41e49961e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -281,8 +281,8 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] { val metadata = compact(render( ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~ ("rank" -> model.rank))) sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path)) - model.userFeatures.toDF("id", "features").saveAsParquetFile(userPath(path)) - model.productFeatures.toDF("id", "features").saveAsParquetFile(productPath(path)) + model.userFeatures.toDF("id", "features").write.parquet(userPath(path)) + model.productFeatures.toDF("id", "features").write.parquet(productPath(path)) } def load(sc: SparkContext, path: String): MatrixFactorizationModel = { @@ -292,11 +292,11 @@ object MatrixFactorizationModel extends Loader[MatrixFactorizationModel] { assert(className == thisClassName) assert(formatVersion == thisFormatVersion) val rank = (metadata \ "rank").extract[Int] - val userFeatures = sqlContext.parquetFile(userPath(path)) + val userFeatures = sqlContext.read.parquet(userPath(path)) .map { case Row(id: Int, features: Seq[_]) => (id, features.asInstanceOf[Seq[Double]].toArray) } - val productFeatures = sqlContext.parquetFile(productPath(path)) + val productFeatures = sqlContext.read.parquet(productPath(path)) .map { case Row(id: Int, features: Seq[_]) => (id, features.asInstanceOf[Seq[Double]].toArray) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 1d7617046b6c7..f3b46c75c05f3 
100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -21,18 +21,20 @@ import java.io.Serializable import java.lang.{Double => JDouble} import java.util.Arrays.binarySearch +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ +import org.apache.spark.SparkContext import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD} +import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD -import org.apache.spark.SparkContext -import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.SQLContext /** * :: Experimental :: @@ -57,10 +59,18 @@ class IsotonicRegressionModel ( assertOrdered(boundaries) assertOrdered(predictions)(predictionOrd) + /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. */ + def this(boundaries: java.lang.Iterable[Double], + predictions: java.lang.Iterable[Double], + isotonic: java.lang.Boolean) = { + this(boundaries.asScala.toArray, predictions.asScala.toArray, isotonic) + } + /** Asserts the input array is monotone with the given ordering. */ private def assertOrdered(xs: Array[Double])(implicit ord: Ordering[Double]): Unit = { var i = 1 - while (i < xs.length) { + val len = xs.length + while (i < len) { require(ord.compare(xs(i - 1), xs(i)) <= 0, s"Elements (${xs(i - 1)}, ${xs(i)}) are not ordered.") i += 1 @@ -132,6 +142,12 @@ class IsotonicRegressionModel ( } } + /** A convenient method for boundaries called by the Python API. */ + private[mllib] def boundaryVector: Vector = Vectors.dense(boundaries) + + /** A convenient method for boundaries called by the Python API. 
*/ + private[mllib] def predictionVector: Vector = Vectors.dense(predictions) + override def save(sc: SparkContext, path: String): Unit = { IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic) } @@ -154,26 +170,26 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { case class Data(boundary: Double, prediction: Double) def save( - sc: SparkContext, - path: String, - boundaries: Array[Double], - predictions: Array[Double], + sc: SparkContext, + path: String, + boundaries: Array[Double], + predictions: Array[Double], isotonic: Boolean): Unit = { val sqlContext = new SQLContext(sc) val metadata = compact(render( - ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~ + ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~ ("isotonic" -> isotonic))) sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path)) sqlContext.createDataFrame( boundaries.toSeq.zip(predictions).map { case (b, p) => Data(b, p) } - ).saveAsParquetFile(dataPath(path)) + ).write.parquet(dataPath(path)) } def load(sc: SparkContext, path: String): (Array[Double], Array[Double]) = { val sqlContext = new SQLContext(sc) - val dataRDD = sqlContext.parquetFile(dataPath(path)) + val dataRDD = sqlContext.read.parquet(dataPath(path)) checkSchema[Data](dataRDD.schema) val dataArray = dataRDD.select("boundary", "prediction").collect() @@ -187,7 +203,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { override def load(sc: SparkContext, path: String): IsotonicRegressionModel = { implicit val formats = DefaultFormats val (loadedClassName, version, metadata) = loadMetadata(sc, path) - val isotonic = (metadata \ "isotonic").extract[Boolean] + val isotonic = (metadata \ "isotonic").extract[Boolean] val classNameV1_0 = SaveLoadV1_0.thisClassName (loadedClassName, version) match { case (className, "1.0") if className == classNameV1_0 => @@ -314,11 +330,12 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali } var i = 0 - while (i < input.length) { + val len = input.length + while (i < len) { var j = i // Find monotonicity violating sequence, if any. 
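// Sketch (illustrative data, assuming a live SparkContext `sc`): training an isotonic fit from
// (label, feature, weight) triples and predicting; Java callers can also build an
// IsotonicRegressionModel directly via the new Iterable-based constructor above.
import org.apache.spark.mllib.regression.IsotonicRegression

val trainingData = sc.parallelize(Seq((1.0, 1.0, 1.0), (2.0, 2.0, 1.0), (1.5, 3.0, 1.0)))
val isoModel = new IsotonicRegression().setIsotonic(true).run(trainingData)
println(isoModel.predict(2.5))   // piecewise-linear interpolation between learned boundaries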
- while (j < input.length - 1 && input(j)._1 > input(j + 1)._1) { + while (j < len - 1 && input(j)._1 > input(j + 1)._1) { j = j + 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala index b55944f74f623..317d3a5702636 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/impl/GLMRegressionModel.scala @@ -60,7 +60,7 @@ private[regression] object GLMRegressionModel { val data = Data(weights, intercept) val dataRDD: DataFrame = sc.parallelize(Seq(data), 1).toDF() // TODO: repartition with 1 partition after SPARK-5532 gets fixed - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } /** @@ -72,7 +72,7 @@ private[regression] object GLMRegressionModel { def loadData(sc: SparkContext, path: String, modelClass: String, numFeatures: Int): Data = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val dataRDD = sqlContext.parquetFile(datapath) + val dataRDD = sqlContext.read.parquet(datapath) val dataArray = dataRDD.select("weights", "intercept").take(1) assert(dataArray.size == 1, s"Unable to load $modelClass data from: $datapath") val data = dataArray(0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala index 0deef11b4511a..58a50f9c19f14 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/KernelDensity.scala @@ -17,55 +17,104 @@ package org.apache.spark.mllib.stat +import com.github.fommil.netlib.BLAS.{getInstance => blas} + +import org.apache.spark.annotation.Experimental +import org.apache.spark.api.java.JavaRDD import org.apache.spark.rdd.RDD -private[stat] object KernelDensity { +/** + * :: Experimental :: + * Kernel density estimation. Given a sample from a population, estimate its probability density + * function at each of the given evaluation points using kernels. Only Gaussian kernel is supported. + * + * Scala example: + * + * {{{ + * val sample = sc.parallelize(Seq(0.0, 1.0, 4.0, 4.0)) + * val kd = new KernelDensity() + * .setSample(sample) + * .setBandwidth(3.0) + * val densities = kd.estimate(Array(-1.0, 2.0, 5.0)) + * }}} + */ +@Experimental +class KernelDensity extends Serializable { + + import KernelDensity._ + + /** Bandwidth of the kernel function. */ + private var bandwidth: Double = 1.0 + + /** A sample from a population. */ + private var sample: RDD[Double] = _ + /** - * Given a set of samples from a distribution, estimates its density at the set of given points. - * Uses a Gaussian kernel with the given standard deviation. + * Sets the bandwidth (standard deviation) of the Gaussian kernel (default: `1.0`). 
*/ - def estimate(samples: RDD[Double], standardDeviation: Double, - evaluationPoints: Array[Double]): Array[Double] = { - if (standardDeviation <= 0.0) { - throw new IllegalArgumentException("Standard deviation must be positive") - } + def setBandwidth(bandwidth: Double): this.type = { + require(bandwidth > 0, s"Bandwidth must be positive, but got $bandwidth.") + this.bandwidth = bandwidth + this + } - // This gets used in each Gaussian PDF computation, so compute it up front - val logStandardDeviationPlusHalfLog2Pi = - Math.log(standardDeviation) + 0.5 * Math.log(2 * Math.PI) + /** + * Sets the sample to use for density estimation. + */ + def setSample(sample: RDD[Double]): this.type = { + this.sample = sample + this + } + + /** + * Sets the sample to use for density estimation (for Java users). + */ + def setSample(sample: JavaRDD[java.lang.Double]): this.type = { + this.sample = sample.rdd.asInstanceOf[RDD[Double]] + this + } + + /** + * Estimates probability density function at the given array of points. + */ + def estimate(points: Array[Double]): Array[Double] = { + val sample = this.sample + val bandwidth = this.bandwidth + + require(sample != null, "Must set sample before calling estimate.") - val (points, count) = samples.aggregate((new Array[Double](evaluationPoints.length), 0))( + val n = points.length + // This gets used in each Gaussian PDF computation, so compute it up front + val logStandardDeviationPlusHalfLog2Pi = math.log(bandwidth) + 0.5 * math.log(2 * math.Pi) + val (densities, count) = sample.aggregate((new Array[Double](n), 0L))( (x, y) => { var i = 0 - while (i < evaluationPoints.length) { - x._1(i) += normPdf(y, standardDeviation, logStandardDeviationPlusHalfLog2Pi, - evaluationPoints(i)) + while (i < n) { + x._1(i) += normPdf(y, bandwidth, logStandardDeviationPlusHalfLog2Pi, points(i)) i += 1 } - (x._1, i) + (x._1, x._2 + 1) }, (x, y) => { - var i = 0 - while (i < evaluationPoints.length) { - x._1(i) += y._1(i) - i += 1 - } + blas.daxpy(n, 1.0, y._1, 1, x._1, 1) (x._1, x._2 + y._2) }) - - var i = 0 - while (i < points.length) { - points(i) /= count - i += 1 - } - points + blas.dscal(n, 1.0 / count, densities, 1) + densities } +} + +private object KernelDensity { - private def normPdf(mean: Double, standardDeviation: Double, - logStandardDeviationPlusHalfLog2Pi: Double, x: Double): Double = { + /** Evaluates the PDF of a normal distribution. */ + def normPdf( + mean: Double, + standardDeviation: Double, + logStandardDeviationPlusHalfLog2Pi: Double, + x: Double): Double = { val x0 = x - mean val x1 = x0 / standardDeviation val logDensity = -0.5 * x1 * x1 - logStandardDeviationPlusHalfLog2Pi - Math.exp(logDensity) + math.exp(logDensity) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index fcc2a148791bd..d321cc554c1cc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -70,23 +70,30 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S require(n == sample.size, s"Dimensions mismatch when adding new sample." 
+ s" Expecting $n but got ${sample.size}.") + val localCurrMean = currMean + val localCurrM2n = currM2n + val localCurrM2 = currM2 + val localCurrL1 = currL1 + val localNnz = nnz + val localCurrMax = currMax + val localCurrMin = currMin sample.foreachActive { (index, value) => if (value != 0.0) { - if (currMax(index) < value) { - currMax(index) = value + if (localCurrMax(index) < value) { + localCurrMax(index) = value } - if (currMin(index) > value) { - currMin(index) = value + if (localCurrMin(index) > value) { + localCurrMin(index) = value } - val prevMean = currMean(index) + val prevMean = localCurrMean(index) val diff = value - prevMean - currMean(index) = prevMean + diff / (nnz(index) + 1.0) - currM2n(index) += (value - currMean(index)) * diff - currM2(index) += value * value - currL1(index) += math.abs(value) + localCurrMean(index) = prevMean + diff / (localNnz(index) + 1.0) + localCurrM2n(index) += (value - localCurrMean(index)) * diff + localCurrM2(index) += value * value + localCurrL1(index) += math.abs(value) - nnz(index) += 1.0 + localNnz(index) += 1.0 } } @@ -130,14 +137,14 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S } } else if (totalCnt == 0 && other.totalCnt != 0) { this.n = other.n - this.currMean = other.currMean.clone - this.currM2n = other.currM2n.clone - this.currM2 = other.currM2.clone - this.currL1 = other.currL1.clone + this.currMean = other.currMean.clone() + this.currM2n = other.currM2n.clone() + this.currM2 = other.currM2.clone() + this.currL1 = other.currL1.clone() this.totalCnt = other.totalCnt - this.nnz = other.nnz.clone - this.currMax = other.currMax.clone - this.currMin = other.currMin.clone + this.nnz = other.nnz.clone() + this.currMax = other.currMax.clone() + this.currMin = other.currMin.clone() } this } @@ -165,7 +172,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S if (denominator > 0.0) { val deltaMean = currMean var i = 0 - while (i < currM2n.size) { + val len = currM2n.length + while (i < len) { realVariance(i) = currM2n(i) + deltaMean(i) * deltaMean(i) * nnz(i) * (totalCnt - nnz(i)) / totalCnt realVariance(i) /= denominator @@ -211,7 +219,8 @@ class MultivariateOnlineSummarizer extends MultivariateStatisticalSummary with S val realMagnitude = Array.ofDim[Double](n) var i = 0 - while (i < currM2.size) { + val len = currM2.length + while (i < len) { realMagnitude(i) = math.sqrt(currM2(i)) i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala index 32561620ac914..b3fad0c52d655 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala @@ -149,18 +149,4 @@ object Statistics { def chiSqTest(data: RDD[LabeledPoint]): Array[ChiSqTestResult] = { ChiSqTest.chiSquaredFeatures(data) } - - /** - * Given an empirical distribution defined by the input RDD of samples, estimate its density at - * each of the given evaluation points using a Gaussian kernel. - * - * @param samples The samples RDD used to define the empirical distribution. - * @param standardDeviation The standard deviation of the kernel Gaussians. - * @param evaluationPoints The points at which to estimate densities. - * @return An array the same size as evaluationPoints with the density at each point. 
- */ - def kernelDensity(samples: RDD[Double], standardDeviation: Double, - evaluationPoints: Iterable[Double]): Array[Double] = { - KernelDensity.estimate(samples, standardDeviation, evaluationPoints.toArray) - } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala index cd6add9d60b0d..cf51b24ff777f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala @@ -29,102 +29,102 @@ import org.apache.spark.mllib.util.MLUtils * the event that the covariance matrix is singular, the density will be computed in a * reduced dimensional subspace under which the distribution is supported. * (see [[http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Degenerate_case]]) - * + * * @param mu The mean vector of the distribution * @param sigma The covariance matrix of the distribution */ @DeveloperApi class MultivariateGaussian ( - val mu: Vector, + val mu: Vector, val sigma: Matrix) extends Serializable { require(sigma.numCols == sigma.numRows, "Covariance matrix must be square") require(mu.size == sigma.numCols, "Mean vector length must match covariance matrix size") - + private val breezeMu = mu.toBreeze.toDenseVector - + /** * private[mllib] constructor - * + * * @param mu The mean vector of the distribution * @param sigma The covariance matrix of the distribution */ private[mllib] def this(mu: DBV[Double], sigma: DBM[Double]) = { this(Vectors.fromBreeze(mu), Matrices.fromBreeze(sigma)) } - + /** * Compute distribution dependent constants: * rootSigmaInv = D^(-1/2)^ * U, where sigma = U * D * U.t - * u = log((2*pi)^(-k/2)^ * det(sigma)^(-1/2)^) + * u = log((2*pi)^(-k/2)^ * det(sigma)^(-1/2)^) */ private val (rootSigmaInv: DBM[Double], u: Double) = calculateCovarianceConstants - + /** Returns density of this multivariate Gaussian at given point, x */ def pdf(x: Vector): Double = { pdf(x.toBreeze) } - + /** Returns the log-density of this multivariate Gaussian at given point, x */ def logpdf(x: Vector): Double = { logpdf(x.toBreeze) } - + /** Returns density of this multivariate Gaussian at given point, x */ private[mllib] def pdf(x: BV[Double]): Double = { math.exp(logpdf(x)) } - + /** Returns the log-density of this multivariate Gaussian at given point, x */ private[mllib] def logpdf(x: BV[Double]): Double = { val delta = x - breezeMu val v = rootSigmaInv * delta u + v.t * v * -0.5 } - + /** * Calculate distribution dependent components used for the density function: * pdf(x) = (2*pi)^(-k/2)^ * det(sigma)^(-1/2)^ * exp((-1/2) * (x-mu).t * inv(sigma) * (x-mu)) * where k is length of the mean vector. - * - * We here compute distribution-fixed parts + * + * We here compute distribution-fixed parts * log((2*pi)^(-k/2)^ * det(sigma)^(-1/2)^) * and * D^(-1/2)^ * U, where sigma = U * D * U.t - * + * * Both the determinant and the inverse can be computed from the singular value decomposition * of sigma. Noting that covariance matrices are always symmetric and positive semi-definite, * we can use the eigendecomposition. 
We also do not compute the inverse directly; noting - * that - * + * that + * * sigma = U * D * U.t - * inv(Sigma) = U * inv(D) * U.t + * inv(Sigma) = U * inv(D) * U.t * = (D^{-1/2}^ * U).t * (D^{-1/2}^ * U) - * + * * and thus - * + * * -0.5 * (x-mu).t * inv(Sigma) * (x-mu) = -0.5 * norm(D^{-1/2}^ * U * (x-mu))^2^ - * - * To guard against singular covariance matrices, this method computes both the + * + * To guard against singular covariance matrices, this method computes both the * pseudo-determinant and the pseudo-inverse (Moore-Penrose). Singular values are considered * to be non-zero only if they exceed a tolerance based on machine precision, matrix size, and * relation to the maximum singular value (same tolerance used by, e.g., Octave). */ private def calculateCovarianceConstants: (DBM[Double], Double) = { val eigSym.EigSym(d, u) = eigSym(sigma.toBreeze.toDenseMatrix) // sigma = u * diag(d) * u.t - + // For numerical stability, values are considered to be non-zero only if they exceed tol. // This prevents any inverted value from exceeding (eps * n * max(d))^-1 val tol = MLUtils.EPSILON * max(d) * d.length - + try { // log(pseudo-determinant) is sum of the logs of all non-zero singular values val logPseudoDetSigma = d.activeValuesIterator.filter(_ > tol).map(math.log).sum - - // calculate the root-pseudo-inverse of the diagonal matrix of singular values + + // calculate the root-pseudo-inverse of the diagonal matrix of singular values // by inverting the square root of all non-zero values val pinvS = diag(new DBV(d.map(v => if (v > tol) math.sqrt(1.0 / v) else 0.0).toArray)) - + (pinvS * u, -0.5 * (mu.size * math.log(2.0 * math.Pi) + logPseudoDetSigma)) } catch { case uex: UnsupportedOperationException => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index ea82d39b72c03..23c8d7c7c8075 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -196,7 +196,7 @@ private[stat] object ChiSqTest extends Logging { * Pearson's independence test on the input contingency matrix. * TODO: optimize for SparseMatrix when it becomes supported. 
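// Sketch (illustrative counts): the public entry point for the chiSquaredMatrix code below is
// Statistics.chiSqTest on a contingency matrix of observed counts.
import org.apache.spark.mllib.linalg.Matrices
import org.apache.spark.mllib.stat.Statistics

val contingency = Matrices.dense(2, 2, Array(10.0, 20.0, 30.0, 40.0))
val independenceTest = Statistics.chiSqTest(contingency)
println(independenceTest.pValue)   // Pearson's independence test on the 2x2 table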
*/ - def chiSquaredMatrix(counts: Matrix, methodName:String = PEARSON.name): ChiSqTestResult = { + def chiSquaredMatrix(counts: Matrix, methodName: String = PEARSON.name): ChiSqTestResult = { val method = methodFromString(methodName) val numRows = counts.numRows val numCols = counts.numCols @@ -205,8 +205,10 @@ private[stat] object ChiSqTest extends Logging { val colSums = new Array[Double](numCols) val rowSums = new Array[Double](numRows) val colMajorArr = counts.toArray + val colMajorArrLen = colMajorArr.length + var i = 0 - while (i < colMajorArr.size) { + while (i < colMajorArrLen) { val elem = colMajorArr(i) if (elem < 0.0) { throw new IllegalArgumentException("Contingency table cannot contain negative entries.") @@ -220,7 +222,7 @@ private[stat] object ChiSqTest extends Logging { // second pass to collect statistic var statistic = 0.0 var j = 0 - while (j < colMajorArr.size) { + while (j < colMajorArrLen) { val col = j / numRows val colSum = colSums(col) if (colSum == 0.0) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala index dfe3a0b6913ef..cecd1fed896d5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala @@ -169,7 +169,7 @@ object DecisionTree extends Serializable with Logging { numClasses: Int, maxBins: Int, quantileCalculationStrategy: QuantileStrategy, - categoricalFeaturesInfo: Map[Int,Int]): DecisionTreeModel = { + categoricalFeaturesInfo: Map[Int, Int]): DecisionTreeModel = { val strategy = new Strategy(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo) new DecisionTree(strategy).run(input) @@ -768,7 +768,7 @@ object DecisionTree extends Serializable with Logging { */ private def calculatePredictImpurity( leftImpurityCalculator: ImpurityCalculator, - rightImpurityCalculator: ImpurityCalculator): (Predict, Double) = { + rightImpurityCalculator: ImpurityCalculator): (Predict, Double) = { val parentNodeAgg = leftImpurityCalculator.copy parentNodeAgg.add(rightImpurityCalculator) val predict = calculatePredict(parentNodeAgg) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala index 1f779584dcffd..a835f96d5d0e3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/GradientBoostedTrees.scala @@ -60,12 +60,12 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) def run(input: RDD[LabeledPoint]): GradientBoostedTreesModel = { val algo = boostingStrategy.treeStrategy.algo algo match { - case Regression => GradientBoostedTrees.boost(input, input, boostingStrategy, validate=false) + case Regression => + GradientBoostedTrees.boost(input, input, boostingStrategy, validate = false) case Classification => // Map labels to -1, +1 so binary classification can be treated as regression. 
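// Usage sketch for the entry points reformatted above (parameter values are illustrative):
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy
import org.apache.spark.rdd.RDD

def trainGbt(training: RDD[LabeledPoint], validation: RDD[LabeledPoint]) = {
  val boostingStrategy = BoostingStrategy.defaultParams("Regression")
  boostingStrategy.numIterations = 20
  // runWithValidation stops boosting early once validation error stops improving
  new GradientBoostedTrees(boostingStrategy).runWithValidation(training, validation)
}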
val remappedInput = input.map(x => new LabeledPoint((x.label * 2) - 1, x.features)) - GradientBoostedTrees.boost(remappedInput, - remappedInput, boostingStrategy, validate=false) + GradientBoostedTrees.boost(remappedInput, remappedInput, boostingStrategy, validate = false) case _ => throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.") } @@ -93,8 +93,8 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) validationInput: RDD[LabeledPoint]): GradientBoostedTreesModel = { val algo = boostingStrategy.treeStrategy.algo algo match { - case Regression => GradientBoostedTrees.boost( - input, validationInput, boostingStrategy, validate=true) + case Regression => + GradientBoostedTrees.boost(input, validationInput, boostingStrategy, validate = true) case Classification => // Map labels to -1, +1 so binary classification can be treated as regression. val remappedInput = input.map( @@ -102,7 +102,7 @@ class GradientBoostedTrees(private val boostingStrategy: BoostingStrategy) val remappedValidationInput = validationInput.map( x => new LabeledPoint((x.label * 2) - 1, x.features)) GradientBoostedTrees.boost(remappedInput, remappedValidationInput, boostingStrategy, - validate=true) + validate = true) case _ => throw new IllegalArgumentException(s"$algo is not supported by the gradient boosting.") } @@ -270,7 +270,7 @@ object GradientBoostedTrees extends Logging { logInfo(s"$timer") if (persistedInput) input.unpersist() - + if (validate) { new GradientBoostedTreesModel( boostingStrategy.treeStrategy.algo, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala index 055e60c7d9c95..069959976a188 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/RandomForest.scala @@ -36,6 +36,7 @@ import org.apache.spark.mllib.tree.model._ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils +import org.apache.spark.util.random.SamplingUtils /** * :: Experimental :: @@ -248,7 +249,7 @@ private class RandomForest ( try { nodeIdCache.get.deleteAllCheckpoints() } catch { - case e:IOException => + case e: IOException => logWarning(s"delete all checkpoints failed. Error reason: ${e.getMessage}") } } @@ -473,9 +474,8 @@ object RandomForest extends Serializable with Logging { val (treeIndex, node) = nodeQueue.head // Choose subset of features for node (if subsampling). val featureSubset: Option[Array[Int]] = if (metadata.subsamplingFeatures) { - // TODO: Use more efficient subsampling? (use selection-and-rejection or reservoir) - Some(rng.shuffle(Range(0, metadata.numFeatures).toList) - .take(metadata.numFeaturesPerNode).toArray) + Some(SamplingUtils.reservoirSampleAndCount(Range(0, + metadata.numFeatures).iterator, metadata.numFeaturesPerNode, rng.nextLong)._1) } else { None } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala index 60e2ab2bb829e..72eb24c49264a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala @@ -111,11 +111,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) { * Add the stats from another calculator into this one, modifying and returning this calculator. 
*/ def add(other: ImpurityCalculator): ImpurityCalculator = { - require(stats.size == other.stats.size, + require(stats.length == other.stats.length, s"Two ImpurityCalculator instances cannot be added with different counts sizes." + - s" Sizes are ${stats.size} and ${other.stats.size}.") + s" Sizes are ${stats.length} and ${other.stats.length}.") var i = 0 - while (i < other.stats.size) { + val len = other.stats.length + while (i < len) { stats(i) += other.stats(i) i += 1 } @@ -127,11 +128,12 @@ private[tree] abstract class ImpurityCalculator(val stats: Array[Double]) { * calculator. */ def subtract(other: ImpurityCalculator): ImpurityCalculator = { - require(stats.size == other.stats.size, + require(stats.length == other.stats.length, s"Two ImpurityCalculator instances cannot be subtracted with different counts sizes." + - s" Sizes are ${stats.size} and ${other.stats.size}.") + s" Sizes are ${stats.length} and ${other.stats.length}.") var i = 0 - while (i < other.stats.size) { + val len = other.stats.length + while (i < len) { stats(i) -= other.stats(i) i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 331af428533de..25bb1453db404 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -223,14 +223,14 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { val dataRDD: DataFrame = sc.parallelize(nodes) .map(NodeData.apply(0, _)) .toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } def load(sc: SparkContext, path: String, algo: String, numNodes: Int): DecisionTreeModel = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) // Load Parquet data. - val dataRDD = sqlContext.parquetFile(datapath) + val dataRDD = sqlContext.read.parquet(datapath) // Check schema explicitly since erasure makes it hard to use match-case for checking. 
Loader.checkSchema[NodeData](dataRDD.schema) val nodes = dataRDD.map(NodeData.apply) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index 431a839817eac..a6d1398fc267b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -83,7 +83,7 @@ class Node ( def predict(features: Vector) : Double = { if (isLeaf) { predict.predict - } else{ + } else { if (split.get.featureType == Continuous) { if (features(split.get.feature) <= split.get.threshold) { leftNode.get.predict(features) @@ -151,9 +151,9 @@ class Node ( s"(feature ${split.feature} > ${split.threshold})" } case Categorical => if (left) { - s"(feature ${split.feature} in ${split.categories.mkString("{",",","}")})" + s"(feature ${split.feature} in ${split.categories.mkString("{", ",", "}")})" } else { - s"(feature ${split.feature} not in ${split.categories.mkString("{",",","}")})" + s"(feature ${split.feature} not in ${split.categories.mkString("{", ",", "}")})" } } } @@ -161,9 +161,9 @@ class Node ( if (isLeaf) { prefix + s"Predict: ${predict.predict}\n" } else { - prefix + s"If ${splitToString(split.get, left=true)}\n" + + prefix + s"If ${splitToString(split.get, left = true)}\n" + leftNode.get.subtreeToString(indentFactor + 1) + - prefix + s"Else ${splitToString(split.get, left=false)}\n" + + prefix + s"Else ${splitToString(split.get, left = false)}\n" + rightNode.get.subtreeToString(indentFactor + 1) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 8341219bfa71c..1e3333d8d81d0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -414,7 +414,7 @@ private[tree] object TreeEnsembleModel extends Logging { val dataRDD = sc.parallelize(model.trees.zipWithIndex).flatMap { case (tree, treeId) => tree.topNode.subtreeIterator.toSeq.map(node => NodeData(treeId, node)) }.toDF() - dataRDD.saveAsParquetFile(Loader.dataPath(path)) + dataRDD.write.parquet(Loader.dataPath(path)) } /** @@ -437,7 +437,7 @@ private[tree] object TreeEnsembleModel extends Logging { treeAlgo: String): Array[DecisionTreeModel] = { val datapath = Loader.dataPath(path) val sqlContext = new SQLContext(sc) - val nodes = sqlContext.parquetFile(datapath).map(NodeData.apply) + val nodes = sqlContext.read.parquet(datapath).map(NodeData.apply) val trees = constructTrees(nodes) trees.map(new DecisionTreeModel(_, Algo.fromString(treeAlgo))) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index b1a4517344970..b4e33c98ba7e5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -107,7 +107,8 @@ object LinearDataGenerator { x.foreach { v => var i = 0 - while (i < v.length) { + val len = v.length + while (i < len) { v(i) = (v(i) - 0.5) * math.sqrt(12.0 * xVariance(i)) + xMean(i) i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index 0c5b4f9d04a74..bd73a866c8a82 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -82,8 +82,7 @@ object MFDataGenerator { BLAS.gemm(z, A, B, 1.0, fullData) val df = rank * (m + n - rank) - val sampSize = scala.math.min(scala.math.round(trainSampFact * df), - scala.math.round(.99 * m * n)).toInt + val sampSize = math.min(math.round(trainSampFact * df), math.round(.99 * m * n)).toInt val rand = new Random() val mn = m * n val shuffled = rand.shuffle((0 until mn).toList) @@ -102,8 +101,8 @@ object MFDataGenerator { // optionally generate testing data if (test) { - val testSampSize = scala.math - .min(scala.math.round(sampSize * testSampFact),scala.math.round(mn - sampSize)).toInt + val testSampSize = math.min( + math.round(sampSize * testSampFact), math.round(mn - sampSize)).toInt val testOmega = shuffled.slice(sampSize, sampSize + testSampSize) val testOrdered = testOmega.sortWith(_ < _).toArray val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 5d6ddd47f67d6..541f3288b6c43 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -265,7 +265,7 @@ object MLUtils { } Vectors.fromBreeze(vector1) } - + /** * Returns the squared Euclidean distance between two vectors. The following formula will be used * if it does not introduce too much numerical error: @@ -331,7 +331,7 @@ object MLUtils { * @param x a floating-point value as input. * @return the result of `math.log(1 + math.exp(x))`. */ - private[mllib] def log1pExp(x: Double): Double = { + private[spark] def log1pExp(x: Double): Double = { if (x > 0) { x + math.log1p(math.exp(-x)) } else { diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java index 3f8e59de0f05c..f75e024a713ee 100644 --- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaLogisticRegressionSuite.java @@ -84,9 +84,10 @@ public void logisticRegressionWithSetters() { .setThreshold(0.6) .setProbabilityCol("myProbability"); LogisticRegressionModel model = lr.fit(dataset); - assert(model.fittingParamMap().apply(lr.maxIter()).equals(10)); - assert(model.fittingParamMap().apply(lr.regParam()).equals(1.0)); - assert(model.fittingParamMap().apply(lr.threshold()).equals(0.6)); + LogisticRegression parent = (LogisticRegression) model.parent(); + assert(parent.getMaxIter() == 10); + assert(parent.getRegParam() == 1.0); + assert(parent.getThreshold() == 0.6); assert(model.getThreshold() == 0.6); // Modify model params, and check that the params worked. @@ -109,9 +110,10 @@ public void logisticRegressionWithSetters() { // Call fit() with new params, and check as many params as we can. 
LogisticRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.threshold().w(0.4), lr.probabilityCol().w("theProb")); - assert(model2.fittingParamMap().apply(lr.maxIter()).equals(5)); - assert(model2.fittingParamMap().apply(lr.regParam()).equals(0.1)); - assert(model2.fittingParamMap().apply(lr.threshold()).equals(0.4)); + LogisticRegression parent2 = (LogisticRegression) model2.parent(); + assert(parent2.getMaxIter() == 5); + assert(parent2.getRegParam() == 0.1); + assert(parent2.getThreshold() == 0.4); assert(model2.getThreshold() == 0.4); assert(model2.getProbabilityCol().equals("theProb")); } diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java new file mode 100644 index 0000000000000..a1ee554152372 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaOneVsRestSuite.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.classification; + +import java.io.Serializable; +import java.util.List; + +import static scala.collection.JavaConversions.seqAsJavaList; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import static org.apache.spark.mllib.classification.LogisticRegressionSuite.generateMultinomialLogisticInput; +import org.apache.spark.mllib.regression.LabeledPoint; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; + +public class JavaOneVsRestSuite implements Serializable { + + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + private transient DataFrame dataset; + private transient JavaRDD datasetRDD; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaLOneVsRestSuite"); + jsql = new SQLContext(jsc); + int nPoints = 3; + + // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2. + // As a result, we are drawing samples from probability distribution of an actual model. 
+ double[] weights = { + -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, + -0.16624, -0.84355, -0.048509, -0.301789, 4.170682 }; + + double[] xMean = {5.843, 3.057, 3.758, 1.199}; + double[] xVariance = {0.6856, 0.1899, 3.116, 0.581}; + List points = seqAsJavaList(generateMultinomialLogisticInput( + weights, xMean, xVariance, true, nPoints, 42)); + datasetRDD = jsc.parallelize(points, 2); + dataset = jsql.createDataFrame(datasetRDD, LabeledPoint.class); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void oneVsRestDefaultParams() { + OneVsRest ova = new OneVsRest(); + ova.setClassifier(new LogisticRegression()); + Assert.assertEquals(ova.getLabelCol() , "label"); + Assert.assertEquals(ova.getPredictionCol() , "prediction"); + OneVsRestModel ovaModel = ova.fit(dataset); + DataFrame predictions = ovaModel.transform(dataset).select("label", "prediction"); + predictions.collectAsList(); + Assert.assertEquals(ovaModel.getLabelCol(), "label"); + Assert.assertEquals(ovaModel.getPredictionCol() , "prediction"); + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java new file mode 100644 index 0000000000000..d5bd230a957a1 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaBucketizerSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaBucketizerSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void bucketizerTest() { + double[] splits = {-0.5, 0.0, 0.5}; + + JavaRDD data = jsc.parallelize(Lists.newArrayList( + RowFactory.create(-0.5), + RowFactory.create(-0.3), + RowFactory.create(0.0), + RowFactory.create(0.2) + )); + StructType schema = new StructType(new StructField[] { + new StructField("feature", DataTypes.DoubleType, false, Metadata.empty()) + }); + DataFrame dataset = jsql.createDataFrame(data, schema); + + Bucketizer bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits); + + Row[] result = bucketizer.transform(dataset).select("result").collect(); + + for (Row r : result) { + double index = r.getDouble(0); + Assert.assertTrue((index >= 0) && (index <= 1)); + } + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java new file mode 100644 index 0000000000000..da2218056307e --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + + +public class JavaHashingTFSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaHashingTFSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void hashingTF() { + JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(0, "Hi I heard about Spark"), + RowFactory.create(0, "I wish Java could use case classes"), + RowFactory.create(1, "Logistic regression models are neat") + )); + StructType schema = new StructType(new StructField[]{ + new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), + new StructField("sentence", DataTypes.StringType, false, Metadata.empty()) + }); + + DataFrame sentenceData = jsql.createDataFrame(jrdd, schema); + Tokenizer tokenizer = new Tokenizer() + .setInputCol("sentence") + .setOutputCol("words"); + DataFrame wordsData = tokenizer.transform(sentenceData); + int numFeatures = 20; + HashingTF hashingTF = new HashingTF() + .setInputCol("words") + .setOutputCol("rawFeatures") + .setNumFeatures(numFeatures); + DataFrame featurizedData = hashingTF.transform(wordsData); + IDF idf = new IDF().setInputCol("rawFeatures").setOutputCol("features"); + IDFModel idfModel = idf.fit(featurizedData); + DataFrame rescaledData = idfModel.transform(featurizedData); + for (Row r : rescaledData.select("features", "label").take(3)) { + Vector features = r.getAs(0); + Assert.assertEquals(features.size(), numFeatures); + } + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java new file mode 100644 index 0000000000000..d82f3b7e8c076 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaNormalizerSuite.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import java.util.List; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; + +public class JavaNormalizerSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaNormalizerSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void normalizer() { + // The tests are to check Java compatibility. + List points = Lists.newArrayList( + new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)), + new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)), + new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0)) + ); + DataFrame dataFrame = jsql.createDataFrame(jsc.parallelize(points, 2), + VectorIndexerSuite.FeatureData.class); + Normalizer normalizer = new Normalizer() + .setInputCol("features") + .setOutputCol("normFeatures"); + + // Normalize each Vector using $L^2$ norm. + DataFrame l2NormData = normalizer.transform(dataFrame, normalizer.p().w(2)); + l2NormData.count(); + + // Normalize each Vector using $L^\infty$ norm. + DataFrame lInfNormData = + normalizer.transform(dataFrame, normalizer.p().w(Double.POSITIVE_INFINITY)); + lInfNormData.count(); + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java new file mode 100644 index 0000000000000..5e8211c2c5118 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaPolynomialExpansionSuite.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class JavaPolynomialExpansionSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaPolynomialExpansionSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void polynomialExpansionTest() { + PolynomialExpansion polyExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3); + + JavaRDD data = jsc.parallelize(Lists.newArrayList( + RowFactory.create( + Vectors.dense(-2.0, 2.3), + Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17) + ), + RowFactory.create(Vectors.dense(0.0, 0.0), Vectors.dense(new double[9])), + RowFactory.create( + Vectors.dense(0.6, -1.1), + Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331) + ) + )); + + StructType schema = new StructType(new StructField[] { + new StructField("features", new VectorUDT(), false, Metadata.empty()), + new StructField("expected", new VectorUDT(), false, Metadata.empty()) + }); + + DataFrame dataset = jsql.createDataFrame(data, schema); + + Row[] pairs = polyExpansion.transform(dataset) + .select("polyFeatures", "expected") + .collect(); + + for (Row r : pairs) { + double[] polyFeatures = ((Vector)r.get(0)).toArray(); + double[] expected = ((Vector)r.get(1)).toArray(); + Assert.assertArrayEquals(polyFeatures, expected, 1e-1); + } + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java new file mode 100644 index 0000000000000..74eb2733f06ef --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStandardScalerSuite.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import java.util.List; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.SQLContext; + +public class JavaStandardScalerSuite { + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaStandardScalerSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void standardScaler() { + // The tests are to check Java compatibility. + List points = Lists.newArrayList( + new VectorIndexerSuite.FeatureData(Vectors.dense(0.0, -2.0)), + new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 3.0)), + new VectorIndexerSuite.FeatureData(Vectors.dense(1.0, 4.0)) + ); + DataFrame dataFrame = jsql.createDataFrame(jsc.parallelize(points, 2), + VectorIndexerSuite.FeatureData.class); + StandardScaler scaler = new StandardScaler() + .setInputCol("features") + .setOutputCol("scaledFeatures") + .setWithStd(true) + .setWithMean(false); + + // Compute summary statistics by fitting the StandardScaler + StandardScalerModel scalerModel = scaler.fit(dataFrame); + + // Normalize each feature to have unit standard deviation. + DataFrame scaledData = scalerModel.transform(dataFrame); + scaledData.count(); + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java new file mode 100644 index 0000000000000..35b18c5308f61 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStringIndexerSuite.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import java.util.Arrays; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import static org.apache.spark.sql.types.DataTypes.*; + +public class JavaStringIndexerSuite { + private transient JavaSparkContext jsc; + private transient SQLContext sqlContext; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaStringIndexerSuite"); + sqlContext = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + sqlContext = null; + } + + @Test + public void testStringIndexer() { + StructType schema = createStructType(new StructField[] { + createStructField("id", IntegerType, false), + createStructField("label", StringType, false) + }); + JavaRDD rdd = jsc.parallelize( + Arrays.asList(c(0, "a"), c(1, "b"), c(2, "c"), c(3, "a"), c(4, "a"), c(5, "c"))); + DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + + StringIndexer indexer = new StringIndexer() + .setInputCol("label") + .setOutputCol("labelIndex"); + DataFrame output = indexer.fit(dataset).transform(dataset); + + Assert.assertArrayEquals( + new Row[] { c(0, 0.0), c(1, 2.0), c(2, 1.0), c(3, 0.0), c(4, 0.0), c(5, 1.0) }, + output.orderBy("id").select("id", "labelIndex").collect()); + } + + /** An alias for RowFactory.create. */ + private Row c(Object... values) { + return RowFactory.create(values); + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java new file mode 100644 index 0000000000000..b7c564caad3bd --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorAssemblerSuite.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import java.util.Arrays; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; +import static org.apache.spark.sql.types.DataTypes.*; + +public class JavaVectorAssemblerSuite { + private transient JavaSparkContext jsc; + private transient SQLContext sqlContext; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaVectorAssemblerSuite"); + sqlContext = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void testVectorAssembler() { + StructType schema = createStructType(new StructField[] { + createStructField("id", IntegerType, false), + createStructField("x", DoubleType, false), + createStructField("y", new VectorUDT(), false), + createStructField("name", StringType, false), + createStructField("z", new VectorUDT(), false), + createStructField("n", LongType, false) + }); + Row row = RowFactory.create( + 0, 0.0, Vectors.dense(1.0, 2.0), "a", + Vectors.sparse(2, new int[] {1}, new double[] {3.0}), 10L); + JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(row)); + DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + VectorAssembler assembler = new VectorAssembler() + .setInputCols(new String[] {"x", "y", "z", "n"}) + .setOutputCol("features"); + DataFrame output = assembler.transform(dataset); + Assert.assertEquals( + Vectors.sparse(6, new int[] {1, 2, 4, 5}, new double[] {1.0, 2.0, 3.0, 10.0}), + output.select("features").first().getAs(0)); + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java index 161100134c92d..c7ae5468b9429 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaVectorIndexerSuite.java @@ -19,6 +19,7 @@ import java.io.Serializable; import java.util.List; +import java.util.Map; import org.junit.After; import org.junit.Assert; @@ -64,7 +65,8 @@ public void vectorIndexerAPI() { .setMaxCategories(2); VectorIndexerModel model = indexer.fit(data); Assert.assertEquals(model.numFeatures(), 2); - Assert.assertEquals(model.categoryMaps().size(), 1); + Map<Integer, Map<Double, Integer>> categoryMaps = model.javaCategoryMaps(); + Assert.assertEquals(categoryMaps.size(), 1); DataFrame indexedData = model.transform(data); } } diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java new file mode 100644 index 0000000000000..39c70157f83c0 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaWord2VecSuite.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; + +public class JavaWord2VecSuite { + private transient JavaSparkContext jsc; + private transient SQLContext sqlContext; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaWord2VecSuite"); + sqlContext = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void testJavaWord2Vec() { + JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Lists.newArrayList("Hi I heard about Spark".split(" "))), + RowFactory.create(Lists.newArrayList("I wish Java could use case classes".split(" "))), + RowFactory.create(Lists.newArrayList("Logistic regression models are neat".split(" "))) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) + }); + DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema); + + Word2Vec word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0); + Word2VecModel model = word2Vec.fit(documentDF); + DataFrame result = model.transform(documentDF); + + for (Row r: result.select("result").collect()) { + double[] polyFeatures = ((Vector)r.get(0)).toArray(); + Assert.assertEquals(polyFeatures.length, 3); + } + } +} diff --git a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java index 8abe575610d19..947ae3a2ce06f 100644 --- a/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java +++ b/mllib/src/test/java/org/apache/spark/ml/param/JavaTestParams.java @@ -21,43 +21,66 @@ import com.google.common.collect.Lists; +import org.apache.spark.ml.util.Identifiable$; + /** * A subclass of Params for testing. 
*/ public class JavaTestParams extends JavaParams { - public IntParam myIntParam; + public JavaTestParams() { + this.uid_ = Identifiable$.MODULE$.randomUID("javaTestParams"); + init(); + } + + public JavaTestParams(String uid) { + this.uid_ = uid; + init(); + } + + private String uid_; + + @Override + public String uid() { + return uid_; + } - public int getMyIntParam() { return (Integer)getOrDefault(myIntParam); } + private IntParam myIntParam_; + public IntParam myIntParam() { return myIntParam_; } + + public int getMyIntParam() { return (Integer)getOrDefault(myIntParam_); } public JavaTestParams setMyIntParam(int value) { - set(myIntParam, value); return this; + set(myIntParam_, value); return this; } - public DoubleParam myDoubleParam; + private DoubleParam myDoubleParam_; + public DoubleParam myDoubleParam() { return myDoubleParam_; } - public double getMyDoubleParam() { return (Double)getOrDefault(myDoubleParam); } + public double getMyDoubleParam() { return (Double)getOrDefault(myDoubleParam_); } public JavaTestParams setMyDoubleParam(double value) { - set(myDoubleParam, value); return this; + set(myDoubleParam_, value); return this; } - public Param myStringParam; + private Param myStringParam_; + public Param myStringParam() { return myStringParam_; } - public String getMyStringParam() { return (String)getOrDefault(myStringParam); } + public String getMyStringParam() { return getOrDefault(myStringParam_); } public JavaTestParams setMyStringParam(String value) { - set(myStringParam, value); return this; + set(myStringParam_, value); return this; } - public JavaTestParams() { - myIntParam = new IntParam(this, "myIntParam", "this is an int param", ParamValidators.gt(0)); - myDoubleParam = new DoubleParam(this, "myDoubleParam", "this is a double param", + private void init() { + myIntParam_ = new IntParam(this, "myIntParam", "this is an int param", ParamValidators.gt(0)); + myDoubleParam_ = new DoubleParam(this, "myDoubleParam", "this is a double param", ParamValidators.inRange(0.0, 1.0)); List validStrings = Lists.newArrayList("a", "b"); - myStringParam = new Param(this, "myStringParam", "this is a string param", + myStringParam_ = new Param(this, "myStringParam", "this is a string param", ParamValidators.inArray(validStrings)); - setDefault(myIntParam, 1); - setDefault(myDoubleParam, 0.5); + setDefault(myIntParam_, 1); + setDefault(myDoubleParam_, 0.5); + setDefault(myIntParam().w(1), myDoubleParam().w(0.5)); } } diff --git a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java index 0cc36c8d56d70..d591a456864e4 100644 --- a/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/regression/JavaLinearRegressionSuite.java @@ -23,14 +23,15 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; +import static org.junit.Assert.assertEquals; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import static org.apache.spark.mllib.classification.LogisticRegressionSuite - .generateLogisticInputAsList; import org.apache.spark.mllib.regression.LabeledPoint; import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.SQLContext; +import static org.apache.spark.mllib.classification.LogisticRegressionSuite + .generateLogisticInputAsList; public class JavaLinearRegressionSuite implements Serializable { @@ -65,8 +66,8 @@ public void 
linearRegressionDefaultParams() { DataFrame predictions = jsql.sql("SELECT label, prediction FROM prediction"); predictions.collect(); // Check defaults - assert(model.getFeaturesCol().equals("features")); - assert(model.getPredictionCol().equals("prediction")); + assertEquals("features", model.getFeaturesCol()); + assertEquals("prediction", model.getPredictionCol()); } @Test @@ -76,14 +77,16 @@ public void linearRegressionWithSetters() { .setMaxIter(10) .setRegParam(1.0); LinearRegressionModel model = lr.fit(dataset); - assert(model.fittingParamMap().apply(lr.maxIter()).equals(10)); - assert(model.fittingParamMap().apply(lr.regParam()).equals(1.0)); + LinearRegression parent = (LinearRegression) model.parent(); + assertEquals(10, parent.getMaxIter()); + assertEquals(1.0, parent.getRegParam(), 0.0); // Call fit() with new params, and check as many params as we can. LinearRegressionModel model2 = lr.fit(dataset, lr.maxIter().w(5), lr.regParam().w(0.1), lr.predictionCol().w("thePred")); - assert(model2.fittingParamMap().apply(lr.maxIter()).equals(5)); - assert(model2.fittingParamMap().apply(lr.regParam()).equals(0.1)); - assert(model2.getPredictionCol().equals("thePred")); + LinearRegression parent2 = (LinearRegression) model2.parent(); + assertEquals(5, parent2.getMaxIter()); + assertEquals(0.1, parent2.getRegParam(), 0.0); + assertEquals("thePred", model2.getPredictionCol()); } } diff --git a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java index 0bb6b489f2757..08eeca53f0721 100644 --- a/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/tuning/JavaCrossValidatorSuite.java @@ -68,8 +68,8 @@ public void crossValidationWithLogisticRegression() { .setEvaluator(eval) .setNumFolds(3); CrossValidatorModel cvModel = cv.fit(dataset); - ParamMap bestParamMap = cvModel.bestModel().fittingParamMap(); - Assert.assertEquals(0.001, bestParamMap.apply(lr.regParam())); - Assert.assertEquals(10, bestParamMap.apply(lr.maxIter())); + LogisticRegression parent = (LogisticRegression) cvModel.bestModel().parent(); + Assert.assertEquals(0.001, parent.getRegParam(), 0.0); + Assert.assertEquals(10, parent.getMaxIter()); } } diff --git a/mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala b/mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala new file mode 100644 index 0000000000000..67c262d0f9d8d --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/util/IdentifiableSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.util + +import org.scalatest.FunSuite + +class IdentifiableSuite extends FunSuite { + + import IdentifiableSuite.Test + + test("Identifiable") { + val test0 = new Test("test_0") + assert(test0.uid === "test_0") + + val test1 = new Test + assert(test1.uid.startsWith("test_")) + } +} + +object IdentifiableSuite { + + class Test(override val uid: String) extends Identifiable { + def this() = this(Identifiable.randomUID("test")) + } +} diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java index 71fb7f13c39c2..3771c0ea7ad83 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaNaiveBayesSuite.java @@ -108,7 +108,7 @@ public Vector call(LabeledPoint v) throws Exception { @Test public void testModelTypeSetters() { NaiveBayes nb = new NaiveBayes() - .setModelType("Bernoulli") - .setModelType("Multinomial"); + .setModelType("bernoulli") + .setModelType("multinomial"); } } diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index f394d903966de..96c2da169961f 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -117,7 +117,7 @@ public void OnlineOptimizerCompatibility() { // Train a model OnlineLDAOptimizer op = new OnlineLDAOptimizer() - .setTau_0(1024) + .setTau0(1024) .setKappa(0.51) .setGammaShape(1e40) .setMiniBatchFraction(0.5); diff --git a/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java new file mode 100644 index 0000000000000..effc8a1a6dabc --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/evaluation/JavaRankingMetricsSuite.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.evaluation; + +import java.io.Serializable; +import java.util.ArrayList; + +import scala.Tuple2; +import scala.Tuple2$; + +import com.google.common.collect.Lists; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; + +public class JavaRankingMetricsSuite implements Serializable { + private transient JavaSparkContext sc; + private transient JavaRDD<Tuple2<ArrayList<Integer>, ArrayList<Integer>>> predictionAndLabels; + + @Before + public void setUp() { + sc = new JavaSparkContext("local", "JavaRankingMetricsSuite"); + predictionAndLabels = sc.parallelize(Lists.newArrayList( + Tuple2$.MODULE$.apply( + Lists.newArrayList(1, 6, 2, 7, 8, 3, 9, 10, 4, 5), Lists.newArrayList(1, 2, 3, 4, 5)), + Tuple2$.MODULE$.apply( + Lists.newArrayList(4, 1, 5, 6, 2, 7, 3, 8, 9, 10), Lists.newArrayList(1, 2, 3)), + Tuple2$.MODULE$.apply( + Lists.newArrayList(1, 2, 3, 4, 5), Lists.newArrayList())), 2); + } + + @After + public void tearDown() { + sc.stop(); + sc = null; + } + + @Test + public void rankingMetrics() { + @SuppressWarnings("unchecked") + RankingMetrics metrics = RankingMetrics.of(predictionAndLabels); + Assert.assertEquals(0.355026, metrics.meanAveragePrecision(), 1e-5); + Assert.assertEquals(0.75 / 3.0, metrics.precisionAt(4), 1e-5); + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala index 2f175fb117941..2b04a3034782e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala @@ -42,30 +42,32 @@ class PipelineSuite extends FunSuite { val dataset3 = mock[DataFrame] val dataset4 = mock[DataFrame] - when(estimator0.fit(meq(dataset0), any[ParamMap]())).thenReturn(model0) - when(model0.transform(meq(dataset0), any[ParamMap]())).thenReturn(dataset1) + when(estimator0.copy(any[ParamMap])).thenReturn(estimator0) + when(model0.copy(any[ParamMap])).thenReturn(model0) + when(transformer1.copy(any[ParamMap])).thenReturn(transformer1) + when(estimator2.copy(any[ParamMap])).thenReturn(estimator2) + when(model2.copy(any[ParamMap])).thenReturn(model2) + when(transformer3.copy(any[ParamMap])).thenReturn(transformer3) + + when(estimator0.fit(meq(dataset0))).thenReturn(model0) + when(model0.transform(meq(dataset0))).thenReturn(dataset1) when(model0.parent).thenReturn(estimator0) - when(transformer1.transform(meq(dataset1), any[ParamMap])).thenReturn(dataset2) - when(estimator2.fit(meq(dataset2), any[ParamMap]())).thenReturn(model2) - when(model2.transform(meq(dataset2), any[ParamMap]())).thenReturn(dataset3) + when(transformer1.transform(meq(dataset1))).thenReturn(dataset2) + when(estimator2.fit(meq(dataset2))).thenReturn(model2) + when(model2.transform(meq(dataset2))).thenReturn(dataset3) + when(model2.parent).thenReturn(estimator2) - when(transformer3.transform(meq(dataset3), any[ParamMap]())).thenReturn(dataset4) + when(transformer3.transform(meq(dataset3))).thenReturn(dataset4) val pipeline = new Pipeline() .setStages(Array(estimator0, transformer1, estimator2, transformer3)) val pipelineModel = pipeline.fit(dataset0) - assert(pipelineModel.stages.size === 4) + assert(pipelineModel.stages.length === 4) assert(pipelineModel.stages(0).eq(model0)) assert(pipelineModel.stages(1).eq(transformer1)) assert(pipelineModel.stages(2).eq(model2)) assert(pipelineModel.stages(3).eq(transformer3)) -
assert(pipelineModel.getModel(estimator0).eq(model0)) - assert(pipelineModel.getModel(estimator2).eq(model2)) - intercept[NoSuchElementException] { - pipelineModel.getModel(mock[Estimator[MyModel]]) - } val output = pipelineModel.transform(dataset0) assert(output.eq(dataset4)) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala index 3e1a7196e37cb..ec9b717e41ce8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/attribute/AttributeSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.attribute import org.scalatest.FunSuite -import org.apache.spark.sql.types.{DoubleType, MetadataBuilder, Metadata} +import org.apache.spark.sql.types._ class AttributeSuite extends FunSuite { @@ -209,4 +209,12 @@ class AttributeSuite extends FunSuite { intercept[IllegalArgumentException](attr.withName("")) intercept[IllegalArgumentException](attr.withIndex(-1)) } + + test("attribute from struct field") { + val metadata = NumericAttribute.defaultAttr.withName("label").toMetadata() + val fldWithoutMeta = new StructField("x", DoubleType, false, Metadata.empty) + assert(Attribute.fromStructField(fldWithoutMeta) == UnresolvedAttribute) + val fldWithMeta = new StructField("x", DoubleType, false, metadata) + assert(Attribute.fromStructField(fldWithMeta).isNumeric) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala index 9b31adecdcb1c..1e385e7540261 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala @@ -266,9 +266,9 @@ private[ml] object DecisionTreeClassifierSuite extends FunSuite { val oldTree = OldDecisionTree.train(data, oldStrategy) val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses) val newTree = dt.fit(newData) - // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldTreeAsNew = DecisionTreeClassificationModel.fromOld(oldTree, newTree.parent, - newTree.fittingParamMap, categoricalFeatures) + // Use parent from newTree since this is not checked anyways. + val oldTreeAsNew = DecisionTreeClassificationModel.fromOld( + oldTree, newTree.parent.asInstanceOf[DecisionTreeClassifier], categoricalFeatures) TreeTests.checkEqual(oldTreeAsNew, newTree) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index e6ccc2c93cba8..fdc0e2219b25a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -128,9 +128,9 @@ private object GBTClassifierSuite { val oldModel = oldGBT.run(data) val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses = 2) val newModel = gbt.fit(newData) - // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldModelAsNew = GBTClassificationModel.fromOld(oldModel, newModel.parent, - newModel.fittingParamMap, categoricalFeatures) + // Use parent from newTree since this is not checked anyways. 
+ val oldModelAsNew = GBTClassificationModel.fromOld( + oldModel, newModel.parent.asInstanceOf[GBTClassifier], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 35d8c2e16c6cd..9f77d5f3efc55 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -19,44 +19,69 @@ package org.apache.spark.ml.classification import org.scalatest.FunSuite -import org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInput +import org.apache.spark.mllib.classification.LogisticRegressionSuite._ import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{DataFrame, Row, SQLContext} - +import org.apache.spark.sql.{DataFrame, Row} class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ @transient var dataset: DataFrame = _ + @transient var binaryDataset: DataFrame = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) - dataset = sqlContext.createDataFrame( - sc.parallelize(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42), 2)) + + dataset = sqlContext.createDataFrame(generateLogisticInput(1.0, 1.0, nPoints = 100, seed = 42)) + + /** + * Here is the instruction describing how to export the test data into CSV format + * so we can validate the training accuracy compared with R's glmnet package. 
+ * + * import org.apache.spark.mllib.classification.LogisticRegressionSuite + * val nPoints = 10000 + * val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191) + * val xMean = Array(5.843, 3.057, 3.758, 1.199) + * val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + * val data = sc.parallelize(LogisticRegressionSuite.generateMultinomialLogisticInput( + * weights, xMean, xVariance, true, nPoints, 42), 1) + * data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1) + ", " + * + x.features(2) + ", " + x.features(3)).saveAsTextFile("path") + */ + binaryDataset = { + val nPoints = 10000 + val weights = Array(-0.57997, 0.912083, -0.371077, -0.819866, 2.688191) + val xMean = Array(5.843, 3.057, 3.758, 1.199) + val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + + val testData = generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42) + + sqlContext.createDataFrame( + generateMultinomialLogisticInput(weights, xMean, xVariance, true, nPoints, 42)) + } } test("logistic regression: default params") { val lr = new LogisticRegression - assert(lr.getLabelCol == "label") - assert(lr.getFeaturesCol == "features") - assert(lr.getPredictionCol == "prediction") - assert(lr.getRawPredictionCol == "rawPrediction") - assert(lr.getProbabilityCol == "probability") - assert(lr.getFitIntercept == true) + assert(lr.getLabelCol === "label") + assert(lr.getFeaturesCol === "features") + assert(lr.getPredictionCol === "prediction") + assert(lr.getRawPredictionCol === "rawPrediction") + assert(lr.getProbabilityCol === "probability") + assert(lr.getFitIntercept) val model = lr.fit(dataset) model.transform(dataset) .select("label", "probability", "prediction", "rawPrediction") .collect() assert(model.getThreshold === 0.5) - assert(model.getFeaturesCol == "features") - assert(model.getPredictionCol == "prediction") - assert(model.getRawPredictionCol == "rawPrediction") - assert(model.getProbabilityCol == "probability") + assert(model.getFeaturesCol === "features") + assert(model.getPredictionCol === "prediction") + assert(model.getRawPredictionCol === "rawPrediction") + assert(model.getProbabilityCol === "probability") assert(model.intercept !== 0.0) + assert(model.hasParent) } test("logistic regression doesn't fit intercept when fitIntercept is off") { @@ -74,9 +99,10 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { .setThreshold(0.6) .setProbabilityCol("myProbability") val model = lr.fit(dataset) - assert(model.fittingParamMap.get(lr.maxIter) === Some(10)) - assert(model.fittingParamMap.get(lr.regParam) === Some(1.0)) - assert(model.fittingParamMap.get(lr.threshold) === Some(0.6)) + val parent = model.parent.asInstanceOf[LogisticRegression] + assert(parent.getMaxIter === 10) + assert(parent.getRegParam === 1.0) + assert(parent.getThreshold === 0.6) assert(model.getThreshold === 0.6) // Modify model params, and check that the params worked. @@ -99,11 +125,12 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { // Call fit() with new params, and check as many params as we can. 
val model2 = lr.fit(dataset, lr.maxIter -> 5, lr.regParam -> 0.1, lr.threshold -> 0.4, lr.probabilityCol -> "theProb") - assert(model2.fittingParamMap.get(lr.maxIter).get === 5) - assert(model2.fittingParamMap.get(lr.regParam).get === 0.1) - assert(model2.fittingParamMap.get(lr.threshold).get === 0.4) + val parent2 = model2.parent.asInstanceOf[LogisticRegression] + assert(parent2.getMaxIter === 5) + assert(parent2.getRegParam === 0.1) + assert(parent2.getThreshold === 0.4) assert(model2.getThreshold === 0.4) - assert(model2.getProbabilityCol == "theProb") + assert(model2.getProbabilityCol === "theProb") } test("logistic regression: Predictor, Classifier methods") { @@ -117,7 +144,7 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { val results = model.transform(dataset) // Compare rawPrediction with probability - results.select("rawPrediction", "probability").collect().map { + results.select("rawPrediction", "probability").collect().foreach { case Row(raw: Vector, prob: Vector) => assert(raw.size === 2) assert(prob.size === 2) @@ -127,10 +154,370 @@ class LogisticRegressionSuite extends FunSuite with MLlibTestSparkContext { } // Compare prediction with probability - results.select("prediction", "probability").collect().map { + results.select("prediction", "probability").collect().foreach { case Row(pred: Double, prob: Vector) => val predFromProb = prob.toArray.zipWithIndex.maxBy(_._1)._2 assert(pred == predFromProb) } } + + test("MultiClassSummarizer") { + val summarizer1 = (new MultiClassSummarizer) + .add(0.0).add(3.0).add(4.0).add(3.0).add(6.0) + assert(summarizer1.histogram.zip(Array[Long](1, 0, 0, 2, 1, 0, 1)).forall(x => x._1 === x._2)) + assert(summarizer1.countInvalid === 0) + assert(summarizer1.numClasses === 7) + + val summarizer2 = (new MultiClassSummarizer) + .add(1.0).add(5.0).add(3.0).add(0.0).add(4.0).add(1.0) + assert(summarizer2.histogram.zip(Array[Long](1, 2, 0, 1, 1, 1)).forall(x => x._1 === x._2)) + assert(summarizer2.countInvalid === 0) + assert(summarizer2.numClasses === 6) + + val summarizer3 = (new MultiClassSummarizer) + .add(0.0).add(1.3).add(5.2).add(2.5).add(2.0).add(4.0).add(4.0).add(4.0).add(1.0) + assert(summarizer3.histogram.zip(Array[Long](1, 1, 1, 0, 3)).forall(x => x._1 === x._2)) + assert(summarizer3.countInvalid === 3) + assert(summarizer3.numClasses === 5) + + val summarizer4 = (new MultiClassSummarizer) + .add(3.1).add(4.3).add(2.0).add(1.0).add(3.0) + assert(summarizer4.histogram.zip(Array[Long](0, 1, 1, 1)).forall(x => x._1 === x._2)) + assert(summarizer4.countInvalid === 2) + assert(summarizer4.numClasses === 4) + + // small map merges large one + val summarizerA = summarizer1.merge(summarizer2) + assert(summarizerA.hashCode() === summarizer2.hashCode()) + assert(summarizerA.histogram.zip(Array[Long](2, 2, 0, 3, 2, 1, 1)).forall(x => x._1 === x._2)) + assert(summarizerA.countInvalid === 0) + assert(summarizerA.numClasses === 7) + + // large map merges small one + val summarizerB = summarizer3.merge(summarizer4) + assert(summarizerB.hashCode() === summarizer3.hashCode()) + assert(summarizerB.histogram.zip(Array[Long](1, 2, 2, 1, 3)).forall(x => x._1 === x._2)) + assert(summarizerB.countInvalid === 5) + assert(summarizerB.numClasses === 5) + } + + test("binary logistic regression with intercept without regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. 
+ * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) 2.8366423 + * data.V2 -0.5895848 + * data.V3 0.8931147 + * data.V4 -0.3925051 + * data.V5 -0.7996864 + */ + val interceptR = 2.8366423 + val weightsR = Array(-0.5895848, 0.8931147, -0.3925051, -0.7996864) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression without intercept without regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = + * coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 0, intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 -0.3534996 + * data.V3 1.2964482 + * data.V4 -0.3571741 + * data.V5 -0.7407946 + */ + val interceptR = 0.0 + val weightsR = Array(-0.3534996, 1.2964482, -0.3571741, -0.7407946) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-2) + assert(model.weights(1) ~== weightsR(1) relTol 1E-2) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression with intercept with L1 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(0.12) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) -0.05627428 + * data.V2 . + * data.V3 . + * data.V4 -0.04325749 + * data.V5 -0.02481551 + */ + val interceptR = -0.05627428 + val weightsR = Array(0.0, 0.0, -0.04325749, -0.02481551) + + assert(model.intercept ~== interceptR relTol 1E-2) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-2) + assert(model.weights(3) ~== weightsR(3) relTol 2E-2) + } + + test("binary logistic regression without intercept with L1 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(1.0).setRegParam(0.12) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. 
+ * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 1, lambda = 0.12, + * intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 . + * data.V3 . + * data.V4 -0.05189203 + * data.V5 -0.03891782 + */ + val interceptR = 0.0 + val weightsR = Array(0.0, 0.0, -0.05189203, -0.03891782) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-2) + assert(model.weights(3) ~== weightsR(3) relTol 1E-2) + } + + test("binary logistic regression with intercept with L2 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.0).setRegParam(1.37) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) 0.15021751 + * data.V2 -0.07251837 + * data.V3 0.10724191 + * data.V4 -0.04865309 + * data.V5 -0.10062872 + */ + val interceptR = 0.15021751 + val weightsR = Array(-0.07251837, 0.10724191, -0.04865309, -0.10062872) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression without intercept with L2 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.0).setRegParam(1.37) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0, lambda = 1.37, + * intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 -0.06099165 + * data.V3 0.12857058 + * data.V4 -0.04708770 + * data.V5 -0.09799775 + */ + val interceptR = 0.0 + val weightsR = Array(-0.06099165, 0.12857058, -0.04708770, -0.09799775) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) relTol 1E-2) + assert(model.weights(1) ~== weightsR(1) relTol 1E-2) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression with intercept with ElasticNet regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(0.38).setRegParam(0.21) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. 
+ * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) 0.57734851 + * data.V2 -0.05310287 + * data.V3 . + * data.V4 -0.08849250 + * data.V5 -0.15458796 + */ + val interceptR = 0.57734851 + val weightsR = Array(-0.05310287, 0.0, -0.08849250, -0.15458796) + + assert(model.intercept ~== interceptR relTol 6E-3) + assert(model.weights(0) ~== weightsR(0) relTol 5E-3) + assert(model.weights(1) ~== weightsR(1) relTol 1E-3) + assert(model.weights(2) ~== weightsR(2) relTol 5E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-3) + } + + test("binary logistic regression without intercept with ElasticNet regularization") { + val trainer = (new LogisticRegression).setFitIntercept(false) + .setElasticNetParam(0.38).setRegParam(0.21) + val model = trainer.fit(binaryDataset) + + /** + * Using the following R code to load the data and train the model using glmnet package. + * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 0.38, lambda = 0.21, + * intercept=FALSE)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) . + * data.V2 -0.001005743 + * data.V3 0.072577857 + * data.V4 -0.081203769 + * data.V5 -0.142534158 + */ + val interceptR = 0.0 + val weightsR = Array(-0.001005743, 0.072577857, -0.081203769, -0.142534158) + + assert(model.intercept ~== interceptR relTol 1E-3) + assert(model.weights(0) ~== weightsR(0) absTol 1E-3) + assert(model.weights(1) ~== weightsR(1) absTol 1E-2) + assert(model.weights(2) ~== weightsR(2) relTol 1E-3) + assert(model.weights(3) ~== weightsR(3) relTol 1E-2) + } + + test("binary logistic regression with intercept with strong L1 regularization") { + val trainer = (new LogisticRegression).setFitIntercept(true) + .setElasticNetParam(1.0).setRegParam(6.0) + val model = trainer.fit(binaryDataset) + + val histogram = binaryDataset.map { case Row(label: Double, features: Vector) => label } + .treeAggregate(new MultiClassSummarizer)( + seqOp = (c, v) => (c, v) match { + case (classSummarizer: MultiClassSummarizer, label: Double) => classSummarizer.add(label) + }, + combOp = (c1, c2) => (c1, c2) match { + case (classSummarizer1: MultiClassSummarizer, classSummarizer2: MultiClassSummarizer) => + classSummarizer1.merge(classSummarizer2) + }).histogram + + /** + * For binary logistic regression with strong L1 regularization, all the weights will be zeros. + * As a result, + * {{{ + * P(0) = 1 / (1 + \exp(b)), and + * P(1) = \exp(b) / (1 + \exp(b)) + * }}}, hence + * {{{ + * b = \log{P(1) / P(0)} = \log{count_1 / count_0} + * }}} + */ + val interceptTheory = math.log(histogram(1).toDouble / histogram(0).toDouble) + val weightsTheory = Array(0.0, 0.0, 0.0, 0.0) + + assert(model.intercept ~== interceptTheory relTol 1E-5) + assert(model.weights(0) ~== weightsTheory(0) absTol 1E-6) + assert(model.weights(1) ~== weightsTheory(1) absTol 1E-6) + assert(model.weights(2) ~== weightsTheory(2) absTol 1E-6) + assert(model.weights(3) ~== weightsTheory(3) absTol 1E-6) + + /** + * Using the following R code to load the data and train the model using glmnet package. 
+ * + * > library("glmnet") + * > data <- read.csv("path", header=FALSE) + * > label = factor(data$V1) + * > features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5)) + * > weights = coef(glmnet(features,label, family="binomial", alpha = 1.0, lambda = 6.0)) + * > weights + * 5 x 1 sparse Matrix of class "dgCMatrix" + * s0 + * (Intercept) -0.2480643 + * data.V2 0.0000000 + * data.V3 . + * data.V4 . + * data.V5 . + */ + val interceptR = -0.248065 + val weightsR = Array(0.0, 0.0, 0.0, 0.0) + + assert(model.intercept ~== interceptR relTol 1E-5) + assert(model.weights(0) ~== weightsR(0) absTol 1E-6) + assert(model.weights(1) ~== weightsR(1) absTol 1E-6) + assert(model.weights(2) ~== weightsR(2) absTol 1E-6) + assert(model.weights(3) ~== weightsR(3) absTol 1E-6) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala new file mode 100644 index 0000000000000..1b354d077dc8d --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/OneVsRestSuite.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.classification + +import org.scalatest.FunSuite + +import org.apache.spark.ml.attribute.NominalAttribute +import org.apache.spark.ml.util.MetadataUtils +import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS +import org.apache.spark.mllib.classification.LogisticRegressionSuite._ +import org.apache.spark.mllib.evaluation.MulticlassMetrics +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.DataFrame + +class OneVsRestSuite extends FunSuite with MLlibTestSparkContext { + + @transient var dataset: DataFrame = _ + @transient var rdd: RDD[LabeledPoint] = _ + + override def beforeAll(): Unit = { + super.beforeAll() + + val nPoints = 1000 + + // The following weights and xMean/xVariance are computed from iris dataset with lambda=0.2. + // As a result, we are drawing samples from probability distribution of an actual model. 
+ val weights = Array( + -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, + -0.16624, -0.84355, -0.048509, -0.301789, 4.170682) + + val xMean = Array(5.843, 3.057, 3.758, 1.199) + val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) + rdd = sc.parallelize(generateMultinomialLogisticInput( + weights, xMean, xVariance, true, nPoints, 42), 2) + dataset = sqlContext.createDataFrame(rdd) + } + + test("one-vs-rest: default params") { + val numClasses = 3 + val ova = new OneVsRest() + .setClassifier(new LogisticRegression) + assert(ova.getLabelCol === "label") + assert(ova.getPredictionCol === "prediction") + val ovaModel = ova.fit(dataset) + assert(ovaModel.models.size === numClasses) + val transformedDataset = ovaModel.transform(dataset) + + // check for label metadata in prediction col + val predictionColSchema = transformedDataset.schema(ovaModel.getPredictionCol) + assert(MetadataUtils.getNumClasses(predictionColSchema) === Some(3)) + + val ovaResults = transformedDataset + .select("prediction", "label") + .map(row => (row.getDouble(0), row.getDouble(1))) + + val lr = new LogisticRegressionWithLBFGS().setIntercept(true).setNumClasses(numClasses) + lr.optimizer.setRegParam(0.1).setNumIterations(100) + + val model = lr.run(rdd) + val results = model.predict(rdd.map(_.features)).zip(rdd.map(_.label)) + // determine the #confusion matrix in each class. + // bound how much error we allow compared to multinomial logistic regression. + val expectedMetrics = new MulticlassMetrics(results) + val ovaMetrics = new MulticlassMetrics(ovaResults) + assert(expectedMetrics.confusionMatrix ~== ovaMetrics.confusionMatrix absTol 400) + } + + test("one-vs-rest: pass label metadata correctly during train") { + val numClasses = 3 + val ova = new OneVsRest() + ova.setClassifier(new MockLogisticRegression) + + val labelMetadata = NominalAttribute.defaultAttr.withName("label").withNumValues(numClasses) + val labelWithMetadata = dataset("label").as("label", labelMetadata.toMetadata()) + val features = dataset("features").as("features") + val datasetWithLabelMetadata = dataset.select(labelWithMetadata, features) + ova.fit(datasetWithLabelMetadata) + } + + test("SPARK-8049: OneVsRest shouldn't output temp columns") { + val logReg = new LogisticRegression() + .setMaxIter(1) + val ovr = new OneVsRest() + .setClassifier(logReg) + val output = ovr.fit(dataset).transform(dataset) + assert(output.schema.fieldNames.toSet === Set("label", "features", "prediction")) + } +} + +private class MockLogisticRegression(uid: String) extends LogisticRegression(uid) { + + def this() = this("mockLogReg") + + setMaxIter(1) + + override protected def train(dataset: DataFrame): LogisticRegressionModel = { + val labelSchema = dataset.schema($(labelCol)) + // check for label attribute propagation. 
+ assert(MetadataUtils.getNumClasses(labelSchema).forall(_ == 2)) + super.train(dataset) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala index ed41a9664f94f..be860f65c7070 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala @@ -158,9 +158,11 @@ private object RandomForestClassifierSuite { data, oldStrategy, rf.getNumTrees, rf.getFeatureSubsetStrategy, rf.getSeed.toInt) val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses) val newModel = rf.fit(newData) - // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldModelAsNew = RandomForestClassificationModel.fromOld(oldModel, newModel.parent, - newModel.fittingParamMap, categoricalFeatures) + // Use parent from newTree since this is not checked anyways. + val oldModelAsNew = RandomForestClassificationModel.fromOld( + oldModel, newModel.parent.asInstanceOf[RandomForestClassifier], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) + assert(newModel.hasParent) + assert(!newModel.trees.head.asInstanceOf[DecisionTreeClassificationModel].hasParent) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala new file mode 100644 index 0000000000000..057a9ae193a34 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.evaluation + +import org.scalatest.FunSuite + +import org.apache.spark.ml.regression.LinearRegression +import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext} +import org.apache.spark.mllib.util.TestingUtils._ + +class RegressionEvaluatorSuite extends FunSuite with MLlibTestSparkContext { + + test("Regression Evaluator: default params") { + /** + * Here is the instruction describing how to export the test data into CSV format + * so we can validate the metrics compared with R's mmetric package. 
+ * + * import org.apache.spark.mllib.util.LinearDataGenerator + * val data = sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, + * Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 0.1)) + * data.map(x=> x.label + ", " + x.features(0) + ", " + x.features(1)) + * .saveAsTextFile("path") + */ + val dataset = sqlContext.createDataFrame( + sc.parallelize(LinearDataGenerator.generateLinearInput( + 6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 100, 42, 0.1), 2)) + + /** + * Using the following R code to load the data, train the model and evaluate metrics. + * + * > library("glmnet") + * > library("rminer") + * > data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE) + * > features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3))) + * > label <- as.numeric(data$V1) + * > model <- glmnet(features, label, family="gaussian", alpha = 0, lambda = 0) + * > rmse <- mmetric(label, predict(model, features), metric='RMSE') + * > mae <- mmetric(label, predict(model, features), metric='MAE') + * > r2 <- mmetric(label, predict(model, features), metric='R2') + */ + val trainer = new LinearRegression + val model = trainer.fit(dataset) + val predictions = model.transform(dataset) + + // default = rmse + val evaluator = new RegressionEvaluator() + assert(evaluator.evaluate(predictions) ~== 0.1019382 absTol 0.001) + + // r2 score + evaluator.setMetricName("r2") + assert(evaluator.evaluate(predictions) ~== 0.9998196 absTol 0.001) + + // mae + evaluator.setMetricName("mae") + assert(evaluator.evaluate(predictions) ~== 0.08036075 absTol 0.001) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala index caf1b759593f3..c2785dca6e2bc 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala @@ -20,18 +20,14 @@ package org.apache.spark.ml.feature import org.scalatest.FunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{DataFrame, Row, SQLContext} - +import org.apache.spark.sql.{DataFrame, Row} class BinarizerSuite extends FunSuite with MLlibTestSparkContext { @transient var data: Array[Double] = _ - @transient var sqlContext: SQLContext = _ override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) data = Array(0.1, -0.5, 0.2, -0.3, 0.8, 0.7, -0.1, -0.4) } @@ -52,7 +48,7 @@ class BinarizerSuite extends FunSuite with MLlibTestSparkContext { test("Binarize continuous features with setter") { val threshold: Double = 0.2 - val thresholdBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0) + val thresholdBinarized: Array[Double] = data.map(x => if (x > threshold) 1.0 else 0.0) val dataFrame: DataFrame = sqlContext.createDataFrame( data.zip(thresholdBinarized)).toDF("feature", "expected") diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala new file mode 100644 index 0000000000000..0391bd8427c2c --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketizerSuite.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.feature + +import scala.util.Random + +import org.scalatest.FunSuite + +import org.apache.spark.SparkException +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.sql.{DataFrame, Row} + +class BucketizerSuite extends FunSuite with MLlibTestSparkContext { + + test("Bucket continuous features, without -inf,inf") { + // Check a set of valid feature values. + val splits = Array(-0.5, 0.0, 0.5) + val validData = Array(-0.5, -0.3, 0.0, 0.2) + val expectedBuckets = Array(0.0, 0.0, 1.0, 1.0) + val dataFrame: DataFrame = + sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected") + + val bucketizer: Bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits) + + bucketizer.transform(dataFrame).select("result", "expected").collect().foreach { + case Row(x: Double, y: Double) => + assert(x === y, + s"The feature value is not correct after bucketing. Expected $y but found $x") + } + + // Check for exceptions when using a set of invalid feature values. + val invalidData1: Array[Double] = Array(-0.9) ++ validData + val invalidData2 = Array(0.51) ++ validData + val badDF1 = sqlContext.createDataFrame(invalidData1.zipWithIndex).toDF("feature", "idx") + withClue("Invalid feature value -0.9 was not caught as an invalid feature!") { + intercept[SparkException] { + bucketizer.transform(badDF1).collect() + } + } + val badDF2 = sqlContext.createDataFrame(invalidData2.zipWithIndex).toDF("feature", "idx") + withClue("Invalid feature value 0.51 was not caught as an invalid feature!") { + intercept[SparkException] { + bucketizer.transform(badDF2).collect() + } + } + } + + test("Bucket continuous features, with -inf,inf") { + val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) + val validData = Array(-0.9, -0.5, -0.3, 0.0, 0.2, 0.5, 0.9) + val expectedBuckets = Array(0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 3.0) + val dataFrame: DataFrame = + sqlContext.createDataFrame(validData.zip(expectedBuckets)).toDF("feature", "expected") + + val bucketizer: Bucketizer = new Bucketizer() + .setInputCol("feature") + .setOutputCol("result") + .setSplits(splits) + + bucketizer.transform(dataFrame).select("result", "expected").collect().foreach { + case Row(x: Double, y: Double) => + assert(x === y, + s"The feature value is not correct after bucketing. 
Expected $y but found $x") + } + } + + test("Binary search correctness on hand-picked examples") { + import BucketizerSuite.checkBinarySearch + // length 3, with -inf + checkBinarySearch(Array(Double.NegativeInfinity, 0.0, 1.0)) + // length 4 + checkBinarySearch(Array(-1.0, -0.5, 0.0, 1.0)) + // length 5 + checkBinarySearch(Array(-1.0, -0.5, 0.0, 1.0, 1.5)) + // length 3, with inf + checkBinarySearch(Array(0.0, 1.0, Double.PositiveInfinity)) + // length 3, with -inf and inf + checkBinarySearch(Array(Double.NegativeInfinity, 1.0, Double.PositiveInfinity)) + // length 4, with -inf and inf + checkBinarySearch(Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)) + } + + test("Binary search correctness in contrast with linear search, on random data") { + val data = Array.fill(100)(Random.nextDouble()) + val splits: Array[Double] = Double.NegativeInfinity +: + Array.fill(10)(Random.nextDouble()).sorted :+ Double.PositiveInfinity + val bsResult = Vectors.dense(data.map(x => Bucketizer.binarySearchForBuckets(splits, x))) + val lsResult = Vectors.dense(data.map(x => BucketizerSuite.linearSearchForBuckets(splits, x))) + assert(bsResult ~== lsResult absTol 1e-5) + } +} + +private object BucketizerSuite extends FunSuite { + /** Brute force search for buckets. Bucket i is defined by the range [split(i), split(i+1)). */ + def linearSearchForBuckets(splits: Array[Double], feature: Double): Double = { + require(feature >= splits.head) + var i = 0 + val n = splits.length - 1 + while (i < n) { + if (feature < splits(i + 1)) return i + i += 1 + } + throw new RuntimeException( + s"linearSearchForBuckets failed to find bucket for feature value $feature") + } + + /** Check all values in splits, plus values between all splits. */ + def checkBinarySearch(splits: Array[Double]): Unit = { + def testFeature(feature: Double, expectedBucket: Double): Unit = { + assert(Bucketizer.binarySearchForBuckets(splits, feature) === expectedBucket, + s"Expected feature value $feature to be in bucket $expectedBucket with splits:" + + s" ${splits.mkString(", ")}") + } + var i = 0 + val n = splits.length - 1 + while (i < n) { + // Split i should fall in bucket i. + testFeature(splits(i), i) + // Value between splits i,i+1 should be in i, which is also true if the (i+1)-th split is inf. + testFeature((splits(i) + splits(i + 1)) / 2, i) + i += 1 + } + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala new file mode 100644 index 0000000000000..2e4beb0bfff63 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
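Editor's note on the BucketizerSuite helpers above: the tests pin down the bucketing contract rather than the implementation. Bucket i covers [splits(i), splits(i+1)), values outside the outermost splits are rejected, and Bucketizer.binarySearchForBuckets must agree with the brute-force linearSearchForBuckets reference. The following is an illustrative sketch of that same contract using java.util.Arrays.binarySearch; it is not Spark's implementation, just a compact way to see why splits(i) and the midpoint of splits(i) and splits(i+1) both land in bucket i, exactly as checkBinarySearch asserts.

import java.util.Arrays

// Sketch only: bucket i is [splits(i), splits(i + 1)); an exact hit on the
// last split falls into the last bucket; anything outside the configured
// range is rejected, mirroring the SparkException cases tested above.
def bucketFor(splits: Array[Double], feature: Double): Double = {
  val idx = Arrays.binarySearch(splits, feature)
  if (idx >= 0) {
    // Exact match on a split point.
    math.min(idx, splits.length - 2).toDouble
  } else {
    val insertionPoint = -idx - 1
    require(insertionPoint > 0 && insertionPoint < splits.length,
      s"Feature value $feature is outside the configured splits.")
    (insertionPoint - 1).toDouble
  }
}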
+ */ + +package org.apache.spark.ml.feature + +import org.scalatest.FunSuite + +import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.param.ParamsSuite +import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ +import org.apache.spark.util.Utils + +class HashingTFSuite extends FunSuite with MLlibTestSparkContext { + + test("params") { + val hashingTF = new HashingTF + ParamsSuite.checkParams(hashingTF, 3) + } + + test("hashingTF") { + val df = sqlContext.createDataFrame(Seq( + (0, "a a b b c d".split(" ").toSeq) + )).toDF("id", "words") + val n = 100 + val hashingTF = new HashingTF() + .setInputCol("words") + .setOutputCol("features") + .setNumFeatures(n) + val output = hashingTF.transform(df) + val attrGroup = AttributeGroup.fromStructField(output.schema("features")) + require(attrGroup.numAttributes === Some(n)) + val features = output.select("features").first().getAs[Vector](0) + // Assume perfect hash on "a", "b", "c", and "d". + def idx(any: Any): Int = Utils.nonNegativeMod(any.##, n) + val expected = Vectors.sparse(n, + Seq((idx("a"), 2.0), (idx("b"), 2.0), (idx("c"), 1.0), (idx("d"), 1.0))) + assert(features ~== expected absTol 1e-14) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala index eaee3443c1f23..f85e85471617a 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/IDFSuite.scala @@ -22,17 +22,10 @@ import org.scalatest.FunSuite import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.Row class IDFSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - def scaleDataWithIDF(dataSet: Array[Vector], model: Vector): Array[Vector] = { dataSet.map { case data: DenseVector => diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala new file mode 100644 index 0000000000000..9018d0024d5f0 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/OneHotEncoderSuite.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
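Editor's note on the hashingTF test in HashingTFSuite above: the expected vector rests on one piece of arithmetic, namely that a token lands at index nonNegativeMod(token.##, numFeatures) and repeated occurrences accumulate as its term frequency. A minimal sketch of that bookkeeping follows; it uses a hypothetical helper with a local nonNegativeMod, not the HashingTF code itself.

// Hypothetical helper mirroring the index arithmetic the test assumes:
// index = nonNegativeMod(token.##, numFeatures), value = occurrence count.
def hashedTermFrequencies(tokens: Seq[String], numFeatures: Int): Map[Int, Double] = {
  def nonNegativeMod(x: Int, mod: Int): Int = {
    val raw = x % mod
    if (raw < 0) raw + mod else raw
  }
  tokens
    .groupBy(token => nonNegativeMod(token.##, numFeatures))
    .map { case (idx, ts) => idx -> ts.size.toDouble }
}

// For "a a b b c d".split(" ").toSeq and numFeatures = 100, and assuming no
// hash collisions, this yields counts of 2.0, 2.0, 1.0 and 1.0 at the hashed
// indices of "a", "b", "c" and "d", matching the sparse vector the test expects.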
+ */ + +package org.apache.spark.ml.feature + +import org.scalatest.FunSuite + +import org.apache.spark.ml.attribute.{AttributeGroup, BinaryAttribute, NominalAttribute} +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.functions.col + +class OneHotEncoderSuite extends FunSuite with MLlibTestSparkContext { + + def stringIndexed(): DataFrame = { + val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2) + val df = sqlContext.createDataFrame(data).toDF("id", "label") + val indexer = new StringIndexer() + .setInputCol("label") + .setOutputCol("labelIndex") + .fit(df) + indexer.transform(df) + } + + test("OneHotEncoder dropLast = false") { + val transformed = stringIndexed() + val encoder = new OneHotEncoder() + .setInputCol("labelIndex") + .setOutputCol("labelVec") + .setDropLast(false) + val encoded = encoder.transform(transformed) + + val output = encoded.select("id", "labelVec").map { r => + val vec = r.getAs[Vector](1) + (r.getInt(0), vec(0), vec(1), vec(2)) + }.collect().toSet + // a -> 0, b -> 2, c -> 1 + val expected = Set((0, 1.0, 0.0, 0.0), (1, 0.0, 0.0, 1.0), (2, 0.0, 1.0, 0.0), + (3, 1.0, 0.0, 0.0), (4, 1.0, 0.0, 0.0), (5, 0.0, 1.0, 0.0)) + assert(output === expected) + } + + test("OneHotEncoder dropLast = true") { + val transformed = stringIndexed() + val encoder = new OneHotEncoder() + .setInputCol("labelIndex") + .setOutputCol("labelVec") + val encoded = encoder.transform(transformed) + + val output = encoded.select("id", "labelVec").map { r => + val vec = r.getAs[Vector](1) + (r.getInt(0), vec(0), vec(1)) + }.collect().toSet + // a -> 0, b -> 2, c -> 1 + val expected = Set((0, 1.0, 0.0), (1, 0.0, 0.0), (2, 0.0, 1.0), + (3, 1.0, 0.0), (4, 1.0, 0.0), (5, 0.0, 1.0)) + assert(output === expected) + } + + test("input column with ML attribute") { + val attr = NominalAttribute.defaultAttr.withValues("small", "medium", "large") + val df = sqlContext.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("size") + .select(col("size").as("size", attr.toMetadata())) + val encoder = new OneHotEncoder() + .setInputCol("size") + .setOutputCol("encoded") + val output = encoder.transform(df) + val group = AttributeGroup.fromStructField(output.schema("encoded")) + assert(group.size === 2) + assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("size_is_small").withIndex(0)) + assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("size_is_medium").withIndex(1)) + } + + test("input column without ML attribute") { + val df = sqlContext.createDataFrame(Seq(0.0, 1.0, 2.0, 1.0).map(Tuple1.apply)).toDF("index") + val encoder = new OneHotEncoder() + .setInputCol("index") + .setOutputCol("encoded") + val output = encoder.transform(df) + val group = AttributeGroup.fromStructField(output.schema("encoded")) + assert(group.size === 2) + assert(group.getAttr(0) === BinaryAttribute.defaultAttr.withName("index_is_0").withIndex(0)) + assert(group.getAttr(1) === BinaryAttribute.defaultAttr.withName("index_is_1").withIndex(1)) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala index c1d64fba0aa8f..aa230ca073d5b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala @@ -18,22 
+18,15 @@ package org.apache.spark.ml.feature import org.scalatest.FunSuite +import org.scalatest.exceptions.TestFailedException import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{Row, SQLContext} -import org.scalatest.exceptions.TestFailedException +import org.apache.spark.sql.Row class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - test("Polynomial expansion with default parameter") { val data = Array( Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))), diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala index b6939e5870410..89c2fe45573aa 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala @@ -21,15 +21,8 @@ import org.scalatest.FunSuite import org.apache.spark.ml.attribute.{Attribute, NominalAttribute} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.SQLContext class StringIndexerSuite extends FunSuite with MLlibTestSparkContext { - private var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } test("StringIndexer") { val data = sc.parallelize(Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c")), 2) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala index d186ead8f542f..eabda089d0988 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/TokenizerSuite.scala @@ -22,49 +22,41 @@ import scala.beans.BeanInfo import org.scalatest.FunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{DataFrame, Row, SQLContext} +import org.apache.spark.sql.{DataFrame, Row} @BeanInfo case class TokenizerTestData(rawText: String, wantedTokens: Array[String]) class RegexTokenizerSuite extends FunSuite with MLlibTestSparkContext { import org.apache.spark.ml.feature.RegexTokenizerSuite._ - - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } test("RegexTokenizer") { - val tokenizer = new RegexTokenizer() + val tokenizer0 = new RegexTokenizer() + .setGaps(false) + .setPattern("\\w+|\\p{Punct}") .setInputCol("rawText") .setOutputCol("tokens") - val dataset0 = sqlContext.createDataFrame(Seq( TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization", ".")), TokenizerTestData("Te,st. punct", Array("Te", ",", "st", ".", "punct")) )) - testRegexTokenizer(tokenizer, dataset0) + testRegexTokenizer(tokenizer0, dataset0) val dataset1 = sqlContext.createDataFrame(Seq( TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization")), TokenizerTestData("Te,st. 
punct", Array("punct")) )) + tokenizer0.setMinTokenLength(3) + testRegexTokenizer(tokenizer0, dataset1) - tokenizer.setMinTokenLength(3) - testRegexTokenizer(tokenizer, dataset1) - - tokenizer - .setPattern("\\s") - .setGaps(true) - .setMinTokenLength(0) + val tokenizer2 = new RegexTokenizer() + .setInputCol("rawText") + .setOutputCol("tokens") val dataset2 = sqlContext.createDataFrame(Seq( TokenizerTestData("Test for tokenization.", Array("Test", "for", "tokenization.")), - TokenizerTestData("Te,st. punct", Array("Te,st.", "", "punct")) + TokenizerTestData("Te,st. punct", Array("Te,st.", "punct")) )) - testRegexTokenizer(tokenizer, dataset2) + testRegexTokenizer(tokenizer2, dataset2) } } @@ -74,9 +66,8 @@ object RegexTokenizerSuite extends FunSuite { t.transform(dataset) .select("tokens", "wantedTokens") .collect() - .foreach { - case Row(tokens, wantedTokens) => - assert(tokens === wantedTokens) - } + .foreach { case Row(tokens, wantedTokens) => + assert(tokens === wantedTokens) + } } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index 57d0278e03639..43534e89928b1 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -20,19 +20,14 @@ package org.apache.spark.ml.feature import org.scalatest.FunSuite import org.apache.spark.SparkException -import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.ml.attribute.{AttributeGroup, NominalAttribute, NumericAttribute} +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.Row +import org.apache.spark.sql.functions.col class VectorAssemblerSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ - - override def beforeAll(): Unit = { - super.beforeAll() - sqlContext = new SQLContext(sc) - } - test("assemble") { import org.apache.spark.ml.feature.VectorAssembler.assemble assert(assemble(0.0) === Vectors.sparse(1, Array.empty, Array.empty)) @@ -48,6 +43,14 @@ class VectorAssemblerSuite extends FunSuite with MLlibTestSparkContext { } } + test("assemble should compress vectors") { + import org.apache.spark.ml.feature.VectorAssembler.assemble + val v1 = assemble(0.0, 0.0, 0.0, Vectors.dense(4.0)) + assert(v1.isInstanceOf[SparseVector]) + val v2 = assemble(1.0, 2.0, 3.0, Vectors.sparse(1, Array(0), Array(4.0))) + assert(v2.isInstanceOf[DenseVector]) + } + test("VectorAssembler") { val df = sqlContext.createDataFrame(Seq( (0, 0.0, Vectors.dense(1.0, 2.0), "a", Vectors.sparse(2, Array(1), Array(3.0)), 10L) @@ -60,4 +63,39 @@ class VectorAssemblerSuite extends FunSuite with MLlibTestSparkContext { assert(v === Vectors.sparse(6, Array(1, 2, 4, 5), Array(1.0, 2.0, 3.0, 10.0))) } } + + test("ML attributes") { + val browser = NominalAttribute.defaultAttr.withValues("chrome", "firefox", "safari") + val hour = NumericAttribute.defaultAttr.withMin(0.0).withMax(24.0) + val user = new AttributeGroup("user", Array( + NominalAttribute.defaultAttr.withName("gender").withValues("male", "female"), + NumericAttribute.defaultAttr.withName("salary"))) + val row = (1.0, 0.5, 1, Vectors.dense(1.0, 1000.0), Vectors.sparse(2, Array(1), Array(2.0))) + val df = sqlContext.createDataFrame(Seq(row)).toDF("browser", "hour", 
"count", "user", "ad") + .select( + col("browser").as("browser", browser.toMetadata()), + col("hour").as("hour", hour.toMetadata()), + col("count"), // "count" is an integer column without ML attribute + col("user").as("user", user.toMetadata()), + col("ad")) // "ad" is a vector column without ML attribute + val assembler = new VectorAssembler() + .setInputCols(Array("browser", "hour", "count", "user", "ad")) + .setOutputCol("features") + val output = assembler.transform(df) + val schema = output.schema + val features = AttributeGroup.fromStructField(schema("features")) + assert(features.size === 7) + val browserOut = features.getAttr(0) + assert(browserOut === browser.withIndex(0).withName("browser")) + val hourOut = features.getAttr(1) + assert(hourOut === hour.withIndex(1).withName("hour")) + val countOut = features.getAttr(2) + assert(countOut === NumericAttribute.defaultAttr.withName("count").withIndex(2)) + val userGenderOut = features.getAttr(3) + assert(userGenderOut === user.getAttr("gender").withName("user_gender").withIndex(3)) + val userSalaryOut = features.getAttr(4) + assert(userSalaryOut === user.getAttr("salary").withName("user_salary").withIndex(4)) + assert(features.getAttr(5) === NumericAttribute.defaultAttr.withIndex(5)) + assert(features.getAttr(6) === NumericAttribute.defaultAttr.withIndex(6)) + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala index 38dc83b1241cf..b11b029c6343e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala @@ -26,15 +26,12 @@ import org.apache.spark.ml.attribute._ import org.apache.spark.mllib.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, SQLContext} - +import org.apache.spark.sql.DataFrame class VectorIndexerSuite extends FunSuite with MLlibTestSparkContext { import VectorIndexerSuite.FeatureData - @transient var sqlContext: SQLContext = _ - // identical, of length 3 @transient var densePoints1: DataFrame = _ @transient var sparsePoints1: DataFrame = _ @@ -86,7 +83,6 @@ class VectorIndexerSuite extends FunSuite with MLlibTestSparkContext { checkPair(densePoints1Seq, sparsePoints1Seq) checkPair(densePoints2Seq, sparsePoints2Seq) - sqlContext = new SQLContext(sc) densePoints1 = sqlContext.createDataFrame(sc.parallelize(densePoints1Seq, 2).map(FeatureData)) sparsePoints1 = sqlContext.createDataFrame(sc.parallelize(sparsePoints1Seq, 2).map(FeatureData)) densePoints2 = sqlContext.createDataFrame(sc.parallelize(densePoints2Seq, 2).map(FeatureData)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala index 03ba86670d453..df446d0c22015 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala @@ -35,9 +35,9 @@ class Word2VecSuite extends FunSuite with MLlibTestSparkContext { val doc = sc.parallelize(Seq(sentence, sentence)).map(line => line.split(" ")) val codes = Map( - "a" -> Array(-0.2811822295188904,-0.6356269121170044,-0.3020961284637451), - "b" -> Array(1.0309048891067505,-1.29472815990448,0.22276712954044342), - "c" -> Array(-0.08456747233867645,0.5137411952018738,0.11731560528278351) + "a" -> 
Array(-0.2811822295188904, -0.6356269121170044, -0.3020961284637451), + "b" -> Array(1.0309048891067505, -1.29472815990448, 0.22276712954044342), + "c" -> Array(-0.08456747233867645, 0.5137411952018738, 0.11731560528278351) ) val expected = doc.map { sentence => @@ -52,6 +52,7 @@ class Word2VecSuite extends FunSuite with MLlibTestSparkContext { .setVectorSize(3) .setInputCol("text") .setOutputCol("result") + .setSeed(42L) .fit(docDF) model.transform(docDF).select("result", "expected").collect().foreach { diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala index f8852606abbf2..04f2af4727ea4 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala @@ -23,21 +23,22 @@ class ParamsSuite extends FunSuite { test("param") { val solver = new TestParams() + val uid = solver.uid import solver.{maxIter, inputCol} assert(maxIter.name === "maxIter") assert(maxIter.doc === "max number of iterations (>= 0)") - assert(maxIter.parent.eq(solver)) - assert(maxIter.toString === "maxIter: max number of iterations (>= 0) (default: 10)") + assert(maxIter.parent === uid) + assert(maxIter.toString === s"${uid}__maxIter") assert(!maxIter.isValid(-1)) assert(maxIter.isValid(0)) assert(maxIter.isValid(1)) solver.setMaxIter(5) - assert(maxIter.toString === + assert(solver.explainParam(maxIter) === "maxIter: max number of iterations (>= 0) (default: 10, current: 5)") - assert(inputCol.toString === "inputCol: input column name (undefined)") + assert(inputCol.toString === s"${uid}__inputCol") intercept[IllegalArgumentException] { solver.setMaxIter(-1) @@ -118,23 +119,28 @@ class ParamsSuite extends FunSuite { assert(!solver.isDefined(inputCol)) intercept[NoSuchElementException](solver.getInputCol) - assert(solver.explainParams() === Seq(inputCol, maxIter).mkString("\n")) + assert(solver.explainParam(maxIter) === + "maxIter: max number of iterations (>= 0) (default: 10, current: 100)") + assert(solver.explainParams() === + Seq(inputCol, maxIter).map(solver.explainParam).mkString("\n")) assert(solver.getParam("inputCol").eq(inputCol)) assert(solver.getParam("maxIter").eq(maxIter)) + assert(solver.hasParam("inputCol")) + assert(!solver.hasParam("abc")) intercept[NoSuchElementException] { solver.getParam("abc") } intercept[IllegalArgumentException] { - solver.validate() + solver.validateParams() } - solver.validate(ParamMap(inputCol -> "input")) + solver.copy(ParamMap(inputCol -> "input")).validateParams() solver.setInputCol("input") assert(solver.isSet(inputCol)) assert(solver.isDefined(inputCol)) assert(solver.getInputCol === "input") - solver.validate() + solver.validateParams() intercept[IllegalArgumentException] { ParamMap(maxIter -> -10) } @@ -144,6 +150,11 @@ class ParamsSuite extends FunSuite { solver.clearMaxIter() assert(!solver.isSet(maxIter)) + + val copied = solver.copy(ParamMap(solver.maxIter -> 50)) + assert(copied.uid === solver.uid) + assert(copied.getInputCol === solver.getInputCol) + assert(copied.getMaxIter === 50) } test("ParamValidate") { @@ -190,3 +201,23 @@ class ParamsSuite extends FunSuite { assert(inArray(1) && inArray(2) && !inArray(0)) } } + +object ParamsSuite extends FunSuite { + + /** + * Checks common requirements for [[Params.params]]: 1) number of params; 2) params are ordered + * by names; 3) param parent has the same UID as the object's UID; 4) param name is the same as + * the param method name. 
+ */ + def checkParams(obj: Params, expectedNumParams: Int): Unit = { + val params = obj.params + require(params.length === expectedNumParams, + s"Expect $expectedNumParams params but got ${params.length}: ${params.map(_.name).toSeq}.") + val paramNames = params.map(_.name) + require(paramNames === paramNames.sorted) + params.foreach { p => + assert(p.parent === obj.uid) + assert(obj.getParam(p.name) === p) + } + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala index 6f9c9cb5166ae..a9e78366ad98f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala @@ -18,20 +18,27 @@ package org.apache.spark.ml.param import org.apache.spark.ml.param.shared.{HasInputCol, HasMaxIter} +import org.apache.spark.ml.util.Identifiable /** A subclass of Params for testing. */ -class TestParams extends Params with HasMaxIter with HasInputCol { +class TestParams(override val uid: String) extends Params with HasMaxIter with HasInputCol { + + def this() = this(Identifiable.randomUID("testParams")) def setMaxIter(value: Int): this.type = { set(maxIter, value); this } + def setInputCol(value: String): this.type = { set(inputCol, value); this } setDefault(maxIter -> 10) - override def validate(paramMap: ParamMap): Unit = { - val m = extractParamMap(paramMap) - // Note: maxIter is validated when it is set. - require(m.contains(inputCol)) + def clearMaxIter(): this.type = clear(maxIter) + + override def validateParams(): Unit = { + super.validateParams() + require(isDefined(inputCol)) } - def clearMaxIter(): this.type = clear(maxIter) + override def copy(extra: ParamMap): TestParams = { + super.copy(extra).asInstanceOf[TestParams] + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala new file mode 100644 index 0000000000000..ca18fa1ad3c15 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/ml/param/shared/SharedParamsSuite.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.param.shared + +import org.scalatest.FunSuite + +import org.apache.spark.ml.param.Params + +class SharedParamsSuite extends FunSuite { + + test("outputCol") { + + class Obj(override val uid: String) extends Params with HasOutputCol + + val obj = new Obj("obj") + + assert(obj.hasDefault(obj.outputCol)) + assert(obj.getOrDefault(obj.outputCol) === "obj__output") + } +} diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index fc7349330cf86..9a35555e52b90 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -38,14 +38,12 @@ import org.apache.spark.util.Utils class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { - private var sqlContext: SQLContext = _ private var tempDir: File = _ override def beforeAll(): Unit = { super.beforeAll() tempDir = Utils.createTempDir() sc.setCheckpointDir(tempDir.getAbsolutePath) - sqlContext = new SQLContext(sc) } override def afterAll(): Unit = { @@ -345,6 +343,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { .setImplicitPrefs(implicitPrefs) .setNumUserBlocks(numUserBlocks) .setNumItemBlocks(numItemBlocks) + .setSeed(0) val alpha = als.getAlpha val model = als.fit(training.toDF()) val predictions = model.transform(test.toDF()) @@ -425,17 +424,18 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) val longRatings = ratings.map(r => Rating(r.user.toLong, r.item.toLong, r.rating)) - val (longUserFactors, _) = ALS.train(longRatings, rank = 2, maxIter = 4) + val (longUserFactors, _) = ALS.train(longRatings, rank = 2, maxIter = 4, seed = 0) assert(longUserFactors.first()._1.getClass === classOf[Long]) val strRatings = ratings.map(r => Rating(r.user.toString, r.item.toString, r.rating)) - val (strUserFactors, _) = ALS.train(strRatings, rank = 2, maxIter = 4) + val (strUserFactors, _) = ALS.train(strRatings, rank = 2, maxIter = 4, seed = 0) assert(strUserFactors.first()._1.getClass === classOf[String]) } test("nonnegative constraint") { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) - val (userFactors, itemFactors) = ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true) + val (userFactors, itemFactors) = + ALS.train(ratings, rank = 2, maxIter = 4, nonnegative = true, seed = 0) def isNonnegative(factors: RDD[(Int, Array[Float])]): Boolean = { factors.values.map { _.forall(_ >= 0.0) }.reduce(_ && _) } @@ -459,7 +459,7 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { test("partitioner in returned factors") { val (ratings, _) = genImplicitTestData(numUsers = 20, numItems = 40, rank = 2, noiseStd = 0.01) val (userFactors, itemFactors) = ALS.train( - ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4) + ratings, rank = 2, maxIter = 4, numUserBlocks = 3, numItemBlocks = 4, seed = 0) for ((tpe, factors) <- Seq(("User", userFactors), ("Item", itemFactors))) { assert(userFactors.partitioner.isDefined, s"$tpe factors should have partitioner.") val part = userFactors.partitioner.get @@ -476,8 +476,8 @@ class ALSSuite extends FunSuite with MLlibTestSparkContext with Logging { test("als with large number of iterations") { val (ratings, _) = 
genExplicitTestData(numUsers = 4, numItems = 4, rank = 1) - ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2) - ALS.train( - ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, implicitPrefs = true) + ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, seed = 0) + ALS.train(ratings, rank = 1, maxIter = 50, numUserBlocks = 2, numItemBlocks = 2, + implicitPrefs = true, seed = 0) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala index c87a171b4b229..e6b0c552b7c01 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala @@ -83,9 +83,9 @@ private[ml] object DecisionTreeRegressorSuite extends FunSuite { val oldTree = OldDecisionTree.train(data, oldStrategy) val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses = 0) val newTree = dt.fit(newData) - // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldTreeAsNew = DecisionTreeRegressionModel.fromOld(oldTree, newTree.parent, - newTree.fittingParamMap, categoricalFeatures) + // Use parent from newTree since this is not checked anyways. + val oldTreeAsNew = DecisionTreeRegressionModel.fromOld( + oldTree, newTree.parent.asInstanceOf[DecisionTreeRegressor], categoricalFeatures) TreeTests.checkEqual(oldTreeAsNew, newTree) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 4aec36948ac92..d997a3f8c5da5 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -129,9 +129,9 @@ private object GBTRegressorSuite { val oldModel = oldGBT.run(data) val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses = 0) val newModel = gbt.fit(newData) - // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldModelAsNew = GBTRegressionModel.fromOld(oldModel, newModel.parent, - newModel.fittingParamMap, categoricalFeatures) + // Use parent from newTree since this is not checked anyways. 
+ val oldModelAsNew = GBTRegressionModel.fromOld( + oldModel, newModel.parent.asInstanceOf[GBTRegressor], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala index 80323ef5201a6..50a78631fa6d6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/LinearRegressionSuite.scala @@ -22,11 +22,10 @@ import org.scalatest.FunSuite import org.apache.spark.mllib.linalg.DenseVector import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext} import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.sql.{Row, SQLContext, DataFrame} +import org.apache.spark.sql.{DataFrame, Row} class LinearRegressionSuite extends FunSuite with MLlibTestSparkContext { - @transient var sqlContext: SQLContext = _ @transient var dataset: DataFrame = _ /** @@ -41,7 +40,6 @@ class LinearRegressionSuite extends FunSuite with MLlibTestSparkContext { */ override def beforeAll(): Unit = { super.beforeAll() - sqlContext = new SQLContext(sc) dataset = sqlContext.createDataFrame( sc.parallelize(LinearDataGenerator.generateLinearInput( 6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 10000, 42, 0.1), 2)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index c6dc1cc29b6ff..fe81e84289639 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -114,9 +114,9 @@ private object RandomForestRegressorSuite extends FunSuite { data, oldStrategy, rf.getNumTrees, rf.getFeatureSubsetStrategy, rf.getSeed.toInt) val newData: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses = 0) val newModel = rf.fit(newData) - // Use parent, fittingParamMap from newTree since these are not checked anyways. - val oldModelAsNew = RandomForestRegressionModel.fromOld(oldModel, newModel.parent, - newModel.fittingParamMap, categoricalFeatures) + // Use parent from newTree since this is not checked anyways. 
+ val oldModelAsNew = RandomForestRegressionModel.fromOld( + oldModel, newModel.parent.asInstanceOf[RandomForestRegressor], categoricalFeatures) TreeTests.checkEqual(oldModelAsNew, newModel) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala index 761ea821ef7c6..60d8bfe38fb13 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tuning/CrossValidatorSuite.scala @@ -19,11 +19,15 @@ package org.apache.spark.ml.tuning import org.scalatest.FunSuite +import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.classification.LogisticRegression -import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator +import org.apache.spark.ml.evaluation.{BinaryClassificationEvaluator, Evaluator} +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.param.shared.HasInputCol import org.apache.spark.mllib.classification.LogisticRegressionSuite.generateLogisticInput import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{SQLContext, DataFrame} +import org.apache.spark.sql.{DataFrame, SQLContext} +import org.apache.spark.sql.types.StructType class CrossValidatorSuite extends FunSuite with MLlibTestSparkContext { @@ -49,8 +53,58 @@ class CrossValidatorSuite extends FunSuite with MLlibTestSparkContext { .setEvaluator(eval) .setNumFolds(3) val cvModel = cv.fit(dataset) - val bestParamMap = cvModel.bestModel.fittingParamMap - assert(bestParamMap(lr.regParam) === 0.001) - assert(bestParamMap(lr.maxIter) === 10) + val parent = cvModel.bestModel.parent.asInstanceOf[LogisticRegression] + assert(parent.getRegParam === 0.001) + assert(parent.getMaxIter === 10) + } + + test("validateParams should check estimatorParamMaps") { + import CrossValidatorSuite._ + + val est = new MyEstimator("est") + val eval = new MyEvaluator + val paramMaps = new ParamGridBuilder() + .addGrid(est.inputCol, Array("input1", "input2")) + .build() + + val cv = new CrossValidator() + .setEstimator(est) + .setEstimatorParamMaps(paramMaps) + .setEvaluator(eval) + + cv.validateParams() // This should pass. 
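The test added above exercises CrossValidator.validateParams against every candidate ParamMap. A minimal sketch of that kind of check, using only the Params API shown in this patch (the standalone helper and its name are illustrative, not the actual CrossValidator internals):

    import org.apache.spark.ml.param.{ParamMap, Params}

    // Apply each candidate setting to a copy of the estimator and validate it;
    // an invalid value such as an empty inputCol surfaces as an
    // IllegalArgumentException from validateParams().
    def checkEstimatorParamMaps(est: Params, paramMaps: Array[ParamMap]): Unit = {
      paramMaps.foreach { paramMap =>
        est.copy(paramMap).validateParams()
      }
    }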
+ + val invalidParamMaps = paramMaps :+ ParamMap(est.inputCol -> "") + cv.setEstimatorParamMaps(invalidParamMaps) + intercept[IllegalArgumentException] { + cv.validateParams() + } + } +} + +object CrossValidatorSuite { + + abstract class MyModel extends Model[MyModel] + + class MyEstimator(override val uid: String) extends Estimator[MyModel] with HasInputCol { + + override def validateParams(): Unit = require($(inputCol).nonEmpty) + + override def fit(dataset: DataFrame): MyModel = { + throw new UnsupportedOperationException + } + + override def transformSchema(schema: StructType): StructType = { + throw new UnsupportedOperationException + } + } + + class MyEvaluator extends Evaluator { + + override def evaluate(dataset: DataFrame): Double = { + throw new UnsupportedOperationException + } + + override val uid: String = "eval" } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala index db8ed62fa46ce..3d362b5ee53ea 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/api/python/PythonMLLibAPISuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.api.python import org.scalatest.FunSuite -import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Vectors} +import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Vectors, SparseMatrix} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.recommendation.Rating @@ -77,6 +77,16 @@ class PythonMLLibAPISuite extends FunSuite { val emptyMatrix = Matrices.dense(0, 0, empty) val ne = SerDe.loads(SerDe.dumps(emptyMatrix)).asInstanceOf[DenseMatrix] assert(emptyMatrix == ne) + + val sm = new SparseMatrix(3, 2, Array(0, 1, 3), Array(1, 0, 2), Array(0.9, 1.2, 3.4)) + val nsm = SerDe.loads(SerDe.dumps(sm)).asInstanceOf[SparseMatrix] + assert(sm.toArray === nsm.toArray) + + val smt = new SparseMatrix( + 3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2), Array(0.9, 1.2, 3.4, 5.7, 8.9), + isTransposed = true) + val nsmt = SerDe.loads(SerDe.dumps(smt)).asInstanceOf[SparseMatrix] + assert(smt.toArray === nsmt.toArray) } test("pickle rating") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index a26c52852c4d7..b1014ab7c6203 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -91,21 +91,22 @@ object LogisticRegressionSuite { seed: Int): Seq[LabeledPoint] = { val rnd = new Random(seed) - val xDim = xMean.size + val xDim = xMean.length val xWithInterceptsDim = if (addIntercept) xDim + 1 else xDim - val nClasses = weights.size / xWithInterceptsDim + 1 + val nClasses = weights.length / xWithInterceptsDim + 1 val x = Array.fill[Vector](nPoints)(Vectors.dense(Array.fill[Double](xDim)(rnd.nextGaussian()))) - x.map(vector => { + x.foreach { vector => // This doesn't work if `vector` is a sparse vector. 
val vectorArray = vector.toArray var i = 0 - while (i < vectorArray.size) { + val len = vectorArray.length + while (i < len) { vectorArray(i) = vectorArray(i) * math.sqrt(xVariance(i)) + xMean(i) i += 1 } - }) + } val y = (0 until nPoints).map { idx => val xArray = x(idx).toArray @@ -118,7 +119,7 @@ object LogisticRegressionSuite { } // Preventing the overflow when we compute the probability val maxMargin = margins.max - if (maxMargin > 0) for (i <-0 until nClasses) margins(i) -= maxMargin + if (maxMargin > 0) for (i <- 0 until nClasses) margins(i) -= maxMargin // Computing the probabilities for each class from the margins. val norm = { @@ -129,7 +130,7 @@ object LogisticRegressionSuite { } temp } - for (i <-0 until nClasses) probs(i) /= norm + for (i <- 0 until nClasses) probs(i) /= norm // Compute the cumulative probability so we can generate a random number and assign a label. for (i <- 1 until nClasses) probs(i) += probs(i - 1) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index ea89b17b7c08f..ea40b41bbbe5e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -19,9 +19,8 @@ package org.apache.spark.mllib.classification import scala.util.Random -import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum, Axis} +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax => brzArgmax, sum => brzSum} import breeze.stats.distributions.{Multinomial => BrzMultinomial} - import org.scalatest.FunSuite import org.apache.spark.SparkException @@ -30,9 +29,10 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.util.Utils - object NaiveBayesSuite { + import NaiveBayes.{Multinomial, Bernoulli} + private def calcLabel(p: Double, pi: Array[Double]): Int = { var sum = 0.0 for (j <- 0 until pi.length) { @@ -48,7 +48,7 @@ object NaiveBayesSuite { theta: Array[Array[Double]], // CXD nPoints: Int, seed: Int, - modelType: String = "Multinomial", + modelType: String = Multinomial, sample: Int = 10): Seq[LabeledPoint] = { val D = theta(0).length val rnd = new Random(seed) @@ -58,10 +58,10 @@ object NaiveBayesSuite { for (i <- 0 until nPoints) yield { val y = calcLabel(rnd.nextDouble(), _pi) val xi = modelType match { - case "Bernoulli" => Array.tabulate[Double] (D) { j => + case Bernoulli => Array.tabulate[Double] (D) { j => if (rnd.nextDouble () < _theta(y)(j) ) 1 else 0 } - case "Multinomial" => + case Multinomial => val mult = BrzMultinomial(BDV(_theta(y))) val emptyMap = (0 until D).map(x => (x, 0.0)).toMap val counts = emptyMap ++ mult.sample(sample).groupBy(x => x).map { @@ -70,7 +70,7 @@ object NaiveBayesSuite { counts.toArray.sortBy(_._1).map(_._2) case _ => // This should never happen. 
- throw new UnknownError(s"NaiveBayesSuite found unknown ModelType: $modelType") + throw new UnknownError(s"Invalid modelType: $modelType.") } LabeledPoint(y, Vectors.dense(xi)) @@ -79,17 +79,17 @@ object NaiveBayesSuite { /** Bernoulli NaiveBayes with binary labels, 3 features */ private val binaryBernoulliModel = new NaiveBayesModel(labels = Array(0.0, 1.0), - pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), - "Bernoulli") + pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), Bernoulli) /** Multinomial NaiveBayes with binary labels, 3 features */ private val binaryMultinomialModel = new NaiveBayesModel(labels = Array(0.0, 1.0), - pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), - "Multinomial") + pi = Array(0.2, 0.8), theta = Array(Array(0.1, 0.3, 0.6), Array(0.2, 0.4, 0.4)), Multinomial) } class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { + import NaiveBayes.{Multinomial, Bernoulli} + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { val numOfPredictions = predictions.zip(input).count { case (prediction, expected) => @@ -117,6 +117,11 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { } } + test("model types") { + assert(Multinomial === "multinomial") + assert(Bernoulli === "bernoulli") + } + test("get, set params") { val nb = new NaiveBayes() nb.setLambda(2.0) @@ -134,16 +139,15 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { Array(0.10, 0.10, 0.70, 0.10) // label 2 ).map(_.map(math.log)) - val testData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 42, "Multinomial") + val testData = NaiveBayesSuite.generateNaiveBayesInput(pi, theta, nPoints, 42, Multinomial) val testRDD = sc.parallelize(testData, 2) testRDD.cache() - val model = NaiveBayes.train(testRDD, 1.0, "Multinomial") + val model = NaiveBayes.train(testRDD, 1.0, Multinomial) validateModelFit(pi, theta, model) val validationData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 17, "Multinomial") + pi, theta, nPoints, 17, Multinomial) val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. @@ -159,19 +163,19 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { val theta = Array( Array(0.50, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.40), // label 0 Array(0.02, 0.70, 0.10, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02), // label 1 - Array(0.02, 0.02, 0.60, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.30) // label 2 + Array(0.02, 0.02, 0.60, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.30) // label 2 ).map(_.map(math.log)) val testData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 45, "Bernoulli") + pi, theta, nPoints, 45, Bernoulli) val testRDD = sc.parallelize(testData, 2) testRDD.cache() - val model = NaiveBayes.train(testRDD, 1.0, "Bernoulli") + val model = NaiveBayes.train(testRDD, 1.0, Bernoulli) validateModelFit(pi, theta, model) val validationData = NaiveBayesSuite.generateNaiveBayesInput( - pi, theta, nPoints, 20, "Bernoulli") + pi, theta, nPoints, 20, Bernoulli) val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. 
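For reference, the modelType argument in these NaiveBayes tests now uses the NaiveBayes.Multinomial and NaiveBayes.Bernoulli constants rather than hand-typed strings. A brief usage sketch against the same train signature (trainingData is a placeholder RDD[LabeledPoint]):

    import org.apache.spark.mllib.classification.NaiveBayes
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.rdd.RDD

    // Train one model per model type, passing the named constants instead of
    // raw "Multinomial" / "Bernoulli" strings; lambda = 1.0 matches the tests above.
    def trainBothModelTypes(trainingData: RDD[LabeledPoint]) = {
      val multinomialModel = NaiveBayes.train(trainingData, 1.0, NaiveBayes.Multinomial)
      val bernoulliModel = NaiveBayes.train(trainingData, 1.0, NaiveBayes.Bernoulli)
      (multinomialModel, bernoulliModel)
    }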
@@ -208,6 +212,39 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { } } + test("detect non zero or one values in Bernoulli") { + val badTrain = Seq( + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(2.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(0.0))) + + intercept[SparkException] { + NaiveBayes.train(sc.makeRDD(badTrain, 2), 1.0, Bernoulli) + } + + val okTrain = Seq( + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(0.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(0.0, Vectors.dense(0.0)), + LabeledPoint(1.0, Vectors.dense(1.0)), + LabeledPoint(1.0, Vectors.dense(1.0)) + ) + + val badPredict = Seq( + Vectors.dense(1.0), + Vectors.dense(2.0), + Vectors.dense(1.0), + Vectors.dense(0.0)) + + val model = NaiveBayes.train(sc.makeRDD(okTrain, 2), 1.0, Bernoulli) + intercept[SparkException] { + model.predict(sc.makeRDD(badPredict, 2)).collect() + } + } + test("model save/load: 2.0 to 2.0") { val tempDir = Utils.createTempDir() val path = tempDir.toURI.toString @@ -242,7 +279,7 @@ class NaiveBayesSuite extends FunSuite with MLlibTestSparkContext { assert(model.labels === sameModel.labels) assert(model.pi === sameModel.pi) assert(model.theta === sameModel.theta) - assert(model.modelType === "Multinomial") + assert(model.modelType === Multinomial) } finally { Utils.deleteRecursively(tempDir) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala index 6de098b383ba3..90f9cec6855bf 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala @@ -46,7 +46,7 @@ object SVMSuite { nPoints: Int, seed: Int): Seq[LabeledPoint] = { val rnd = new Random(seed) - val weightsMat = new DoubleMatrix(1, weights.length, weights:_*) + val weightsMat = new DoubleMatrix(1, weights.length, weights : _*) val x = Array.fill[Array[Double]](nPoints)( Array.fill[Double](weights.length)(rnd.nextDouble() * 2.0 - 1.0)) val y = x.map { xi => @@ -91,7 +91,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext { val model = svm.run(testRDD) val validationData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 17) - val validationRDD = sc.parallelize(validationData, 2) + val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. @@ -117,7 +117,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext { val B = -1.5 val C = 1.0 - val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42) + val testData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 42) val testRDD = sc.parallelize(testData, 2) testRDD.cache() @@ -127,8 +127,8 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext { val model = svm.run(testRDD) - val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17) - val validationRDD = sc.parallelize(validationData, 2) + val validationData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 17) + val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. 
validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) @@ -145,7 +145,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext { val B = -1.5 val C = 1.0 - val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42) + val testData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 42) val initialB = -1.0 val initialC = -1.0 @@ -159,8 +159,8 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext { val model = svm.run(testRDD, initialWeights) - val validationData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 17) - val validationRDD = sc.parallelize(validationData,2) + val validationData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 17) + val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) @@ -177,7 +177,7 @@ class SVMSuite extends FunSuite with MLlibTestSparkContext { val B = -1.5 val C = 1.0 - val testData = SVMSuite.generateSVMInput(A, Array[Double](B,C), nPoints, 42) + val testData = SVMSuite.generateSVMInput(A, Array[Double](B, C), nPoints, 42) val testRDD = sc.parallelize(testData, 2) val testRDDInvalid = testRDD.map { lp => diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala index f356ffa3e3a26..4c17808feb2b5 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/GaussianMixtureSuite.scala @@ -47,7 +47,7 @@ class GaussianMixtureSuite extends FunSuite with MLlibTestSparkContext { } } - + test("two clusters") { val data = sc.parallelize(GaussianTestData.data) @@ -63,7 +63,7 @@ class GaussianMixtureSuite extends FunSuite with MLlibTestSparkContext { val Ew = Array(1.0 / 3.0, 2.0 / 3.0) val Emu = Array(Vectors.dense(-4.3673), Vectors.dense(5.1604)) val Esigma = Array(Matrices.dense(1, 1, Array(1.1098)), Matrices.dense(1, 1, Array(0.86644))) - + val gmm = new GaussianMixture() .setK(2) .setInitialModel(initialGmm) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala index 0f2b26d462ad2..877e6dc699523 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala @@ -75,7 +75,7 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext { val center = Vectors.dense(1.0, 2.0, 3.0) // Make sure code runs. - var model = KMeans.train(data, k=2, maxIterations=1) + var model = KMeans.train(data, k = 2, maxIterations = 1) assert(model.clusterCenters.size === 2) } @@ -87,7 +87,7 @@ class KMeansSuite extends FunSuite with MLlibTestSparkContext { 2) // Make sure code runs. 
- var model = KMeans.train(data, k=3, maxIterations=1) + var model = KMeans.train(data, k = 3, maxIterations = 1) assert(model.clusterCenters.size === 3) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 2dcc881f5abd2..d5b7d96335744 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -138,12 +138,12 @@ class LDASuite extends FunSuite with MLlibTestSparkContext { val lda = new LDA().setK(2) val corpus = sc.parallelize(tinyCorpus, 2) val op = new OnlineLDAOptimizer().initialize(corpus, lda) - op.setKappa(0.9876).setMiniBatchFraction(0.123).setTau_0(567) + op.setKappa(0.9876).setMiniBatchFraction(0.123).setTau0(567) assert(op.getAlpha == 0.5) // default 1.0 / k assert(op.getEta == 0.5) // default 1.0 / k assert(op.getKappa == 0.9876) assert(op.getMiniBatchFraction == 0.123) - assert(op.getTau_0 == 567) + assert(op.getTau0 == 567) } test("OnlineLDAOptimizer one iteration") { @@ -159,7 +159,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext { val corpus = sc.parallelize(docs, 2) // Set GammaShape large to avoid the stochastic impact. - val op = new OnlineLDAOptimizer().setTau_0(1024).setKappa(0.51).setGammaShape(1e40) + val op = new OnlineLDAOptimizer().setTau0(1024).setKappa(0.51).setGammaShape(1e40) .setMiniBatchFraction(1) val lda = new LDA().setK(k).setMaxIterations(1).setOptimizer(op).setSeed(12345) @@ -192,7 +192,7 @@ class LDASuite extends FunSuite with MLlibTestSparkContext { ).zipWithIndex.map { case (wordCounts, docId) => (docId.toLong, wordCounts) } val docs = sc.parallelize(toydata) - val op = new OnlineLDAOptimizer().setMiniBatchFraction(1).setTau_0(1024).setKappa(0.51) + val op = new OnlineLDAOptimizer().setMiniBatchFraction(1).setTau0(1024).setKappa(0.51) .setGammaShape(1e10) val lda = new LDA().setK(2) .setDocConcentration(0.01) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala index 6d6fe6fe46bab..9d2cd0a143db8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala @@ -58,7 +58,7 @@ class PowerIterationClusteringSuite extends FunSuite with MLlibTestSparkContext predictions(a.cluster) += a.id } assert(predictions.toSet == Set((0 to 3).toSet, (4 to 15).toSet)) - + val model2 = new PowerIterationClustering() .setK(2) .setInitializationMode("degree") @@ -94,11 +94,13 @@ class PowerIterationClusteringSuite extends FunSuite with MLlibTestSparkContext */ val similarities = Seq[(Long, Long, Double)]( (0, 1, 1.0), (0, 2, 1.0), (0, 3, 1.0), (1, 2, 1.0), (2, 3, 1.0)) + // scalastyle:off val expected = Array( Array(0.0, 1.0/3.0, 1.0/3.0, 1.0/3.0), Array(1.0/2.0, 0.0, 1.0/2.0, 0.0), Array(1.0/3.0, 1.0/3.0, 0.0, 1.0/3.0), Array(1.0/2.0, 0.0, 1.0/2.0, 0.0)) + // scalastyle:on val w = normalize(sc.parallelize(similarities, 2)) w.edges.collect().foreach { case Edge(i, j, x) => assert(x ~== expected(i.toInt)(j.toInt) absTol 1e-14) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala index f90025d535e45..13f9b17c027a4 100644 --- 
a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala @@ -133,6 +133,13 @@ class StreamingKMeansSuite extends FunSuite with TestSuiteBase { assert(math.abs(c1) ~== 0.8 absTol 0.6) } + test("SPARK-7946 setDecayFactor") { + val kMeans = new StreamingKMeans() + assert(kMeans.decayFactor === 1.0) + kMeans.setDecayFactor(2.0) + assert(kMeans.decayFactor === 2.0) + } + def StreamingKMeansDataGenerator( numPoints: Int, numBatches: Int, diff --git a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala index 670b4c34e6095..3aa732474ec2e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/evaluation/RegressionMetricsSuite.scala @@ -26,7 +26,7 @@ class RegressionMetricsSuite extends FunSuite with MLlibTestSparkContext { test("regression metrics") { val predictionAndObservations = sc.parallelize( - Seq((2.5,3.0),(0.0,-0.5),(2.0,2.0),(8.0,7.0)), 2) + Seq((2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)), 2) val metrics = new RegressionMetrics(predictionAndObservations) assert(metrics.explainedVariance ~== 0.95717 absTol 1E-5, "explained variance regression score mismatch") @@ -39,7 +39,7 @@ class RegressionMetricsSuite extends FunSuite with MLlibTestSparkContext { test("regression metrics with complete fitting") { val predictionAndObservations = sc.parallelize( - Seq((3.0,3.0),(0.0,0.0),(2.0,2.0),(8.0,8.0)), 2) + Seq((3.0, 3.0), (0.0, 0.0), (2.0, 2.0), (8.0, 8.0)), 2) val metrics = new RegressionMetrics(predictionAndObservations) assert(metrics.explainedVariance ~== 1.0 absTol 1E-5, "explained variance regression score mismatch") diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala new file mode 100644 index 0000000000000..f3a482abda873 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/ElementwiseProductSuite.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.mllib.feature + +import org.scalatest.FunSuite + +import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors} +import org.apache.spark.mllib.util.MLlibTestSparkContext +import org.apache.spark.mllib.util.TestingUtils._ + +class ElementwiseProductSuite extends FunSuite with MLlibTestSparkContext { + + test("elementwise (hadamard) product should properly apply vector to dense data set") { + val denseData = Array( + Vectors.dense(1.0, 4.0, 1.9, -9.0) + ) + val scalingVec = Vectors.dense(2.0, 0.5, 0.0, 0.25) + val transformer = new ElementwiseProduct(scalingVec) + val transformedData = transformer.transform(sc.makeRDD(denseData)) + val transformedVecs = transformedData.collect() + val transformedVec = transformedVecs(0) + val expectedVec = Vectors.dense(2.0, 2.0, 0.0, -2.25) + assert(transformedVec ~== expectedVec absTol 1E-5, + s"Expected transformed vector $expectedVec but found $transformedVec") + } + + test("elementwise (hadamard) product should properly apply vector to sparse data set") { + val sparseData = Array( + Vectors.sparse(3, Seq((1, -1.0), (2, -3.0))) + ) + val dataRDD = sc.parallelize(sparseData, 3) + val scalingVec = Vectors.dense(1.0, 0.0, 0.5) + val transformer = new ElementwiseProduct(scalingVec) + val data2 = sparseData.map(transformer.transform) + val data2RDD = transformer.transform(dataRDD) + + assert((sparseData, data2, data2RDD.collect()).zipped.forall { + case (v1: DenseVector, v2: DenseVector, v3: DenseVector) => true + case (v1: SparseVector, v2: SparseVector, v3: SparseVector) => true + case _ => false + }, "The vector type should be preserved after hadamard product") + + assert((data2, data2RDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) + assert(data2(0) ~== Vectors.sparse(3, Seq((1, 0.0), (2, -1.5))) absTol 1E-5) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala index 85fdd271b5ed1..5c4af2b99e68b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/NormalizerSuite.scala @@ -106,10 +106,10 @@ class NormalizerSuite extends FunSuite with MLlibTestSparkContext { assert((dataInf, dataInfRDD.collect()).zipped.forall((v1, v2) => v1 ~== v2 absTol 1E-5)) - assert(dataInf(0).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) - assert(dataInf(2).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) - assert(dataInf(3).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) - assert(dataInf(4).toArray.map(Math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(0).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(2).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(3).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) + assert(dataInf(4).toArray.map(math.abs).max ~== 1.0 absTol 1E-5) assert(dataInf(0) ~== Vectors.sparse(3, Seq((0, -0.86956522), (1, 1.0))) absTol 1E-5) assert(dataInf(1) ~== Vectors.dense(0.0, 0.0, 0.0) absTol 1E-5) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala new file mode 100644 index 0000000000000..758af588f1c69 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/PCASuite.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.feature + +import org.scalatest.FunSuite + +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.linalg.distributed.RowMatrix +import org.apache.spark.mllib.util.MLlibTestSparkContext + +class PCASuite extends FunSuite with MLlibTestSparkContext { + + private val data = Array( + Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))), + Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0), + Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0) + ) + + private lazy val dataRDD = sc.parallelize(data, 2) + + test("Correct computing use a PCA wrapper") { + val k = dataRDD.count().toInt + val pca = new PCA(k).fit(dataRDD) + + val mat = new RowMatrix(dataRDD) + val pc = mat.computePrincipalComponents(k) + + val pca_transform = pca.transform(dataRDD).collect() + val mat_multiply = mat.multiply(pc).rows.collect() + + assert(pca_transform.toSet === mat_multiply.toSet) + } +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala index 7f94564b2a3ae..1eb991869de40 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/StandardScalerSuite.scala @@ -360,7 +360,7 @@ class StandardScalerSuite extends FunSuite with MLlibTestSparkContext { } withClue("model needs std and mean vectors to be equal size when both are provided") { intercept[IllegalArgumentException] { - val model = new StandardScalerModel(Vectors.dense(0.0), Vectors.dense(0.0,1.0)) + val model = new StandardScalerModel(Vectors.dense(0.0), Vectors.dense(0.0, 1.0)) } } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala index 002cb253862b5..f5d7b3926c7cd 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BLASSuite.scala @@ -140,7 +140,7 @@ class BLASSuite extends FunSuite { syr(alpha, x, dA) assert(dA ~== expected absTol 1e-15) - + val dB = new DenseMatrix(3, 4, Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0)) @@ -149,7 +149,7 @@ class BLASSuite extends FunSuite { syr(alpha, x, dB) } } - + val dC = new DenseMatrix(3, 3, Array(0.0, 1.2, 2.2, 1.2, 3.2, 5.3, 2.2, 5.3, 1.8)) @@ -158,7 +158,7 @@ class BLASSuite extends FunSuite { syr(alpha, x, dC) } } - + val y = new DenseVector(Array(0.0, 2.7, 3.5, 2.1, 1.5)) withClue("Size of vector must match the rank of matrix") { @@ -257,32 +257,96 @@ class BLASSuite extends FunSuite { new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0)) val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0)) - val x = new 
DenseVector(Array(1.0, 2.0, 3.0)) + val dA2 = + new DenseMatrix(4, 3, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0), true) + val sA2 = + new SparseMatrix(4, 3, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0), + true) + + val dx = new DenseVector(Array(1.0, 2.0, 3.0)) + val sx = dx.toSparse val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0)) - assert(dA.multiply(x) ~== expected absTol 1e-15) - assert(sA.multiply(x) ~== expected absTol 1e-15) + assert(dA.multiply(dx) ~== expected absTol 1e-15) + assert(sA.multiply(dx) ~== expected absTol 1e-15) + assert(dA.multiply(sx) ~== expected absTol 1e-15) + assert(sA.multiply(sx) ~== expected absTol 1e-15) val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0)) val y2 = y1.copy val y3 = y1.copy val y4 = y1.copy + val y5 = y1.copy + val y6 = y1.copy + val y7 = y1.copy + val y8 = y1.copy + val y9 = y1.copy + val y10 = y1.copy + val y11 = y1.copy + val y12 = y1.copy + val y13 = y1.copy + val y14 = y1.copy + val y15 = y1.copy + val y16 = y1.copy + val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0)) val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0)) - gemv(1.0, dA, x, 2.0, y1) - gemv(1.0, sA, x, 2.0, y2) - gemv(2.0, dA, x, 2.0, y3) - gemv(2.0, sA, x, 2.0, y4) + gemv(1.0, dA, dx, 2.0, y1) + gemv(1.0, sA, dx, 2.0, y2) + gemv(1.0, dA, sx, 2.0, y3) + gemv(1.0, sA, sx, 2.0, y4) + + gemv(1.0, dA2, dx, 2.0, y5) + gemv(1.0, sA2, dx, 2.0, y6) + gemv(1.0, dA2, sx, 2.0, y7) + gemv(1.0, sA2, sx, 2.0, y8) + + gemv(2.0, dA, dx, 2.0, y9) + gemv(2.0, sA, dx, 2.0, y10) + gemv(2.0, dA, sx, 2.0, y11) + gemv(2.0, sA, sx, 2.0, y12) + + gemv(2.0, dA2, dx, 2.0, y13) + gemv(2.0, sA2, dx, 2.0, y14) + gemv(2.0, dA2, sx, 2.0, y15) + gemv(2.0, sA2, sx, 2.0, y16) + assert(y1 ~== expected2 absTol 1e-15) assert(y2 ~== expected2 absTol 1e-15) - assert(y3 ~== expected3 absTol 1e-15) - assert(y4 ~== expected3 absTol 1e-15) + assert(y3 ~== expected2 absTol 1e-15) + assert(y4 ~== expected2 absTol 1e-15) + + assert(y5 ~== expected2 absTol 1e-15) + assert(y6 ~== expected2 absTol 1e-15) + assert(y7 ~== expected2 absTol 1e-15) + assert(y8 ~== expected2 absTol 1e-15) + + assert(y9 ~== expected3 absTol 1e-15) + assert(y10 ~== expected3 absTol 1e-15) + assert(y11 ~== expected3 absTol 1e-15) + assert(y12 ~== expected3 absTol 1e-15) + + assert(y13 ~== expected3 absTol 1e-15) + assert(y14 ~== expected3 absTol 1e-15) + assert(y15 ~== expected3 absTol 1e-15) + assert(y16 ~== expected3 absTol 1e-15) + withClue("columns of A don't match the rows of B") { intercept[Exception] { - gemv(1.0, dA.transpose, x, 2.0, y1) + gemv(1.0, dA.transpose, dx, 2.0, y1) + } + intercept[Exception] { + gemv(1.0, sA.transpose, dx, 2.0, y1) + } + intercept[Exception] { + gemv(1.0, dA.transpose, sx, 2.0, y1) + } + intercept[Exception] { + gemv(1.0, sA.transpose, sx, 2.0, y1) } } + val dAT = new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0)) val sAT = @@ -291,7 +355,9 @@ class BLASSuite extends FunSuite { val dATT = dAT.transpose val sATT = sAT.transpose - assert(dATT.multiply(x) ~== expected absTol 1e-15) - assert(sATT.multiply(x) ~== expected absTol 1e-15) + assert(dATT.multiply(dx) ~== expected absTol 1e-15) + assert(sATT.multiply(dx) ~== expected absTol 1e-15) + assert(dATT.multiply(sx) ~== expected absTol 1e-15) + assert(sATT.multiply(sx) ~== expected absTol 1e-15) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 
24755e9ff46fc..0c6be0e745873 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -215,13 +215,13 @@ class VectorsSuite extends FunSuite { val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze) - // SparseVector vs. SparseVector - assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8) + // SparseVector vs. SparseVector + assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8) // DenseVector vs. SparseVector assert(Vectors.sqdist(denseVector1, sparseVector2) ~== squaredDist relTol 1E-8) // DenseVector vs. DenseVector assert(Vectors.sqdist(denseVector1, denseVector2) ~== squaredDist relTol 1E-8) - } + } } test("foreachActive") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index 949d1c9939570..a58336175899c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -57,11 +57,13 @@ class BlockMatrixSuite extends FunSuite with MLlibTestSparkContext { val random = new ju.Random() // This should generate a 4x4 grid of 1x2 blocks. val part0 = GridPartitioner(4, 7, suggestedNumPartitions = 12) + // scalastyle:off val expected0 = Array( Array(0, 0, 4, 4, 8, 8, 12), Array(1, 1, 5, 5, 9, 9, 13), Array(2, 2, 6, 6, 10, 10, 14), Array(3, 3, 7, 7, 11, 11, 15)) + // scalastyle:on for (i <- 0 until 4; j <- 0 until 7) { assert(part0.getPartition((i, j)) === expected0(i)(j)) assert(part0.getPartition((i, j, random.nextInt())) === expected0(i)(j)) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 3309713e91f87..27bb19f472e1e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -96,7 +96,7 @@ class RowMatrixSuite extends FunSuite with MLlibTestSparkContext { } test("similar columns") { - val colMags = Vectors.dense(Math.sqrt(126), Math.sqrt(66), Math.sqrt(94)) + val colMags = Vectors.dense(math.sqrt(126), math.sqrt(66), math.sqrt(94)) val expected = BDM( (0.0, 54.0, 72.0), (0.0, 0.0, 78.0), @@ -232,7 +232,7 @@ class RowMatrixSuite extends FunSuite with MLlibTestSparkContext { assert(summary.numNonzeros === Vectors.dense(3.0, 3.0, 4.0), "nnz mismatch") assert(summary.max === Vectors.dense(9.0, 7.0, 8.0), "max mismatch") assert(summary.min === Vectors.dense(0.0, 0.0, 1.0), "column mismatch.") - assert(summary.normL2 === Vectors.dense(Math.sqrt(126), Math.sqrt(66), Math.sqrt(94)), + assert(summary.normL2 === Vectors.dense(math.sqrt(126), math.sqrt(66), math.sqrt(94)), "magnitude mismatch.") assert(summary.normL1 === Vectors.dense(18.0, 12.0, 16.0), "L1 norm mismatch") } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala index 86481c6e66200..e110506d579b0 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/GradientDescentSuite.scala @@ -42,7 
+42,7 @@ object GradientDescentSuite { offset: Double, scale: Double, nPoints: Int, - seed: Int): Seq[LabeledPoint] = { + seed: Int): Seq[LabeledPoint] = { val rnd = new Random(seed) val x1 = Array.fill[Double](nPoints)(rnd.nextGaussian()) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala index 70c64775e4c04..c8f2adcf155a7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/LBFGSSuite.scala @@ -89,7 +89,7 @@ class LBFGSSuite extends FunSuite with MLlibTestSparkContext with Matchers { // it requires 90 iterations in GD. No matter how hard we increase // the number of iterations in GD here, the lossGD will be always // larger than lossLBFGS. This is based on observation, no theoretically guaranteed - assert(Math.abs((lossGD.last - loss.last) / loss.last) < 0.02, + assert(math.abs((lossGD.last - loss.last) / loss.last) < 0.02, "LBFGS should match GD result within 2% difference.") } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala index 22855e4e8f247..bb723fc471181 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/optimization/NNLSSuite.scala @@ -68,12 +68,14 @@ class NNLSSuite extends FunSuite { test("NNLS: nonnegativity constraint active") { val n = 5 + // scalastyle:off val ata = new DoubleMatrix(Array( Array( 4.377, -3.531, -1.306, -0.139, 3.418), Array(-3.531, 4.344, 0.934, 0.305, -2.140), Array(-1.306, 0.934, 2.644, -0.203, -0.170), Array(-0.139, 0.305, -0.203, 5.883, 1.428), Array( 3.418, -2.140, -0.170, 1.428, 4.684))) + // scalastyle:on val atb = new DoubleMatrix(Array(-1.632, 2.115, 1.094, -1.025, -0.636)) val goodx = Array(0.13025, 0.54506, 0.2874, 0.0, 0.028628) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExportSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExportSuite.scala index 0b646cf1ce6c4..5f6d12ed82d99 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExportSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/BinaryClassificationPMMLModelExportSuite.scala @@ -53,13 +53,13 @@ class BinaryClassificationPMMLModelExportSuite extends FunSuite { // ensure logistic regression has normalization method set to LOGIT assert(pmmlRegressionModel.getNormalizationMethod() == RegressionNormalizationMethodType.LOGIT) } - + test("linear SVM PMML export") { val linearInput = LinearDataGenerator.generateLinearInput(3.0, Array(10.0, 10.0), 1, 17) val svmModel = new SVMModel(linearInput(0).features, linearInput(0).label) - + val svmModelExport = PMMLModelExportFactory.createPMMLModelExport(svmModel) - + // assert that the PMML format is as expected assert(svmModelExport.isInstanceOf[PMMLModelExport]) val pmml = svmModelExport.getPmml @@ -80,5 +80,5 @@ class BinaryClassificationPMMLModelExportSuite extends FunSuite { // ensure linear SVM has normalization method set to NONE assert(pmmlRegressionModel.getNormalizationMethod() == RegressionNormalizationMethodType.NONE) } - + } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExportSuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExportSuite.scala index b985d0446d7b0..0648674cfa831 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExportSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/KMeansPMMLModelExportSuite.scala @@ -45,5 +45,5 @@ class KMeansPMMLModelExportSuite extends FunSuite { val pmmlClusteringModel = pmml.getModels.get(0).asInstanceOf[ClusteringModel] assert(pmmlClusteringModel.getNumberOfClusters === clusterCenters.length) } - + } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactorySuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactorySuite.scala index f28a4ac8ad01f..7be5439222f92 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactorySuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/pmml/export/PMMLModelExportFactorySuite.scala @@ -61,25 +61,25 @@ class PMMLModelExportFactorySuite extends FunSuite { test("PMMLModelExportFactory create BinaryClassificationPMMLModelExport " + "when passing a LogisticRegressionModel or SVMModel") { val linearInput = LinearDataGenerator.generateLinearInput(3.0, Array(10.0, 10.0), 1, 17) - + val logisticRegressionModel = new LogisticRegressionModel(linearInput(0).features, linearInput(0).label) val logisticRegressionModelExport = PMMLModelExportFactory.createPMMLModelExport(logisticRegressionModel) assert(logisticRegressionModelExport.isInstanceOf[BinaryClassificationPMMLModelExport]) - + val svmModel = new SVMModel(linearInput(0).features, linearInput(0).label) val svmModelExport = PMMLModelExportFactory.createPMMLModelExport(svmModel) assert(svmModelExport.isInstanceOf[BinaryClassificationPMMLModelExport]) } - + test("PMMLModelExportFactory throw IllegalArgumentException " + "when passing a Multinomial Logistic Regression") { /** 3 classes, 2 features */ val multiclassLogisticRegressionModel = new LogisticRegressionModel( - weights = Vectors.dense(0.1, 0.2, 0.3, 0.4), intercept = 1.0, + weights = Vectors.dense(0.1, 0.2, 0.3, 0.4), intercept = 1.0, numFeatures = 2, numClasses = 3) - + intercept[IllegalArgumentException] { PMMLModelExportFactory.createPMMLModelExport(multiclassLogisticRegressionModel) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala index cb8fe4dba96f5..57216e8eb4a55 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala @@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._ class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext { test("topByKey") { - val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2) - .topByKey(2) + val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3, 7), (5, + 1), (3, 5)), 2) + .topByKey(5) .collectAsMap() assert(topMap.size === 3) - assert(topMap(1) === Array(2, 1)) - assert(topMap(3) === Array(7, 5)) + assert(topMap(1) === Array(7, 6, 3, 2, 1)) + assert(topMap(3) === Array(7, 5, 2)) assert(topMap(5) === Array(1)) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala index 8e12340bbd9d6..3b38bdf5ef5eb 100644 
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/IsotonicRegressionSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils class IsotonicRegressionSuite extends FunSuite with MLlibTestSparkContext with Matchers { private def round(d: Double) = { - Math.round(d * 100).toDouble / 100 + math.round(d * 100).toDouble / 100 } private def generateIsotonicInput(labels: Seq[Double]): Seq[(Double, Double, Double)] = { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala index c9f5dc069ef2e..71dce50922991 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala @@ -67,11 +67,12 @@ class LassoSuite extends FunSuite with MLlibTestSparkContext { assert(weight1 >= -1.60 && weight1 <= -1.40, weight1 + " not in [-1.6, -1.4]") assert(weight2 >= -1.0e-3 && weight2 <= 1.0e-3, weight2 + " not in [-0.001, 0.001]") - val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17) + val validationData = LinearDataGenerator + .generateLinearInput(A, Array[Double](B, C), nPoints, 17) .map { case LabeledPoint(label, features) => LabeledPoint(label, Vectors.dense(1.0 +: features.toArray)) } - val validationRDD = sc.parallelize(validationData, 2) + val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) @@ -110,11 +111,12 @@ class LassoSuite extends FunSuite with MLlibTestSparkContext { assert(weight1 >= -1.60 && weight1 <= -1.40, weight1 + " not in [-1.6, -1.4]") assert(weight2 >= -1.0e-3 && weight2 <= 1.0e-3, weight2 + " not in [-0.001, 0.001]") - val validationData = LinearDataGenerator.generateLinearInput(A, Array[Double](B,C), nPoints, 17) + val validationData = LinearDataGenerator + .generateLinearInput(A, Array[Double](B, C), nPoints, 17) .map { case LabeledPoint(label, features) => LabeledPoint(label, Vectors.dense(1.0 +: features.toArray)) } - val validationRDD = sc.parallelize(validationData,2) + val validationRDD = sc.parallelize(validationData, 2) // Test prediction on RDD. 
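A note on the MLPairRDDFunctionsSuite hunk earlier in this diff: topByKey(k) keeps the k largest values per key and returns them in descending order. A minimal sketch of the call, assuming a live SparkContext named sc and the same implicit import used in that suite:

    import org.apache.spark.mllib.rdd.MLPairRDDFunctions._

    // Keep the two largest values for each key; values come back sorted descending.
    val top = sc.parallelize(Seq((1, 7), (1, 3), (1, 6), (3, 2), (3, 7), (5, 1)), 2)
      .topByKey(2)
      .collectAsMap()
    // top(1) == Array(7, 6); top(3) == Array(7, 2); top(5) == Array(1)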
validatePrediction(model.predict(validationRDD.map(_.features)).collect(), validationData) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala index d20a09b4b4925..a7e6fce31ff7e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/CorrelationSuite.scala @@ -96,11 +96,13 @@ class CorrelationSuite extends FunSuite with MLlibTestSparkContext { val X = sc.parallelize(data) val defaultMat = Statistics.corr(X) val pearsonMat = Statistics.corr(X, "pearson") + // scalastyle:off val expected = BDM( (1.00000000, 0.05564149, Double.NaN, 0.4004714), (0.05564149, 1.00000000, Double.NaN, 0.9135959), (Double.NaN, Double.NaN, 1.00000000, Double.NaN), - (0.40047142, 0.91359586, Double.NaN,1.0000000)) + (0.40047142, 0.91359586, Double.NaN, 1.0000000)) + // scalastyle:on assert(matrixApproxEqual(defaultMat.toBreeze, expected)) assert(matrixApproxEqual(pearsonMat.toBreeze, expected)) } @@ -108,11 +110,13 @@ class CorrelationSuite extends FunSuite with MLlibTestSparkContext { test("corr(X) spearman") { val X = sc.parallelize(data) val spearmanMat = Statistics.corr(X, "spearman") + // scalastyle:off val expected = BDM( (1.0000000, 0.1054093, Double.NaN, 0.4000000), (0.1054093, 1.0000000, Double.NaN, 0.9486833), (Double.NaN, Double.NaN, 1.00000000, Double.NaN), (0.4000000, 0.9486833, Double.NaN, 1.0000000)) + // scalastyle:on assert(matrixApproxEqual(spearmanMat.toBreeze, expected)) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala index 16ecae23dd9d4..a309c942cf8ff 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/KernelDensitySuite.scala @@ -17,9 +17,8 @@ package org.apache.spark.mllib.stat -import org.scalatest.FunSuite - import org.apache.commons.math3.distribution.NormalDistribution +import org.scalatest.FunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext @@ -27,21 +26,23 @@ class KernelDensitySuite extends FunSuite with MLlibTestSparkContext { test("kernel density single sample") { val rdd = sc.parallelize(Array(5.0)) val evaluationPoints = Array(5.0, 6.0) - val densities = KernelDensity.estimate(rdd, 3.0, evaluationPoints) + val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints) val normal = new NormalDistribution(5.0, 3.0) val acceptableErr = 1e-6 - assert(densities(0) - normal.density(5.0) < acceptableErr) - assert(densities(0) - normal.density(6.0) < acceptableErr) + assert(math.abs(densities(0) - normal.density(5.0)) < acceptableErr) + assert(math.abs(densities(1) - normal.density(6.0)) < acceptableErr) } test("kernel density multiple samples") { val rdd = sc.parallelize(Array(5.0, 10.0)) val evaluationPoints = Array(5.0, 6.0) - val densities = KernelDensity.estimate(rdd, 3.0, evaluationPoints) + val densities = new KernelDensity().setSample(rdd).setBandwidth(3.0).estimate(evaluationPoints) val normal1 = new NormalDistribution(5.0, 3.0) val normal2 = new NormalDistribution(10.0, 3.0) val acceptableErr = 1e-6 - assert(densities(0) - (normal1.density(5.0) + normal2.density(5.0)) / 2 < acceptableErr) - assert(densities(0) - (normal1.density(6.0) + normal2.density(6.0)) / 2 < acceptableErr) + assert(math.abs( + densities(0) - (normal1.density(5.0) + 
normal2.density(5.0)) / 2) < acceptableErr) + assert(math.abs( + densities(1) - (normal1.density(6.0) + normal2.density(6.0)) / 2) < acceptableErr) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala index fac2498e4dcb3..66795e02d15fb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussianSuite.scala @@ -27,39 +27,39 @@ class MultivariateGaussianSuite extends FunSuite with MLlibTestSparkContext { test("univariate") { val x1 = Vectors.dense(0.0) val x2 = Vectors.dense(1.5) - + val mu = Vectors.dense(0.0) val sigma1 = Matrices.dense(1, 1, Array(1.0)) val dist1 = new MultivariateGaussian(mu, sigma1) assert(dist1.pdf(x1) ~== 0.39894 absTol 1E-5) assert(dist1.pdf(x2) ~== 0.12952 absTol 1E-5) - + val sigma2 = Matrices.dense(1, 1, Array(4.0)) val dist2 = new MultivariateGaussian(mu, sigma2) assert(dist2.pdf(x1) ~== 0.19947 absTol 1E-5) assert(dist2.pdf(x2) ~== 0.15057 absTol 1E-5) } - + test("multivariate") { val x1 = Vectors.dense(0.0, 0.0) val x2 = Vectors.dense(1.0, 1.0) - + val mu = Vectors.dense(0.0, 0.0) val sigma1 = Matrices.dense(2, 2, Array(1.0, 0.0, 0.0, 1.0)) val dist1 = new MultivariateGaussian(mu, sigma1) assert(dist1.pdf(x1) ~== 0.15915 absTol 1E-5) assert(dist1.pdf(x2) ~== 0.05855 absTol 1E-5) - + val sigma2 = Matrices.dense(2, 2, Array(4.0, -1.0, -1.0, 2.0)) val dist2 = new MultivariateGaussian(mu, sigma2) assert(dist2.pdf(x1) ~== 0.060155 absTol 1E-5) assert(dist2.pdf(x2) ~== 0.033971 absTol 1E-5) } - + test("multivariate degenerate") { val x1 = Vectors.dense(0.0, 0.0) val x2 = Vectors.dense(1.0, 1.0) - + val mu = Vectors.dense(0.0, 0.0) val sigma = Matrices.dense(2, 2, Array(1.0, 1.0, 1.0, 1.0)) val dist = new MultivariateGaussian(mu, sigma) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala index ee3bc98486862..4ed66953cb628 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala @@ -196,7 +196,6 @@ class RandomForestSuite extends FunSuite with MLlibTestSparkContext { numClasses = 3, categoricalFeaturesInfo = categoricalFeaturesInfo) val model = RandomForest.trainClassifier(input, strategy, numTrees = 2, featureSubsetStrategy = "sqrt", seed = 12345) - EnsembleTestHelper.validateClassifier(model, arr, 1.0) } test("subsampling rate in RandomForest"){ diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala index 668fc1d43c5d6..d422128e03384 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala @@ -63,7 +63,7 @@ class MLUtilsSuite extends FunSuite with MLlibTestSparkContext { val fastSquaredDist3 = fastSquaredDistance(v2, norm2, v3, norm3, precision) assert((fastSquaredDist3 - squaredDist2) <= precision * squaredDist2, s"failed with m = $m") - if (m > 10) { + if (m > 10) { val v4 = Vectors.sparse(n, indices.slice(0, m - 10), indices.map(i => a(i) + 0.5).slice(0, m - 10)) val norm4 = Vectors.norm(v4, 2.0) @@ -168,7 +168,7 @@ class MLUtilsSuite extends FunSuite with MLlibTestSparkContext 
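For context on the KernelDensitySuite hunk above: the removed static call KernelDensity.estimate(rdd, bandwidth, points) is replaced by a builder-style estimator. A minimal sketch of the new usage, assuming a live SparkContext named sc:

    import org.apache.spark.mllib.stat.KernelDensity

    // Gaussian kernels centered at the sample points, bandwidth 3.0.
    val sample = sc.parallelize(Seq(5.0, 10.0))
    val densities: Array[Double] = new KernelDensity()
      .setSample(sample)
      .setBandwidth(3.0)
      .estimate(Array(5.0, 6.0))  // estimated density at each evaluation point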
{ "Each training+validation set combined should contain all of the data.") } // K fold cross validation should only have each element in the validation set exactly once - assert(foldedRdds.map(_._2).reduce((x,y) => x.union(y)).collect().sorted === + assert(foldedRdds.map(_._2).reduce((x, y) => x.union(y)).collect().sorted === data.collect().sorted) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index b658889476d37..5d1796ef65722 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -17,13 +17,14 @@ package org.apache.spark.mllib.util -import org.scalatest.Suite -import org.scalatest.BeforeAndAfterAll +import org.scalatest.{BeforeAndAfterAll, Suite} import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.sql.SQLContext trait MLlibTestSparkContext extends BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ + @transient var sqlContext: SQLContext = _ override def beforeAll() { super.beforeAll() @@ -31,12 +32,15 @@ trait MLlibTestSparkContext extends BeforeAndAfterAll { self: Suite => .setMaster("local[2]") .setAppName("MLlibUnitTest") sc = new SparkContext(conf) + sqlContext = new SQLContext(sc) } override def afterAll() { + sqlContext = null if (sc != null) { sc.stop() } + sc = null super.afterAll() } } diff --git a/network/common/pom.xml b/network/common/pom.xml index 0c3147761cfc5..3e541b5860601 100644 --- a/network/common/pom.xml +++ b/network/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java b/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java index 941ef95772e16..f4fadb1ee3b8d 100644 --- a/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java +++ b/network/common/src/main/java/org/apache/spark/network/server/TransportServer.java @@ -31,6 +31,7 @@ import io.netty.channel.ChannelOption; import io.netty.channel.EventLoopGroup; import io.netty.channel.socket.SocketChannel; +import org.apache.spark.network.util.JavaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,7 +66,12 @@ public TransportServer( this.appRpcHandler = appRpcHandler; this.bootstraps = Lists.newArrayList(Preconditions.checkNotNull(bootstraps)); - init(portToBind); + try { + init(portToBind); + } catch (RuntimeException e) { + JavaUtils.closeQuietly(this); + throw e; + } } public int getPort() { @@ -114,7 +120,8 @@ protected void initChannel(SocketChannel ch) throws Exception { } }); - bindRightPort(portToBind); + channelFuture = bootstrap.bind(new InetSocketAddress(portToBind)); + channelFuture.syncUninterruptibly(); port = ((InetSocketAddress) channelFuture.channel().localAddress()).getPort(); logger.debug("Shuffle server started on port :" + port); @@ -135,38 +142,4 @@ public void close() { } bootstrap = null; } - - /** - * Attempt to bind to the specified port up to a fixed number of retries. - * If all attempts fail after the max number of retries, exit. 
- */ - private void bindRightPort(int portToBind) { - int maxPortRetries = conf.portMaxRetries(); - - for (int i = 0; i <= maxPortRetries; i++) { - int tryPort = -1; - if (0 == portToBind) { - // Do not increment port if tryPort is 0, which is treated as a special port - tryPort = 0; - } else { - // If the new port wraps around, do not try a privilege port - tryPort = ((portToBind + i - 1024) % (65536 - 1024)) + 1024; - } - try { - channelFuture = bootstrap.bind(new InetSocketAddress(tryPort)); - channelFuture.syncUninterruptibly(); - return; - } catch (Exception e) { - logger.warn("Netty service could not bind on port " + tryPort + - ". Attempting the next port."); - if (i >= maxPortRetries) { - logger.error(e.getMessage() + ": Netty server failed after " - + maxPortRetries + " retries."); - - // If it can't find a right port, it should exit directly. - System.exit(-1); - } - } - } - } } diff --git a/network/shuffle/pom.xml b/network/shuffle/pom.xml index 7dc7c65825e34..da790e98f0c7a 100644 --- a/network/shuffle/pom.xml +++ b/network/shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java index 46ca9708621b9..e4faaf8854fc7 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandler.java @@ -46,18 +46,18 @@ public class ExternalShuffleBlockHandler extends RpcHandler { private final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockHandler.class); - private final ExternalShuffleBlockManager blockManager; + private final ExternalShuffleBlockResolver blockManager; private final OneForOneStreamManager streamManager; public ExternalShuffleBlockHandler(TransportConf conf) { - this(new OneForOneStreamManager(), new ExternalShuffleBlockManager(conf)); + this(new OneForOneStreamManager(), new ExternalShuffleBlockResolver(conf)); } /** Enables mocking out the StreamManager and BlockManager. */ @VisibleForTesting ExternalShuffleBlockHandler( OneForOneStreamManager streamManager, - ExternalShuffleBlockManager blockManager) { + ExternalShuffleBlockResolver blockManager) { this.streamManager = streamManager; this.blockManager = blockManager; } diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManager.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java similarity index 95% rename from network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManager.java rename to network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index 93e6fdd7161fa..dd08e24cade23 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManager.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -44,13 +44,13 @@ * Manages converting shuffle BlockIds into physical segments of local files, from a process outside * of Executors. Each Executor must register its own configuration about where it stores its files * (local dirs) and how (shuffle manager). 
The logic for retrieval of individual files is replicated - * from Spark's FileShuffleBlockManager and IndexShuffleBlockManager. + * from Spark's FileShuffleBlockResolver and IndexShuffleBlockResolver. * * Executors with shuffle file consolidation are not currently supported, as the index is stored in - * the Executor's memory, unlike the IndexShuffleBlockManager. + * the Executor's memory, unlike the IndexShuffleBlockResolver. */ -public class ExternalShuffleBlockManager { - private static final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockManager.class); +public class ExternalShuffleBlockResolver { + private static final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockResolver.class); // Map containing all registered executors' metadata. private final ConcurrentMap executors; @@ -60,7 +60,7 @@ public class ExternalShuffleBlockManager { private final TransportConf conf; - public ExternalShuffleBlockManager(TransportConf conf) { + public ExternalShuffleBlockResolver(TransportConf conf) { this(conf, Executors.newSingleThreadExecutor( // Add `spark` prefix because it will run in NM in Yarn mode. NettyUtils.createThreadFactory("spark-shuffle-directory-cleaner"))); @@ -68,7 +68,7 @@ public ExternalShuffleBlockManager(TransportConf conf) { // Allows tests to have more control over when directories are cleaned up. @VisibleForTesting - ExternalShuffleBlockManager(TransportConf conf, Executor directoryCleaner) { + ExternalShuffleBlockResolver(TransportConf conf, Executor directoryCleaner) { this.conf = conf; this.executors = Maps.newConcurrentMap(); this.directoryCleaner = directoryCleaner; @@ -168,7 +168,7 @@ private void deleteExecutorDirs(String[] dirs) { /** * Hash-based shuffle data is simply stored as one file per block. - * This logic is from FileShuffleBlockManager. + * This logic is from FileShuffleBlockResolver. */ // TODO: Support consolidated hash shuffle files private ManagedBuffer getHashBasedShuffleBlockData(ExecutorShuffleInfo executor, String blockId) { @@ -178,7 +178,7 @@ private ManagedBuffer getHashBasedShuffleBlockData(ExecutorShuffleInfo executor, /** * Sort-based shuffle data uses an index called "shuffle_ShuffleId_MapId_0.index" into a data file - * called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockManager, + * called "shuffle_ShuffleId_MapId_0.data". This logic is from IndexShuffleBlockResolver, * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId. */ private ManagedBuffer getSortBasedShuffleBlockData( diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java index 60485bace643c..ce954b8a289e4 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java @@ -24,6 +24,9 @@ import org.apache.spark.network.protocol.Encoders; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** Request to read a set of blocks. Returns {@link StreamHandle}. 
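As a side note on the ExternalShuffleBlockResolver doc above: for sort-based shuffle, the resolver reconstructs block locations purely from a fixed naming scheme. An illustrative helper (not a Spark API) capturing that convention:

    // Index/data file names for a sort-based shuffle block, per the javadoc above.
    def sortShuffleFileNames(shuffleId: Int, mapId: Int): (String, String) = {
      val base = s"shuffle_${shuffleId}_${mapId}_0"
      (s"$base.index", s"$base.data")
    }
    // sortShuffleFileNames(1, 0) == ("shuffle_1_0_0.index", "shuffle_1_0_0.data")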
*/ public class OpenBlocks extends BlockTransferMessage { public final String appId; diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java index 38acae3b31d64..cca8b17c4f129 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java @@ -22,6 +22,9 @@ import org.apache.spark.network.protocol.Encoders; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** * Initial registration message between an executor and its local shuffle server. * Returns nothing (empty bye array). diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java index 9a9220211a50c..1915295aa6cc2 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java @@ -20,6 +20,9 @@ import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** * Identifier for a fixed number of chunks to read from a stream created by an "open blocks" * message. This is used by {@link org.apache.spark.network.shuffle.OneForOneBlockFetcher}. diff --git a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java index 2ff9aaa650f92..3caed59d508fd 100644 --- a/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java +++ b/network/shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java @@ -24,6 +24,9 @@ import org.apache.spark.network.protocol.Encoders; +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + /** Request to upload a block with a certain StorageLevel. Returns nothing (empty byte array). 
*/ public class UploadBlock extends BlockTransferMessage { diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java index 3f9fe1681cf27..73374cdc77a23 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockHandlerSuite.java @@ -45,14 +45,14 @@ public class ExternalShuffleBlockHandlerSuite { TransportClient client = mock(TransportClient.class); OneForOneStreamManager streamManager; - ExternalShuffleBlockManager blockManager; + ExternalShuffleBlockResolver blockResolver; RpcHandler handler; @Before public void beforeEach() { streamManager = mock(OneForOneStreamManager.class); - blockManager = mock(ExternalShuffleBlockManager.class); - handler = new ExternalShuffleBlockHandler(streamManager, blockManager); + blockResolver = mock(ExternalShuffleBlockResolver.class); + handler = new ExternalShuffleBlockHandler(streamManager, blockResolver); } @Test @@ -62,7 +62,7 @@ public void testRegisterExecutor() { ExecutorShuffleInfo config = new ExecutorShuffleInfo(new String[] {"/a", "/b"}, 16, "sort"); byte[] registerMessage = new RegisterExecutor("app0", "exec1", config).toByteArray(); handler.receive(client, registerMessage, callback); - verify(blockManager, times(1)).registerExecutor("app0", "exec1", config); + verify(blockResolver, times(1)).registerExecutor("app0", "exec1", config); verify(callback, times(1)).onSuccess((byte[]) any()); verify(callback, never()).onFailure((Throwable) any()); @@ -75,12 +75,12 @@ public void testOpenShuffleBlocks() { ManagedBuffer block0Marker = new NioManagedBuffer(ByteBuffer.wrap(new byte[3])); ManagedBuffer block1Marker = new NioManagedBuffer(ByteBuffer.wrap(new byte[7])); - when(blockManager.getBlockData("app0", "exec1", "b0")).thenReturn(block0Marker); - when(blockManager.getBlockData("app0", "exec1", "b1")).thenReturn(block1Marker); + when(blockResolver.getBlockData("app0", "exec1", "b0")).thenReturn(block0Marker); + when(blockResolver.getBlockData("app0", "exec1", "b1")).thenReturn(block1Marker); byte[] openBlocks = new OpenBlocks("app0", "exec1", new String[] { "b0", "b1" }).toByteArray(); handler.receive(client, openBlocks, callback); - verify(blockManager, times(1)).getBlockData("app0", "exec1", "b0"); - verify(blockManager, times(1)).getBlockData("app0", "exec1", "b1"); + verify(blockResolver, times(1)).getBlockData("app0", "exec1", "b0"); + verify(blockResolver, times(1)).getBlockData("app0", "exec1", "b1"); ArgumentCaptor response = ArgumentCaptor.forClass(byte[].class); verify(callback, times(1)).onSuccess(response.capture()); diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManagerSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java similarity index 77% rename from network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManagerSuite.java rename to network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index dad6428a836fc..d02f4f0fdb682 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockManagerSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -30,7 +30,7 @@ 
import static org.junit.Assert.*; -public class ExternalShuffleBlockManagerSuite { +public class ExternalShuffleBlockResolverSuite { static String sortBlock0 = "Hello!"; static String sortBlock1 = "World!"; @@ -60,29 +60,29 @@ public static void afterAll() { @Test public void testBadRequests() { - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf); + ExternalShuffleBlockResolver resolver = new ExternalShuffleBlockResolver(conf); // Unregistered executor try { - manager.getBlockData("app0", "exec1", "shuffle_1_1_0"); + resolver.getBlockData("app0", "exec1", "shuffle_1_1_0"); fail("Should have failed"); } catch (RuntimeException e) { assertTrue("Bad error message: " + e, e.getMessage().contains("not registered")); } // Invalid shuffle manager - manager.registerExecutor("app0", "exec2", dataContext.createExecutorInfo("foobar")); + resolver.registerExecutor("app0", "exec2", dataContext.createExecutorInfo("foobar")); try { - manager.getBlockData("app0", "exec2", "shuffle_1_1_0"); + resolver.getBlockData("app0", "exec2", "shuffle_1_1_0"); fail("Should have failed"); } catch (UnsupportedOperationException e) { // pass } // Nonexistent shuffle block - manager.registerExecutor("app0", "exec3", + resolver.registerExecutor("app0", "exec3", dataContext.createExecutorInfo("org.apache.spark.shuffle.sort.SortShuffleManager")); try { - manager.getBlockData("app0", "exec3", "shuffle_1_1_0"); + resolver.getBlockData("app0", "exec3", "shuffle_1_1_0"); fail("Should have failed"); } catch (Exception e) { // pass @@ -91,18 +91,18 @@ public void testBadRequests() { @Test public void testSortShuffleBlocks() throws IOException { - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf); - manager.registerExecutor("app0", "exec0", + ExternalShuffleBlockResolver resolver = new ExternalShuffleBlockResolver(conf); + resolver.registerExecutor("app0", "exec0", dataContext.createExecutorInfo("org.apache.spark.shuffle.sort.SortShuffleManager")); InputStream block0Stream = - manager.getBlockData("app0", "exec0", "shuffle_0_0_0").createInputStream(); + resolver.getBlockData("app0", "exec0", "shuffle_0_0_0").createInputStream(); String block0 = CharStreams.toString(new InputStreamReader(block0Stream)); block0Stream.close(); assertEquals(sortBlock0, block0); InputStream block1Stream = - manager.getBlockData("app0", "exec0", "shuffle_0_0_1").createInputStream(); + resolver.getBlockData("app0", "exec0", "shuffle_0_0_1").createInputStream(); String block1 = CharStreams.toString(new InputStreamReader(block1Stream)); block1Stream.close(); assertEquals(sortBlock1, block1); @@ -110,18 +110,18 @@ public void testSortShuffleBlocks() throws IOException { @Test public void testHashShuffleBlocks() throws IOException { - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf); - manager.registerExecutor("app0", "exec0", + ExternalShuffleBlockResolver resolver = new ExternalShuffleBlockResolver(conf); + resolver.registerExecutor("app0", "exec0", dataContext.createExecutorInfo("org.apache.spark.shuffle.hash.HashShuffleManager")); InputStream block0Stream = - manager.getBlockData("app0", "exec0", "shuffle_1_0_0").createInputStream(); + resolver.getBlockData("app0", "exec0", "shuffle_1_0_0").createInputStream(); String block0 = CharStreams.toString(new InputStreamReader(block0Stream)); block0Stream.close(); assertEquals(hashBlock0, block0); InputStream block1Stream = - manager.getBlockData("app0", "exec0", "shuffle_1_0_1").createInputStream(); + resolver.getBlockData("app0", 
"exec0", "shuffle_1_0_1").createInputStream(); String block1 = CharStreams.toString(new InputStreamReader(block1Stream)); block1Stream.close(); assertEquals(hashBlock1, block1); diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java index 254e3a7a32b98..d9d9c1bf2f17a 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleCleanupSuite.java @@ -41,14 +41,15 @@ public class ExternalShuffleCleanupSuite { public void noCleanupAndCleanup() throws IOException { TestShuffleDataContext dataContext = createSomeData(); - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, sameThreadExecutor); - manager.registerExecutor("app", "exec0", dataContext.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app", false /* cleanup */); + ExternalShuffleBlockResolver resolver = + new ExternalShuffleBlockResolver(conf, sameThreadExecutor); + resolver.registerExecutor("app", "exec0", dataContext.createExecutorInfo("shuffleMgr")); + resolver.applicationRemoved("app", false /* cleanup */); assertStillThere(dataContext); - manager.registerExecutor("app", "exec1", dataContext.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app", true /* cleanup */); + resolver.registerExecutor("app", "exec1", dataContext.createExecutorInfo("shuffleMgr")); + resolver.applicationRemoved("app", true /* cleanup */); assertCleanedUp(dataContext); } @@ -64,7 +65,7 @@ public void cleanupUsesExecutor() throws IOException { @Override public void execute(Runnable runnable) { cleanupCalled.set(true); } }; - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, noThreadExecutor); + ExternalShuffleBlockResolver manager = new ExternalShuffleBlockResolver(conf, noThreadExecutor); manager.registerExecutor("app", "exec0", dataContext.createExecutorInfo("shuffleMgr")); manager.applicationRemoved("app", true); @@ -81,11 +82,12 @@ public void cleanupMultipleExecutors() throws IOException { TestShuffleDataContext dataContext0 = createSomeData(); TestShuffleDataContext dataContext1 = createSomeData(); - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, sameThreadExecutor); + ExternalShuffleBlockResolver resolver = + new ExternalShuffleBlockResolver(conf, sameThreadExecutor); - manager.registerExecutor("app", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); - manager.registerExecutor("app", "exec1", dataContext1.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app", true); + resolver.registerExecutor("app", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); + resolver.registerExecutor("app", "exec1", dataContext1.createExecutorInfo("shuffleMgr")); + resolver.applicationRemoved("app", true); assertCleanedUp(dataContext0); assertCleanedUp(dataContext1); @@ -96,25 +98,26 @@ public void cleanupOnlyRemovedApp() throws IOException { TestShuffleDataContext dataContext0 = createSomeData(); TestShuffleDataContext dataContext1 = createSomeData(); - ExternalShuffleBlockManager manager = new ExternalShuffleBlockManager(conf, sameThreadExecutor); + ExternalShuffleBlockResolver resolver = + new ExternalShuffleBlockResolver(conf, sameThreadExecutor); - manager.registerExecutor("app-0", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); - 
manager.registerExecutor("app-1", "exec0", dataContext1.createExecutorInfo("shuffleMgr")); + resolver.registerExecutor("app-0", "exec0", dataContext0.createExecutorInfo("shuffleMgr")); + resolver.registerExecutor("app-1", "exec0", dataContext1.createExecutorInfo("shuffleMgr")); - manager.applicationRemoved("app-nonexistent", true); + resolver.applicationRemoved("app-nonexistent", true); assertStillThere(dataContext0); assertStillThere(dataContext1); - manager.applicationRemoved("app-0", true); + resolver.applicationRemoved("app-0", true); assertCleanedUp(dataContext0); assertStillThere(dataContext1); - manager.applicationRemoved("app-1", true); + resolver.applicationRemoved("app-1", true); assertCleanedUp(dataContext0); assertCleanedUp(dataContext1); // Make sure it's not an error to cleanup multiple times - manager.applicationRemoved("app-1", true); + resolver.applicationRemoved("app-1", true); assertCleanedUp(dataContext0); assertCleanedUp(dataContext1); } diff --git a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index 76639114df5d9..3fdde054ab6c7 100644 --- a/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/network/shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -29,7 +29,7 @@ /** * Manages some sort- and hash-based shuffle data, including the creation - * and cleanup of directories that can be read by the {@link ExternalShuffleBlockManager}. + * and cleanup of directories that can be read by the {@link ExternalShuffleBlockResolver}. */ public class TestShuffleDataContext { public final String[] localDirs; @@ -61,9 +61,9 @@ public void insertSortShuffleData(int shuffleId, int mapId, byte[][] blocks) thr String blockId = "shuffle_" + shuffleId + "_" + mapId + "_0"; OutputStream dataStream = new FileOutputStream( - ExternalShuffleBlockManager.getFile(localDirs, subDirsPerLocalDir, blockId + ".data")); + ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId + ".data")); DataOutputStream indexStream = new DataOutputStream(new FileOutputStream( - ExternalShuffleBlockManager.getFile(localDirs, subDirsPerLocalDir, blockId + ".index"))); + ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId + ".index"))); long offset = 0; indexStream.writeLong(offset); @@ -82,7 +82,7 @@ public void insertHashShuffleData(int shuffleId, int mapId, byte[][] blocks) thr for (int i = 0; i < blocks.length; i ++) { String blockId = "shuffle_" + shuffleId + "_" + mapId + "_" + i; Files.write(blocks[i], - ExternalShuffleBlockManager.getFile(localDirs, subDirsPerLocalDir, blockId)); + ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, blockId)); } } diff --git a/network/yarn/pom.xml b/network/yarn/pom.xml index 1e2e9c80af6cc..550159e134509 100644 --- a/network/yarn/pom.xml +++ b/network/yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/pom.xml b/pom.xml index de18741feae3a..de98c6a0e69f6 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 pom Spark Project Parent POM http://spark.apache.org/ @@ -107,6 +107,8 @@ examples repl launcher + external/kafka + external/kafka-assembly @@ -122,12 +124,13 @@ 1.7.10 1.2.17 2.2.0 - 2.4.1 + 2.5.0 ${hadoop.version} - 0.98.7-hadoop1 + 0.98.7-hadoop2 hbase 1.4.0 3.4.5 + 2.4.0 
org.spark-project.hive 0.13.1a @@ -135,7 +138,7 @@ 0.13.1 10.10.1.1 1.6.0rc3 - 1.2.3 + 1.2.4 8.1.14.v20131031 3.0.0.v201112011016 0.5.0 @@ -143,10 +146,10 @@ 2.0.8 3.1.0 1.7.7 - + hadoop2 0.7.1 - 1.8.3 - 1.1.0 + 1.9.16 + 1.2.1 4.3.2 3.4.1 ${project.build.directory}/spark-test-classpath.txt @@ -155,7 +158,7 @@ ${scala.version} org.scala-lang 3.6.3 - 1.8.8 + 1.9.13 2.4.4 1.1.1.7 1.1.2 @@ -416,6 +419,11 @@ commons-math3 ${commons.math3.version} + + org.apache.ivy + ivy + ${ivy.version} + com.google.code.findbugs jsr305 @@ -476,7 +484,7 @@ com.ning compress-lzf - 1.0.0 + 1.0.3 org.xerial.snappy @@ -618,6 +626,18 @@ + + com.sun.jersey + jersey-server + 1.9 + ${hadoop.deps.scope} + + + com.sun.jersey + jersey-core + 1.9 + ${hadoop.deps.scope} + org.scala-lang scala-compiler @@ -652,7 +672,7 @@ org.mockito mockito-all - 1.9.0 + 1.9.5 test @@ -667,6 +687,18 @@ 4.10 test + + org.hamcrest + hamcrest-core + 1.3 + test + + + org.hamcrest + hamcrest-library + 1.3 + test + com.novocode junit-interface @@ -676,7 +708,7 @@ org.apache.curator curator-recipes - 2.4.0 + ${curator.version} ${hadoop.deps.scope} @@ -685,6 +717,16 @@ + + org.apache.curator + curator-client + ${curator.version} + + + org.apache.curator + curator-framework + ${curator.version} + org.apache.hadoop hadoop-client @@ -1615,26 +1657,27 @@ --> - hadoop-2.2 + hadoop-1 - 2.2.0 - 2.5.0 - 0.98.7-hadoop2 - hadoop2 - 1.9.13 + 1.0.4 + 2.4.1 + 0.98.7-hadoop1 + hadoop1 + 1.8.8 + + hadoop-2.2 + + + hadoop-2.3 2.3.0 - 2.5.0 0.9.3 - 0.98.7-hadoop2 3.1.1 - hadoop2 - 1.9.13 @@ -1642,12 +1685,19 @@ hadoop-2.4 2.4.0 - 2.5.0 0.9.3 - 0.98.7-hadoop2 3.1.1 - hadoop2 - 1.9.13 + +
+ + + hadoop-2.6 + + 2.6.0 + 0.9.3 + 3.1.1 + 3.4.6 + 2.6.0 @@ -1681,7 +1731,7 @@ org.apache.curator curator-recipes - 2.4.0 + ${curator.version} org.apache.zookeeper @@ -1703,22 +1753,6 @@ sql/hive-thriftserver - - hive-0.12.0 - - 0.12.0-protobuf-2.5 - 0.12.0 - 10.4.2.0 - - - - hive-0.13.1 - - 0.13.1a - 0.13.1 - 10.10.1.1 - - scala-2.10 @@ -1731,10 +1765,6 @@ ${scala.version} org.scala-lang - - external/kafka - external/kafka-assembly - diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index bf343d4b7e40b..11b439e7875fc 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -87,23 +87,74 @@ object MimaExcludes { ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.mllib.linalg.Vector.toSparse"), ProblemFilters.exclude[MissingMethodProblem]( - "org.apache.spark.mllib.linalg.Vector.numActives") + "org.apache.spark.mllib.linalg.Vector.numActives"), + // SPARK-7681 add SparseVector support for gemv + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.Matrix.multiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.DenseMatrix.multiply"), + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.mllib.linalg.SparseMatrix.multiply") ) ++ Seq( + // Execution should never be included as its always internal. + MimaBuild.excludeSparkPackage("sql.execution"), // This `protected[sql]` method was removed in 1.3.1 ProblemFilters.exclude[MissingMethodProblem]( "org.apache.spark.sql.SQLContext.checkAnalysis"), - // This `private[sql]` class was removed in 1.4.0: + // These `private[sql]` class were removed in 1.4.0: ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.execution.AddExchange"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.execution.AddExchange$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.PartitionSpec"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.PartitionSpec$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.Partition"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.Partition$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$PartitionValues$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetRelation2$MetadataCache"), // These test support classes were moved out of src/main and into src/test: ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTestData"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.parquet.ParquetTestData$"), ProblemFilters.exclude[MissingClassProblem]( - "org.apache.spark.sql.parquet.TestGroupWriteSupport") + "org.apache.spark.sql.parquet.TestGroupWriteSupport"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CachedData$"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.CacheManager"), + // TODO: Remove the following rule once ParquetTest has been moved to src/test. 
+ ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.parquet.ParquetTest") + ) ++ Seq( + // SPARK-7530 Added StreamingContext.getState() + ProblemFilters.exclude[MissingMethodProblem]( + "org.apache.spark.streaming.StreamingContext.state_=") + ) ++ Seq( + // SPARK-7081 changed ShuffleWriter from a trait to an abstract class and removed some + // unnecessary type bounds in order to fix some compiler warnings that occurred when + // implementing this interface in Java. Note that ShuffleWriter is private[spark]. + ProblemFilters.exclude[IncompatibleTemplateDefProblem]( + "org.apache.spark.shuffle.ShuffleWriter") + ) ++ Seq( + // SPARK-6888 make jdbc driver handling user definable + // This patch renames some classes to API friendly names. + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DriverQuirks$"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.DriverQuirks"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.PostgresQuirks"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.NoQuirks"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.jdbc.MySQLQuirks") ) case v if v.startsWith("1.3") => diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b4431c7ee05b6..9a849639233bc 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -23,7 +23,6 @@ import scala.collection.JavaConversions._ import sbt._ import sbt.Classpaths.publishTask import sbt.Keys._ -import sbtunidoc.Plugin.genjavadocSettings import sbtunidoc.Plugin.UnidocKeys.unidocGenjavadocVersion import com.typesafe.sbt.pom.{loadEffectivePom, PomBuild, SbtPomKeys} import net.virtualvoid.sbt.graph.Plugin.graphSettings @@ -118,7 +117,12 @@ object SparkBuild extends PomBuild { lazy val MavenCompile = config("m2r") extend(Compile) lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") - lazy val sharedSettings = graphSettings ++ genjavadocSettings ++ Seq ( + lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = Seq( + libraryDependencies += compilerPlugin( + "org.spark-project" %% "genjavadoc-plugin" % unidocGenjavadocVersion.value cross CrossVersion.full), + scalacOptions <+= target.map(t => "-P:genjavadoc:out=" + (t / "java"))) + + lazy val sharedSettings = graphSettings ++ sparkGenjavadocSettings ++ Seq ( javaHome := sys.env.get("JAVA_HOME") .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() }) .map(file), @@ -126,7 +130,7 @@ object SparkBuild extends PomBuild { retrieveManaged := true, retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", publishMavenStyle := true, - unidocGenjavadocVersion := "0.8", + unidocGenjavadocVersion := "0.9-spark0", resolvers += Resolver.mavenLocal, otherResolvers <<= SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))), @@ -168,7 +172,7 @@ object SparkBuild extends PomBuild { /* Enable Assembly for all assembly projects */ assemblyProjects.foreach(enable(Assembly.settings)) - /* Package pyspark artifacts in the main assembly. */ + /* Package pyspark artifacts in a separate zip file for YARN. 
*/ enable(PySparkAssembly.settings)(assembly) /* Enable unidoc only for the root spark project */ @@ -193,6 +197,7 @@ object SparkBuild extends PomBuild { * Usage: `build/sbt sparkShell` */ val sparkShell = taskKey[Unit]("start a spark-shell.") + val sparkSql = taskKey[Unit]("starts the spark sql CLI.") enable(Seq( connectInput in run := true, @@ -203,6 +208,12 @@ object SparkBuild extends PomBuild { sparkShell := { (runMain in Compile).toTask(" org.apache.spark.repl.Main -usejavacp").value + }, + + javaOptions in Compile += "-Dspark.master=local", + + sparkSql := { + (runMain in Compile).toTask(" org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver").value } ))(assembly) @@ -317,6 +328,7 @@ object Hive { |import org.apache.spark.sql.functions._ |import org.apache.spark.sql.hive._ |import org.apache.spark.sql.hive.test.TestHive._ + |import org.apache.spark.sql.hive.test.TestHive.implicits._ |import org.apache.spark.sql.types._""".stripMargin, cleanupCommands in console := "sparkContext.stop()", // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce @@ -363,55 +375,50 @@ object Assembly { object PySparkAssembly { import sbtassembly.Plugin._ import AssemblyKeys._ + import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( - unmanagedJars in Compile += { BuildCommons.sparkHome / "python/lib/py4j-0.8.2.1-src.zip" }, // Use a resource generator to copy all .py files from python/pyspark into a managed directory // to be included in the assembly. We can't just add "python/" to the assembly's resource dir // list since that will copy unneeded / unwanted files. resourceGenerators in Compile <+= resourceManaged in Compile map { outDir: File => - val dst = new File(outDir, "pyspark") - if (!dst.isDirectory()) { - require(dst.mkdirs()) - } - val src = new File(BuildCommons.sparkHome, "python/pyspark") - copy(src, dst) + val zipFile = new File(BuildCommons.sparkHome , "python/lib/pyspark.zip") + zipFile.delete() + zipRecursive(src, zipFile) + Seq[File]() } ) - private def copy(src: File, dst: File): Seq[File] = { - src.listFiles().flatMap { f => - val child = new File(dst, f.getName()) - if (f.isDirectory()) { - child.mkdir() - copy(f, child) - } else if (f.getName().endsWith(".py")) { - var in: Option[FileInputStream] = None - var out: Option[FileOutputStream] = None - try { - in = Some(new FileInputStream(f)) - out = Some(new FileOutputStream(child)) - - val bytes = new Array[Byte](1024) - var read = 0 - while (read >= 0) { - read = in.get.read(bytes) - if (read > 0) { - out.get.write(bytes, 0, read) - } - } - - Some(child) - } finally { - in.foreach(_.close()) - out.foreach(_.close()) + private def zipRecursive(source: File, destZipFile: File) = { + val destOutput = new ZipOutputStream(new FileOutputStream(destZipFile)) + addFilesToZipStream("", source, destOutput) + destOutput.flush() + destOutput.close() + } + + private def addFilesToZipStream(parent: String, source: File, output: ZipOutputStream): Unit = { + if (source.isDirectory()) { + output.putNextEntry(new ZipEntry(parent + source.getName())) + for (file <- source.listFiles()) { + addFilesToZipStream(parent + source.getName() + File.separator, file, output) + } + } else { + val in = new FileInputStream(source) + output.putNextEntry(new ZipEntry(parent + source.getName())) + val buf = new Array[Byte](8192) + var n = 0 + while (n != -1) { + n = in.read(buf) + if (n != -1) { + output.write(buf, 0, n) } - } else { - None } + output.closeEntry() + in.close() } } + } object Unidoc { @@ 
-497,7 +504,7 @@ object TestSettings { // Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes // launched by the tests have access to the correct test-time classpath. envVars in Test ++= Map( - "SPARK_DIST_CLASSPATH" -> + "SPARK_DIST_CLASSPATH" -> (fullClasspath in Test).value.files.map(_.getAbsolutePath).mkString(":").stripSuffix(":"), "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), javaOptions in Test += "-Dspark.test.home=" + sparkHome, diff --git a/project/plugins.sbt b/project/plugins.sbt index 7096b0d3ee7de..75bd604a1b857 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -25,7 +25,7 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.1.6") addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.1") -addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.1") +addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.3") addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst index 4da6d4a74a299..518b8e774dd5f 100644 --- a/python/docs/pyspark.ml.rst +++ b/python/docs/pyspark.ml.rst @@ -1,14 +1,22 @@ pyspark.ml package -===================== +================== -Module Context --------------- +ML Pipeline APIs +---------------- .. automodule:: pyspark.ml :members: :undoc-members: :inherited-members: +pyspark.ml.param module +----------------------- + +.. automodule:: pyspark.ml.param + :members: + :undoc-members: + :inherited-members: + pyspark.ml.feature module ------------------------- @@ -24,3 +32,35 @@ pyspark.ml.classification module :members: :undoc-members: :inherited-members: + +pyspark.ml.recommendation module +-------------------------------- + +.. automodule:: pyspark.ml.recommendation + :members: + :undoc-members: + :inherited-members: + +pyspark.ml.regression module +---------------------------- + +.. automodule:: pyspark.ml.regression + :members: + :undoc-members: + :inherited-members: + +pyspark.ml.tuning module +------------------------ + +.. automodule:: pyspark.ml.tuning + :members: + :undoc-members: + :inherited-members: + +pyspark.ml.evaluation module +---------------------------- + +.. automodule:: pyspark.ml.evaluation + :members: + :undoc-members: + :inherited-members: diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py index 0d21a132048a5..adca90ddaf397 100644 --- a/python/pyspark/accumulators.py +++ b/python/pyspark/accumulators.py @@ -261,3 +261,7 @@ def _start_update_server(): thread.daemon = True thread.start() return server + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 31992795a9e45..aeb7ad4f2f83e 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -173,6 +173,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, self._jvm.PythonAccumulatorParam(host, port)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') + self.pythonVer = "%d.%d" % sys.version_info[:2] # Broadcast's __reduce__ method stores Broadcast instances here. 
# This allows other code to determine which Broadcast instances have @@ -290,6 +291,11 @@ def version(self): """ return self._jsc.version() + @property + def startTime(self): + """Return the epoch time when the Spark Context was started.""" + return self._jsc.startTime() + @property def defaultParallelism(self): """ @@ -318,6 +324,22 @@ def stop(self): with SparkContext._lock: SparkContext._active_spark_context = None + def range(self, start, end, step=1, numSlices=None): + """ + Create a new RDD of int containing elements from `start` to `end` + (exclusive), increased by `step` every element. + + :param start: the start value + :param end: the end value (exclusive) + :param step: the incremental step (default: 1) + :param numSlices: the number of partitions of the new RDD + :return: An RDD of int + + >>> sc.range(1, 7, 2).collect() + [1, 3, 5] + """ + return self.parallelize(xrange(start, end, step), numSlices) + def parallelize(self, c, numSlices=None): """ Distribute a local Python collection to form an RDD. Using xrange diff --git a/python/pyspark/ml/__init__.py b/python/pyspark/ml/__init__.py index 47fed80f42e13..327a11b14b5aa 100644 --- a/python/pyspark/ml/__init__.py +++ b/python/pyspark/ml/__init__.py @@ -15,7 +15,6 @@ # limitations under the License. # -from pyspark.ml.param import * -from pyspark.ml.pipeline import * +from pyspark.ml.pipeline import Transformer, Estimator, Model, Pipeline, PipelineModel -__all__ = ["Param", "Params", "Transformer", "Estimator", "Pipeline"] +__all__ = ["Transformer", "Estimator", "Model", "Pipeline", "PipelineModel"] diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 45754bc9d4b10..7abbde8b260eb 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -17,17 +17,19 @@ from pyspark.ml.util import keyword_only from pyspark.ml.wrapper import JavaEstimator, JavaModel -from pyspark.ml.param.shared import HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter,\ - HasRegParam +from pyspark.ml.param.shared import * +from pyspark.ml.regression import RandomForestParams from pyspark.mllib.common import inherit_doc -__all__ = ['LogisticRegression', 'LogisticRegressionModel'] +__all__ = ['LogisticRegression', 'LogisticRegressionModel', 'DecisionTreeClassifier', + 'DecisionTreeClassificationModel', 'GBTClassifier', 'GBTClassificationModel', + 'RandomForestClassifier', 'RandomForestClassificationModel'] @inherit_doc class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, - HasRegParam): + HasRegParam, HasTol, HasProbabilityCol): """ Logistic regression. @@ -41,6 +43,10 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti >>> test0 = sc.parallelize([Row(features=Vectors.dense(-1.0))]).toDF() >>> model.transform(test0).head().prediction 0.0 + >>> model.weights + DenseVector([5.5...]) + >>> model.intercept + -2.68... >>> test1 = sc.parallelize([Row(features=Vectors.sparse(1, [0], [1.0]))]).toDF() >>> model.transform(test1).head().prediction 1.0 @@ -49,26 +55,52 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti ... TypeError: Method setParams forces keyword arguments. """ - _java_class = "org.apache.spark.ml.classification.LogisticRegression" + + # a placeholder to make it appear in the generated doc + elasticNetParam = \ + Param(Params._dummy(), "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " + + "the penalty is an L2 penalty. 
For alpha = 1, it is an L1 penalty.") + fitIntercept = Param(Params._dummy(), "fitIntercept", "whether to fit an intercept term.") + threshold = Param(Params._dummy(), "threshold", + "threshold in binary classification prediction, in range [0, 1].") @keyword_only def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1): + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + threshold=0.5, probabilityCol="probability"): """ __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1) + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ + threshold=0.5, probabilityCol="probability") """ super(LogisticRegression, self).__init__() - self._setDefault(maxIter=100, regParam=0.1) + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.LogisticRegression", self.uid) + #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty + # is an L2 penalty. For alpha = 1, it is an L1 penalty. + self.elasticNetParam = \ + Param(self, "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " + + "is an L2 penalty. For alpha = 1, it is an L1 penalty.") + #: param for whether to fit an intercept term. + self.fitIntercept = Param(self, "fitIntercept", "whether to fit an intercept term.") + #: param for threshold in binary classification prediction, in range [0, 1]. + self.threshold = Param(self, "threshold", + "threshold in binary classification prediction, in range [0, 1].") + self._setDefault(maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1E-6, + fitIntercept=True, threshold=0.5) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", - maxIter=100, regParam=0.1): + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, + threshold=0.5, probabilityCol="probability"): """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ - maxIter=100, regParam=0.1) + maxIter=100, regParam=0.1, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ + threshold=0.5, probabilityCol="probability") Sets params for logistic regression. """ kwargs = self.setParams._input_kwargs @@ -77,12 +109,460 @@ def setParams(self, featuresCol="features", labelCol="label", predictionCol="pre def _create_model(self, java_model): return LogisticRegressionModel(java_model) + def setElasticNetParam(self, value): + """ + Sets the value of :py:attr:`elasticNetParam`. + """ + self._paramMap[self.elasticNetParam] = value + return self + + def getElasticNetParam(self): + """ + Gets the value of elasticNetParam or its default value. + """ + return self.getOrDefault(self.elasticNetParam) + + def setFitIntercept(self, value): + """ + Sets the value of :py:attr:`fitIntercept`. + """ + self._paramMap[self.fitIntercept] = value + return self + + def getFitIntercept(self): + """ + Gets the value of fitIntercept or its default value. + """ + return self.getOrDefault(self.fitIntercept) + + def setThreshold(self, value): + """ + Sets the value of :py:attr:`threshold`. + """ + self._paramMap[self.threshold] = value + return self + + def getThreshold(self): + """ + Gets the value of threshold or its default value. 
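The new Python params above (elasticNetParam, fitIntercept, threshold, tol, probabilityCol) are thin wrappers over the Scala spark.ml estimator. A rough Scala equivalent, assuming a DataFrame named training with "label" and "features" columns:

    import org.apache.spark.ml.classification.LogisticRegression

    val lr = new LogisticRegression()
      .setMaxIter(100)
      .setRegParam(0.1)
      .setElasticNetParam(0.0)  // 0.0 => pure L2 penalty, 1.0 => pure L1 penalty
      .setFitIntercept(true)
      .setThreshold(0.5)
    val model = lr.fit(training)
    // The fitted model exposes weights and intercept, as in the Python doctest above.
    println(s"weights=${model.weights}, intercept=${model.intercept}")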
+ """ + return self.getOrDefault(self.threshold) + class LogisticRegressionModel(JavaModel): """ Model fitted by LogisticRegression. """ + @property + def weights(self): + """ + Model weights. + """ + return self._call_java("weights") + + @property + def intercept(self): + """ + Model intercept. + """ + return self._call_java("intercept") + + +class TreeClassifierParams(object): + """ + Private class to track supported impurity measures. + """ + supportedImpurities = ["entropy", "gini"] + + +class GBTParams(object): + """ + Private class to track supported GBT params. + """ + supportedLossTypes = ["logistic"] + + +@inherit_doc +class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree` + learning algorithm for classification. + It supports both binary and multiclass labels, as well as both continuous and categorical + features. + + >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.feature import StringIndexer + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> si_model = stringIndexer.fit(df) + >>> td = si_model.transform(df) + >>> dt = DecisionTreeClassifier(maxDepth=2, labelCol="indexed") + >>> model = dt.fit(td) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini"): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") + """ + super(DecisionTreeClassifier, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.DecisionTreeClassifier", self.uid) + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). 
" + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="gini"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini") + Sets params for the DecisionTreeClassifier. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return DecisionTreeClassificationModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self._paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + +class DecisionTreeClassificationModel(JavaModel): + """ + Model fitted by DecisionTreeClassifier. + """ + + +@inherit_doc +class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Random_forest Random Forest` + learning algorithm for classification. + It supports both binary and multiclass labels, as well as both continuous and categorical + features. + + >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.feature import StringIndexer + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> si_model = stringIndexer.fit(df) + >>> td = si_model.transform(df) + >>> rf = RandomForestClassifier(numTrees=2, maxDepth=2, labelCol="indexed", seed=42) + >>> model = rf.fit(td) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + numTrees = Param(Params._dummy(), "numTrees", "Number of trees to train (>= 1)") + featureSubsetStrategy = \ + Param(Params._dummy(), "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. 
Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", + numTrees=20, featureSubsetStrategy="auto", seed=None): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="gini", \ + numTrees=20, featureSubsetStrategy="auto", seed=None) + """ + super(RandomForestClassifier, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.RandomForestClassifier", self.uid) + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeClassifierParams.supportedImpurities)) + #: param for Fraction of the training data used for learning each decision tree, + # in range (0, 1] + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: param for Number of trees to train (>= 1) + self.numTrees = Param(self, "numTrees", "Number of trees to train (>= 1)") + #: param for The number of features to consider for splits at each tree node + self.featureSubsetStrategy = \ + Param(self, "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, + impurity="gini", numTrees=20, featureSubsetStrategy="auto") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, + impurity="gini", numTrees=20, featureSubsetStrategy="auto"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \ + impurity="gini", numTrees=20, featureSubsetStrategy="auto") + Sets params for linear classification. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return RandomForestClassificationModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self._paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self._paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. 
+ """ + return self.getOrDefault(self.subsamplingRate) + + def setNumTrees(self, value): + """ + Sets the value of :py:attr:`numTrees`. + """ + self._paramMap[self.numTrees] = value + return self + + def getNumTrees(self): + """ + Gets the value of numTrees or its default value. + """ + return self.getOrDefault(self.numTrees) + + def setFeatureSubsetStrategy(self, value): + """ + Sets the value of :py:attr:`featureSubsetStrategy`. + """ + self._paramMap[self.featureSubsetStrategy] = value + return self + + def getFeatureSubsetStrategy(self): + """ + Gets the value of featureSubsetStrategy or its default value. + """ + return self.getOrDefault(self.featureSubsetStrategy) + + +class RandomForestClassificationModel(JavaModel): + """ + Model fitted by RandomForestClassifier. + """ + + +@inherit_doc +class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)` + learning algorithm for classification. + It supports binary labels, as well as both continuous and categorical features. + Note: Multiclass labels are not currently supported. + + >>> from pyspark.mllib.linalg import Vectors + >>> from pyspark.ml.feature import StringIndexer + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> si_model = stringIndexer.fit(df) + >>> td = si_model.transform(df) + >>> gbt = GBTClassifier(maxIter=5, maxDepth=2, labelCol="indexed") + >>> model = gbt.fit(td) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + # a placeholder to make it appear in the generated doc + lossType = Param(Params._dummy(), "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + stepSize = Param(Params._dummy(), "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the " + + "contribution of each estimator") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="logistic", + maxIter=20, stepSize=0.1): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + lossType="logistic", maxIter=20, stepSize=0.1) + """ + super(GBTClassifier, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.classification.GBTClassifier", self.uid) + #: param for Loss function which GBT tries to minimize (case-insensitive). + self.lossType = Param(self, "lossType", + "Loss function which GBT tries to minimize (case-insensitive). 
" + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + #: Fraction of the training data used for learning each decision tree, in range (0, 1]. + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of + # each estimator + self.stepSize = Param(self, "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " + + "the contribution of each estimator") + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="logistic", maxIter=20, stepSize=0.1): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + lossType="logistic", maxIter=20, stepSize=0.1) + Sets params for Gradient Boosted Tree Classification. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return GBTClassificationModel(java_model) + + def setLossType(self, value): + """ + Sets the value of :py:attr:`lossType`. + """ + self._paramMap[self.lossType] = value + return self + + def getLossType(self): + """ + Gets the value of lossType or its default value. + """ + return self.getOrDefault(self.lossType) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self._paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self._paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) + + +class GBTClassificationModel(JavaModel): + """ + Model fitted by GBTClassifier. + """ + if __name__ == "__main__": import doctest @@ -91,7 +571,7 @@ class LogisticRegressionModel(JavaModel): globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: - sc = SparkContext("local[2]", "ml.feature tests") + sc = SparkContext("local[2]", "ml.classification tests") sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py new file mode 100644 index 0000000000000..d8ddb78c6d639 --- /dev/null +++ b/python/pyspark/ml/evaluation.py @@ -0,0 +1,230 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from abc import abstractmethod, ABCMeta + +from pyspark.ml.wrapper import JavaWrapper +from pyspark.ml.param import Param, Params +from pyspark.ml.param.shared import HasLabelCol, HasPredictionCol, HasRawPredictionCol +from pyspark.ml.util import keyword_only +from pyspark.mllib.common import inherit_doc + +__all__ = ['Evaluator', 'BinaryClassificationEvaluator', 'RegressionEvaluator'] + + +@inherit_doc +class Evaluator(Params): + """ + Base class for evaluators that compute metrics from predictions. + """ + + __metaclass__ = ABCMeta + + @abstractmethod + def _evaluate(self, dataset): + """ + Evaluates the output. + + :param dataset: a dataset that contains labels/observations and + predictions + :return: metric + """ + raise NotImplementedError() + + def evaluate(self, dataset, params={}): + """ + Evaluates the output with optional parameters. + + :param dataset: a dataset that contains labels/observations and + predictions + :param params: an optional param map that overrides embedded + params + :return: metric + """ + if isinstance(params, dict): + if params: + return self.copy(params)._evaluate(dataset) + else: + return self._evaluate(dataset) + else: + raise ValueError("Params must be a param map but got %s." % type(params)) + + +@inherit_doc +class JavaEvaluator(Evaluator, JavaWrapper): + """ + Base class for :py:class:`Evaluator`s that wrap Java/Scala + implementations. + """ + + __metaclass__ = ABCMeta + + def _evaluate(self, dataset): + """ + Evaluates the output. + :param dataset: a dataset that contains labels/observations and predictions. + :return: evaluation metric + """ + self._transfer_params_to_java() + return self._java_obj.evaluate(dataset._jdf) + + +@inherit_doc +class BinaryClassificationEvaluator(JavaEvaluator, HasLabelCol, HasRawPredictionCol): + """ + Evaluator for binary classification, which expects two input + columns: rawPrediction and label. + + >>> from pyspark.mllib.linalg import Vectors + >>> scoreAndLabels = map(lambda x: (Vectors.dense([1.0 - x[0], x[0]]), x[1]), + ... [(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)]) + >>> dataset = sqlContext.createDataFrame(scoreAndLabels, ["raw", "label"]) + ... + >>> evaluator = BinaryClassificationEvaluator(rawPredictionCol="raw") + >>> evaluator.evaluate(dataset) + 0.70... + >>> evaluator.evaluate(dataset, {evaluator.metricName: "areaUnderPR"}) + 0.83... 
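# [Editor's note: illustrative sketch, not part of the patch. It shows how an
#  Evaluator from this new module might be used with a fitted classifier,
#  assuming the pyspark 1.4 API and a live `sqlContext`. The param-map argument
#  to evaluate() overrides the metric for that call only.]
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.mllib.linalg import Vectors

train = sqlContext.createDataFrame([(1.0, Vectors.dense(1.0)),
                                    (0.0, Vectors.sparse(1, [], []))], ["label", "features"])
scored = LogisticRegression(maxIter=10, regParam=0.01).fit(train).transform(train)
# Assumes the model writes its default "rawPrediction" column, which matches
# the evaluator's default rawPredictionCol.
evaluator = BinaryClassificationEvaluator()    # defaults: rawPrediction / label / areaUnderROC
print(evaluator.evaluate(scored))
print(evaluator.evaluate(scored, {evaluator.metricName: "areaUnderPR"}))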
+ """ + + # a placeholder to make it appear in the generated doc + metricName = Param(Params._dummy(), "metricName", + "metric name in evaluation (areaUnderROC|areaUnderPR)") + + @keyword_only + def __init__(self, rawPredictionCol="rawPrediction", labelCol="label", + metricName="areaUnderROC"): + """ + __init__(self, rawPredictionCol="rawPrediction", labelCol="label", \ + metricName="areaUnderROC") + """ + super(BinaryClassificationEvaluator, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.evaluation.BinaryClassificationEvaluator", self.uid) + #: param for metric name in evaluation (areaUnderROC|areaUnderPR) + self.metricName = Param(self, "metricName", + "metric name in evaluation (areaUnderROC|areaUnderPR)") + self._setDefault(rawPredictionCol="rawPrediction", labelCol="label", + metricName="areaUnderROC") + kwargs = self.__init__._input_kwargs + self._set(**kwargs) + + def setMetricName(self, value): + """ + Sets the value of :py:attr:`metricName`. + """ + self._paramMap[self.metricName] = value + return self + + def getMetricName(self): + """ + Gets the value of metricName or its default value. + """ + return self.getOrDefault(self.metricName) + + @keyword_only + def setParams(self, rawPredictionCol="rawPrediction", labelCol="label", + metricName="areaUnderROC"): + """ + setParams(self, rawPredictionCol="rawPrediction", labelCol="label", \ + metricName="areaUnderROC") + Sets params for binary classification evaluator. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +@inherit_doc +class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): + """ + Evaluator for Regression, which expects two input + columns: prediction and label. + + >>> scoreAndLabels = [(-28.98343821, -27.0), (20.21491975, 21.5), + ... (-25.98418959, -22.0), (30.69731842, 33.0), (74.69283752, 71.0)] + >>> dataset = sqlContext.createDataFrame(scoreAndLabels, ["raw", "label"]) + ... + >>> evaluator = RegressionEvaluator(predictionCol="raw") + >>> evaluator.evaluate(dataset) + 2.842... + >>> evaluator.evaluate(dataset, {evaluator.metricName: "r2"}) + 0.993... + >>> evaluator.evaluate(dataset, {evaluator.metricName: "mae"}) + 2.649... + """ + # a placeholder to make it appear in the generated doc + metricName = Param(Params._dummy(), "metricName", + "metric name in evaluation (mse|rmse|r2|mae)") + + @keyword_only + def __init__(self, predictionCol="prediction", labelCol="label", + metricName="rmse"): + """ + __init__(self, predictionCol="prediction", labelCol="label", \ + metricName="rmse") + """ + super(RegressionEvaluator, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.evaluation.RegressionEvaluator", self.uid) + #: param for metric name in evaluation (mse|rmse|r2|mae) + self.metricName = Param(self, "metricName", + "metric name in evaluation (mse|rmse|r2|mae)") + self._setDefault(predictionCol="prediction", labelCol="label", + metricName="rmse") + kwargs = self.__init__._input_kwargs + self._set(**kwargs) + + def setMetricName(self, value): + """ + Sets the value of :py:attr:`metricName`. + """ + self._paramMap[self.metricName] = value + return self + + def getMetricName(self): + """ + Gets the value of metricName or its default value. 
+ """ + return self.getOrDefault(self.metricName) + + @keyword_only + def setParams(self, predictionCol="prediction", labelCol="label", + metricName="rmse"): + """ + setParams(self, predictionCol="prediction", labelCol="label", \ + metricName="rmse") + Sets params for regression evaluator. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + +if __name__ == "__main__": + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + globs = globals().copy() + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + sc = SparkContext("local[2]", "ml.evaluation tests") + sqlContext = SQLContext(sc) + globs['sc'] = sc + globs['sqlContext'] = sqlContext + (failure_count, test_count) = doctest.testmod( + globs=globs, optionflags=doctest.ELLIPSIS) + sc.stop() + if failure_count: + exit(-1) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 4e4614b859ac6..ddb33f427ac64 100644 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -16,12 +16,657 @@ # from pyspark.rdd import ignore_unicode_prefix -from pyspark.ml.param.shared import HasInputCol, HasOutputCol, HasNumFeatures +from pyspark.ml.param.shared import * from pyspark.ml.util import keyword_only -from pyspark.ml.wrapper import JavaTransformer +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaTransformer from pyspark.mllib.common import inherit_doc -__all__ = ['Tokenizer', 'HashingTF'] +__all__ = ['Binarizer', 'HashingTF', 'IDF', 'IDFModel', 'Normalizer', 'OneHotEncoder', + 'PolynomialExpansion', 'RegexTokenizer', 'StandardScaler', 'StandardScalerModel', + 'StringIndexer', 'StringIndexerModel', 'Tokenizer', 'VectorAssembler', 'VectorIndexer', + 'Word2Vec', 'Word2VecModel'] + + +@inherit_doc +class Binarizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + Binarize a column of continuous features given a threshold. + + >>> df = sqlContext.createDataFrame([(0.5,)], ["values"]) + >>> binarizer = Binarizer(threshold=1.0, inputCol="values", outputCol="features") + >>> binarizer.transform(df).head().features + 0.0 + >>> binarizer.setParams(outputCol="freqs").transform(df).head().freqs + 0.0 + >>> params = {binarizer.threshold: -0.5, binarizer.outputCol: "vector"} + >>> binarizer.transform(df, params).head().vector + 1.0 + """ + + # a placeholder to make it appear in the generated doc + threshold = Param(Params._dummy(), "threshold", + "threshold in binary classification prediction, in range [0, 1]") + + @keyword_only + def __init__(self, threshold=0.0, inputCol=None, outputCol=None): + """ + __init__(self, threshold=0.0, inputCol=None, outputCol=None) + """ + super(Binarizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Binarizer", self.uid) + self.threshold = Param(self, "threshold", + "threshold in binary classification prediction, in range [0, 1]") + self._setDefault(threshold=0.0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, threshold=0.0, inputCol=None, outputCol=None): + """ + setParams(self, threshold=0.0, inputCol=None, outputCol=None) + Sets params for this Binarizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setThreshold(self, value): + """ + Sets the value of :py:attr:`threshold`. + """ + self._paramMap[self.threshold] = value + return self + + def getThreshold(self): + """ + Gets the value of threshold or its default value. 
+ """ + return self.getOrDefault(self.threshold) + + +@inherit_doc +class Bucketizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + Maps a column of continuous features to a column of feature buckets. + + >>> df = sqlContext.createDataFrame([(0.1,), (0.4,), (1.2,), (1.5,)], ["values"]) + >>> bucketizer = Bucketizer(splits=[-float("inf"), 0.5, 1.4, float("inf")], + ... inputCol="values", outputCol="buckets") + >>> bucketed = bucketizer.transform(df).collect() + >>> bucketed[0].buckets + 0.0 + >>> bucketed[1].buckets + 0.0 + >>> bucketed[2].buckets + 1.0 + >>> bucketed[3].buckets + 2.0 + >>> bucketizer.setParams(outputCol="b").transform(df).head().b + 0.0 + """ + + # a placeholder to make it appear in the generated doc + splits = \ + Param(Params._dummy(), "splits", + "Split points for mapping continuous features into buckets. With n+1 splits, " + + "there are n buckets. A bucket defined by splits x,y holds values in the " + + "range [x,y) except the last bucket, which also includes y. The splits " + + "should be strictly increasing. Values at -inf, inf must be explicitly " + + "provided to cover all Double values; otherwise, values outside the splits " + + "specified will be treated as errors.") + + @keyword_only + def __init__(self, splits=None, inputCol=None, outputCol=None): + """ + __init__(self, splits=None, inputCol=None, outputCol=None) + """ + super(Bucketizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Bucketizer", self.uid) + #: param for Splitting points for mapping continuous features into buckets. With n+1 splits, + # there are n buckets. A bucket defined by splits x,y holds values in the range [x,y) + # except the last bucket, which also includes y. The splits should be strictly increasing. + # Values at -inf, inf must be explicitly provided to cover all Double values; otherwise, + # values outside the splits specified will be treated as errors. + self.splits = \ + Param(self, "splits", + "Split points for mapping continuous features into buckets. With n+1 splits, " + + "there are n buckets. A bucket defined by splits x,y holds values in the " + + "range [x,y) except the last bucket, which also includes y. The splits " + + "should be strictly increasing. Values at -inf, inf must be explicitly " + + "provided to cover all Double values; otherwise, values outside the splits " + + "specified will be treated as errors.") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, splits=None, inputCol=None, outputCol=None): + """ + setParams(self, splits=None, inputCol=None, outputCol=None) + Sets params for this Bucketizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setSplits(self, value): + """ + Sets the value of :py:attr:`splits`. + """ + self._paramMap[self.splits] = value + return self + + def getSplits(self): + """ + Gets the value of threshold or its default value. + """ + return self.getOrDefault(self.splits) + + +@inherit_doc +class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): + """ + Maps a sequence of terms to their term frequencies using the + hashing trick. 
+ + >>> df = sqlContext.createDataFrame([(["a", "b", "c"],)], ["words"]) + >>> hashingTF = HashingTF(numFeatures=10, inputCol="words", outputCol="features") + >>> hashingTF.transform(df).head().features + SparseVector(10, {7: 1.0, 8: 1.0, 9: 1.0}) + >>> hashingTF.setParams(outputCol="freqs").transform(df).head().freqs + SparseVector(10, {7: 1.0, 8: 1.0, 9: 1.0}) + >>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"} + >>> hashingTF.transform(df, params).head().vector + SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0}) + """ + + @keyword_only + def __init__(self, numFeatures=1 << 18, inputCol=None, outputCol=None): + """ + __init__(self, numFeatures=1 << 18, inputCol=None, outputCol=None) + """ + super(HashingTF, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.HashingTF", self.uid) + self._setDefault(numFeatures=1 << 18) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None): + """ + setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None) + Sets params for this HashingTF. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +@inherit_doc +class IDF(JavaEstimator, HasInputCol, HasOutputCol): + """ + Compute the Inverse Document Frequency (IDF) given a collection of documents. + + >>> from pyspark.mllib.linalg import DenseVector + >>> df = sqlContext.createDataFrame([(DenseVector([1.0, 2.0]),), + ... (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"]) + >>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf") + >>> idf.fit(df).transform(df).head().idf + DenseVector([0.0, 0.0]) + >>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs + DenseVector([0.0, 0.0]) + >>> params = {idf.minDocFreq: 1, idf.outputCol: "vector"} + >>> idf.fit(df, params).transform(df).head().vector + DenseVector([0.2877, 0.0]) + """ + + # a placeholder to make it appear in the generated doc + minDocFreq = Param(Params._dummy(), "minDocFreq", + "minimum of documents in which a term should appear for filtering") + + @keyword_only + def __init__(self, minDocFreq=0, inputCol=None, outputCol=None): + """ + __init__(self, minDocFreq=0, inputCol=None, outputCol=None) + """ + super(IDF, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.IDF", self.uid) + self.minDocFreq = Param(self, "minDocFreq", + "minimum of documents in which a term should appear for filtering") + self._setDefault(minDocFreq=0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, minDocFreq=0, inputCol=None, outputCol=None): + """ + setParams(self, minDocFreq=0, inputCol=None, outputCol=None) + Sets params for this IDF. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMinDocFreq(self, value): + """ + Sets the value of :py:attr:`minDocFreq`. + """ + self._paramMap[self.minDocFreq] = value + return self + + def getMinDocFreq(self): + """ + Gets the value of minDocFreq or its default value. + """ + return self.getOrDefault(self.minDocFreq) + + def _create_model(self, java_model): + return IDFModel(java_model) + + +class IDFModel(JavaModel): + """ + Model fitted by IDF. + """ + + +@inherit_doc +class Normalizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + Normalize a vector to have unit norm using the given p-norm. 
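# [Editor's note: illustrative sketch, not part of the patch; assumes the
#  pyspark 1.4 API and a live `sqlContext`. HashingTF is a pure Transformer,
#  while IDF is an Estimator whose fit() returns an IDFModel, so the two chain
#  into a small TF-IDF flow.]
from pyspark.ml.feature import Tokenizer, HashingTF, IDF

docs = sqlContext.createDataFrame([("spark spark hadoop",), ("spark flink",)], ["text"])
words = Tokenizer(inputCol="text", outputCol="words").transform(docs)
tf = HashingTF(numFeatures=1 << 10, inputCol="words", outputCol="tf").transform(words)
tfidf = IDF(inputCol="tf", outputCol="features").fit(tf).transform(tf)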
+ + >>> from pyspark.mllib.linalg import Vectors + >>> svec = Vectors.sparse(4, {1: 4.0, 3: 3.0}) + >>> df = sqlContext.createDataFrame([(Vectors.dense([3.0, -4.0]), svec)], ["dense", "sparse"]) + >>> normalizer = Normalizer(p=2.0, inputCol="dense", outputCol="features") + >>> normalizer.transform(df).head().features + DenseVector([0.6, -0.8]) + >>> normalizer.setParams(inputCol="sparse", outputCol="freqs").transform(df).head().freqs + SparseVector(4, {1: 0.8, 3: 0.6}) + >>> params = {normalizer.p: 1.0, normalizer.inputCol: "dense", normalizer.outputCol: "vector"} + >>> normalizer.transform(df, params).head().vector + DenseVector([0.4286, -0.5714]) + """ + + # a placeholder to make it appear in the generated doc + p = Param(Params._dummy(), "p", "the p norm value.") + + @keyword_only + def __init__(self, p=2.0, inputCol=None, outputCol=None): + """ + __init__(self, p=2.0, inputCol=None, outputCol=None) + """ + super(Normalizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Normalizer", self.uid) + self.p = Param(self, "p", "the p norm value.") + self._setDefault(p=2.0) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, p=2.0, inputCol=None, outputCol=None): + """ + setParams(self, p=2.0, inputCol=None, outputCol=None) + Sets params for this Normalizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setP(self, value): + """ + Sets the value of :py:attr:`p`. + """ + self._paramMap[self.p] = value + return self + + def getP(self): + """ + Gets the value of p or its default value. + """ + return self.getOrDefault(self.p) + + +@inherit_doc +class OneHotEncoder(JavaTransformer, HasInputCol, HasOutputCol): + """ + A one-hot encoder that maps a column of category indices to a + column of binary vectors, with at most a single one-value per row + that indicates the input category index. + For example with 5 categories, an input value of 2.0 would map to + an output vector of `[0.0, 0.0, 1.0, 0.0]`. + The last category is not included by default (configurable via + :py:attr:`dropLast`) because it makes the vector entries sum up to + one, and hence linearly dependent. + So an input value of 4.0 maps to `[0.0, 0.0, 0.0, 0.0]`. + Note that this is different from scikit-learn's OneHotEncoder, + which keeps all categories. + The output vectors are sparse. + + .. 
seealso:: + + :py:class:`StringIndexer` for converting categorical values into + category indices + + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> model = stringIndexer.fit(stringIndDf) + >>> td = model.transform(stringIndDf) + >>> encoder = OneHotEncoder(inputCol="indexed", outputCol="features") + >>> encoder.transform(td).head().features + SparseVector(2, {0: 1.0}) + >>> encoder.setParams(outputCol="freqs").transform(td).head().freqs + SparseVector(2, {0: 1.0}) + >>> params = {encoder.dropLast: False, encoder.outputCol: "test"} + >>> encoder.transform(td, params).head().test + SparseVector(3, {0: 1.0}) + """ + + # a placeholder to make it appear in the generated doc + dropLast = Param(Params._dummy(), "dropLast", "whether to drop the last category") + + @keyword_only + def __init__(self, dropLast=True, inputCol=None, outputCol=None): + """ + __init__(self, includeFirst=True, inputCol=None, outputCol=None) + """ + super(OneHotEncoder, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.OneHotEncoder", self.uid) + self.dropLast = Param(self, "dropLast", "whether to drop the last category") + self._setDefault(dropLast=True) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, dropLast=True, inputCol=None, outputCol=None): + """ + setParams(self, dropLast=True, inputCol=None, outputCol=None) + Sets params for this OneHotEncoder. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setDropLast(self, value): + """ + Sets the value of :py:attr:`dropLast`. + """ + self._paramMap[self.dropLast] = value + return self + + def getDropLast(self): + """ + Gets the value of dropLast or its default value. + """ + return self.getOrDefault(self.dropLast) + + +@inherit_doc +class PolynomialExpansion(JavaTransformer, HasInputCol, HasOutputCol): + """ + Perform feature expansion in a polynomial space. As said in wikipedia of Polynomial Expansion, + which is available at `http://en.wikipedia.org/wiki/Polynomial_expansion`, "In mathematics, an + expansion of a product of sums expresses it as a sum of products by using the fact that + multiplication distributes over addition". Take a 2-variable feature vector as an example: + `(x, y)`, if we want to expand it with degree 2, then we get `(x, x * x, y, x * y, y * y)`. 
+ + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"]) + >>> px = PolynomialExpansion(degree=2, inputCol="dense", outputCol="expanded") + >>> px.transform(df).head().expanded + DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + >>> px.setParams(outputCol="test").transform(df).head().test + DenseVector([0.5, 0.25, 2.0, 1.0, 4.0]) + """ + + # a placeholder to make it appear in the generated doc + degree = Param(Params._dummy(), "degree", "the polynomial degree to expand (>= 1)") + + @keyword_only + def __init__(self, degree=2, inputCol=None, outputCol=None): + """ + __init__(self, degree=2, inputCol=None, outputCol=None) + """ + super(PolynomialExpansion, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.feature.PolynomialExpansion", self.uid) + self.degree = Param(self, "degree", "the polynomial degree to expand (>= 1)") + self._setDefault(degree=2) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, degree=2, inputCol=None, outputCol=None): + """ + setParams(self, degree=2, inputCol=None, outputCol=None) + Sets params for this PolynomialExpansion. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setDegree(self, value): + """ + Sets the value of :py:attr:`degree`. + """ + self._paramMap[self.degree] = value + return self + + def getDegree(self): + """ + Gets the value of degree or its default value. + """ + return self.getOrDefault(self.degree) + + +@inherit_doc +@ignore_unicode_prefix +class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol): + """ + A regex based tokenizer that extracts tokens either by using the + provided regex pattern (in Java dialect) to split the text + (default) or repeatedly matching the regex (if gaps is true). + Optional parameters also allow filtering tokens using a minimal + length. + It returns an array of strings that can be empty. + + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) + >>> reTokenizer = RegexTokenizer(inputCol="text", outputCol="words") + >>> reTokenizer.transform(df).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> # Change a parameter. + >>> reTokenizer.setParams(outputCol="tokens").transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Temporarily modify a parameter. + >>> reTokenizer.transform(df, {reTokenizer.outputCol: "words"}).head() + Row(text=u'a b c', words=[u'a', u'b', u'c']) + >>> reTokenizer.transform(df).head() + Row(text=u'a b c', tokens=[u'a', u'b', u'c']) + >>> # Must use keyword arguments to specify params. + >>> reTokenizer.setParams("text") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. 
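# [Editor's note: illustrative sketch, not part of the patch; assumes the
#  pyspark 1.4 API and a live `sqlContext`. With gaps=False the RegexTokenizer
#  pattern matches tokens rather than splitting on it, and dropLast=False keeps
#  every category in the OneHotEncoder output vector. Column names here are
#  made up for illustration.]
from pyspark.ml.feature import OneHotEncoder, RegexTokenizer, StringIndexer

text_df = sqlContext.createDataFrame([("Spark adds new ML features",)], ["text"])
tokens = RegexTokenizer(inputCol="text", outputCol="words",
                        pattern="[a-zA-Z]+", gaps=False).transform(text_df)

cat_df = sqlContext.createDataFrame([(0, "a"), (1, "b"), (2, "a")], ["id", "label"])
indexed = StringIndexer(inputCol="label", outputCol="labelIndex").fit(cat_df).transform(cat_df)
onehot = OneHotEncoder(inputCol="labelIndex", outputCol="labelVec",
                       dropLast=False).transform(indexed)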
+ """ + + # a placeholder to make it appear in the generated doc + minTokenLength = Param(Params._dummy(), "minTokenLength", "minimum token length (>= 0)") + gaps = Param(Params._dummy(), "gaps", "whether regex splits on gaps (True) or matches tokens") + pattern = Param(Params._dummy(), "pattern", "regex pattern (Java dialect) used for tokenizing") + + @keyword_only + def __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None): + """ + __init__(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None) + """ + super(RegexTokenizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.RegexTokenizer", self.uid) + self.minTokenLength = Param(self, "minTokenLength", "minimum token length (>= 0)") + self.gaps = Param(self, "gaps", "whether regex splits on gaps (True) or matches tokens") + self.pattern = Param(self, "pattern", "regex pattern (Java dialect) used for tokenizing") + self._setDefault(minTokenLength=1, gaps=True, pattern="\\s+") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None): + """ + setParams(self, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, outputCol=None) + Sets params for this RegexTokenizer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMinTokenLength(self, value): + """ + Sets the value of :py:attr:`minTokenLength`. + """ + self._paramMap[self.minTokenLength] = value + return self + + def getMinTokenLength(self): + """ + Gets the value of minTokenLength or its default value. + """ + return self.getOrDefault(self.minTokenLength) + + def setGaps(self, value): + """ + Sets the value of :py:attr:`gaps`. + """ + self._paramMap[self.gaps] = value + return self + + def getGaps(self): + """ + Gets the value of gaps or its default value. + """ + return self.getOrDefault(self.gaps) + + def setPattern(self, value): + """ + Sets the value of :py:attr:`pattern`. + """ + self._paramMap[self.pattern] = value + return self + + def getPattern(self): + """ + Gets the value of pattern or its default value. + """ + return self.getOrDefault(self.pattern) + + +@inherit_doc +class StandardScaler(JavaEstimator, HasInputCol, HasOutputCol): + """ + Standardizes features by removing the mean and scaling to unit variance using column summary + statistics on the samples in the training set. 
+ + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([0.0]),), (Vectors.dense([2.0]),)], ["a"]) + >>> standardScaler = StandardScaler(inputCol="a", outputCol="scaled") + >>> model = standardScaler.fit(df) + >>> model.transform(df).collect()[1].scaled + DenseVector([1.4142]) + """ + + # a placeholder to make it appear in the generated doc + withMean = Param(Params._dummy(), "withMean", "Center data with mean") + withStd = Param(Params._dummy(), "withStd", "Scale to unit standard deviation") + + @keyword_only + def __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None): + """ + __init__(self, withMean=False, withStd=True, inputCol=None, outputCol=None) + """ + super(StandardScaler, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StandardScaler", self.uid) + self.withMean = Param(self, "withMean", "Center data with mean") + self.withStd = Param(self, "withStd", "Scale to unit standard deviation") + self._setDefault(withMean=False, withStd=True) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None): + """ + setParams(self, withMean=False, withStd=True, inputCol=None, outputCol=None) + Sets params for this StandardScaler. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setWithMean(self, value): + """ + Sets the value of :py:attr:`withMean`. + """ + self._paramMap[self.withMean] = value + return self + + def getWithMean(self): + """ + Gets the value of withMean or its default value. + """ + return self.getOrDefault(self.withMean) + + def setWithStd(self, value): + """ + Sets the value of :py:attr:`withStd`. + """ + self._paramMap[self.withStd] = value + return self + + def getWithStd(self): + """ + Gets the value of withStd or its default value. + """ + return self.getOrDefault(self.withStd) + + def _create_model(self, java_model): + return StandardScalerModel(java_model) + + +class StandardScalerModel(JavaModel): + """ + Model fitted by StandardScaler. + """ + + +@inherit_doc +class StringIndexer(JavaEstimator, HasInputCol, HasOutputCol): + """ + A label indexer that maps a string column of labels to an ML column of label indices. + If the input column is numeric, we cast it to string and index the string values. + The indices are in [0, numLabels), ordered by label frequencies. + So the most frequent label gets index 0. + + >>> stringIndexer = StringIndexer(inputCol="label", outputCol="indexed") + >>> model = stringIndexer.fit(stringIndDf) + >>> td = model.transform(stringIndDf) + >>> sorted(set([(i[0], i[1]) for i in td.select(td.id, td.indexed).collect()]), + ... key=lambda x: x[0]) + [(0, 0.0), (1, 2.0), (2, 1.0), (3, 0.0), (4, 0.0), (5, 1.0)] + """ + + @keyword_only + def __init__(self, inputCol=None, outputCol=None): + """ + __init__(self, inputCol=None, outputCol=None) + """ + super(StringIndexer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.StringIndexer", self.uid) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, inputCol=None, outputCol=None): + """ + setParams(self, inputCol=None, outputCol=None) + Sets params for this StringIndexer. 
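# [Editor's note: illustrative sketch, not part of the patch; assumes the
#  pyspark 1.4 API and a live `sqlContext`. withStd=True rescales each column
#  to unit standard deviation; withMean=True also centers it, which densifies
#  sparse input and is therefore off by default.]
from pyspark.ml.feature import StandardScaler
from pyspark.mllib.linalg import Vectors

df = sqlContext.createDataFrame([(Vectors.dense([0.0, 10.0]),),
                                 (Vectors.dense([2.0, 30.0]),)], ["features"])
scaler = StandardScaler(inputCol="features", outputCol="scaled",
                        withMean=True, withStd=True)
scaled = scaler.fit(df).transform(df)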
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return StringIndexerModel(java_model) + + +class StringIndexerModel(JavaModel): + """ + Model fitted by StringIndexer. + """ @inherit_doc @@ -31,8 +676,7 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): A tokenizer that converts the input string to lowercase and then splits it by white spaces. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(text="a b c")]).toDF() + >>> df = sqlContext.createDataFrame([("a b c",)], ["text"]) >>> tokenizer = Tokenizer(inputCol="text", outputCol="words") >>> tokenizer.transform(df).head() Row(text=u'a b c', words=[u'a', u'b', u'c']) @@ -51,14 +695,13 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol): TypeError: Method setParams forces keyword arguments. """ - _java_class = "org.apache.spark.ml.feature.Tokenizer" - @keyword_only def __init__(self, inputCol=None, outputCol=None): """ __init__(self, inputCol=None, outputCol=None) """ super(Tokenizer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Tokenizer", self.uid) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -73,49 +716,255 @@ def setParams(self, inputCol=None, outputCol=None): @inherit_doc -class HashingTF(JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures): +class VectorAssembler(JavaTransformer, HasInputCols, HasOutputCol): """ - Maps a sequence of terms to their term frequencies using the - hashing trick. + A feature transformer that merges multiple columns into a vector column. - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(words=["a", "b", "c"])]).toDF() - >>> hashingTF = HashingTF(numFeatures=10, inputCol="words", outputCol="features") - >>> hashingTF.transform(df).head().features - SparseVector(10, {7: 1.0, 8: 1.0, 9: 1.0}) - >>> hashingTF.setParams(outputCol="freqs").transform(df).head().freqs - SparseVector(10, {7: 1.0, 8: 1.0, 9: 1.0}) - >>> params = {hashingTF.numFeatures: 5, hashingTF.outputCol: "vector"} - >>> hashingTF.transform(df, params).head().vector - SparseVector(5, {2: 1.0, 3: 1.0, 4: 1.0}) + >>> df = sqlContext.createDataFrame([(1, 0, 3)], ["a", "b", "c"]) + >>> vecAssembler = VectorAssembler(inputCols=["a", "b", "c"], outputCol="features") + >>> vecAssembler.transform(df).head().features + DenseVector([1.0, 0.0, 3.0]) + >>> vecAssembler.setParams(outputCol="freqs").transform(df).head().freqs + DenseVector([1.0, 0.0, 3.0]) + >>> params = {vecAssembler.inputCols: ["b", "a"], vecAssembler.outputCol: "vector"} + >>> vecAssembler.transform(df, params).head().vector + DenseVector([0.0, 1.0]) """ - _java_class = "org.apache.spark.ml.feature.HashingTF" + @keyword_only + def __init__(self, inputCols=None, outputCol=None): + """ + __init__(self, inputCols=None, outputCol=None) + """ + super(VectorAssembler, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorAssembler", self.uid) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) @keyword_only - def __init__(self, numFeatures=1 << 18, inputCol=None, outputCol=None): + def setParams(self, inputCols=None, outputCol=None): """ - __init__(self, numFeatures=1 << 18, inputCol=None, outputCol=None) + setParams(self, inputCols=None, outputCol=None) + Sets params for this VectorAssembler. 
""" - super(HashingTF, self).__init__() - self._setDefault(numFeatures=1 << 18) + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +@inherit_doc +class VectorIndexer(JavaEstimator, HasInputCol, HasOutputCol): + """ + Class for indexing categorical feature columns in a dataset of [[Vector]]. + + This has 2 usage modes: + - Automatically identify categorical features (default behavior) + - This helps process a dataset of unknown vectors into a dataset with some continuous + features and some categorical features. The choice between continuous and categorical + is based upon a maxCategories parameter. + - Set maxCategories to the maximum number of categorical any categorical feature should + have. + - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. + If maxCategories = 2, then feature 0 will be declared categorical and use indices {0, 1}, + and feature 1 will be declared continuous. + - Index all features, if all features are categorical + - If maxCategories is set to be very large, then this will build an index of unique + values for all features. + - Warning: This can cause problems if features are continuous since this will collect ALL + unique values to the driver. + - E.g.: Feature 0 has unique values {-1.0, 0.0}, and feature 1 values {1.0, 3.0, 5.0}. + If maxCategories >= 3, then both features will be declared categorical. + + This returns a model which can transform categorical features to use 0-based indices. + + Index stability: + - This is not guaranteed to choose the same category index across multiple runs. + - If a categorical feature includes value 0, then this is guaranteed to map value 0 to + index 0. This maintains vector sparsity. + - More stability may be added in the future. + + TODO: Future extensions: The following functionality is planned for the future: + - Preserve metadata in transform; if a feature's metadata is already present, + do not recompute. + - Specify certain features to not index, either via a parameter or via existing metadata. + - Add warning if a categorical feature has only 1 category. + - Add option for allowing unknown categories. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([(Vectors.dense([-1.0, 0.0]),), + ... (Vectors.dense([0.0, 1.0]),), (Vectors.dense([0.0, 2.0]),)], ["a"]) + >>> indexer = VectorIndexer(maxCategories=2, inputCol="a", outputCol="indexed") + >>> model = indexer.fit(df) + >>> model.transform(df).head().indexed + DenseVector([1.0, 0.0]) + >>> indexer.setParams(outputCol="test").fit(df).transform(df).collect()[1].test + DenseVector([0.0, 1.0]) + >>> params = {indexer.maxCategories: 3, indexer.outputCol: "vector"} + >>> model2 = indexer.fit(df, params) + >>> model2.transform(df).head().vector + DenseVector([1.0, 0.0]) + """ + + # a placeholder to make it appear in the generated doc + maxCategories = Param(Params._dummy(), "maxCategories", + "Threshold for the number of values a categorical feature can take " + + "(>= 2). 
If a feature is found to have > maxCategories values, then " + + "it is declared continuous.") + + @keyword_only + def __init__(self, maxCategories=20, inputCol=None, outputCol=None): + """ + __init__(self, maxCategories=20, inputCol=None, outputCol=None) + """ + super(VectorIndexer, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.VectorIndexer", self.uid) + self.maxCategories = Param(self, "maxCategories", + "Threshold for the number of values a categorical feature " + + "can take (>= 2). If a feature is found to have " + + "> maxCategories values, then it is declared continuous.") + self._setDefault(maxCategories=20) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @keyword_only - def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None): + def setParams(self, maxCategories=20, inputCol=None, outputCol=None): """ - setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None) - Sets params for this HashingTF. + setParams(self, maxCategories=20, inputCol=None, outputCol=None) + Sets params for this VectorIndexer. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setMaxCategories(self, value): + """ + Sets the value of :py:attr:`maxCategories`. + """ + self._paramMap[self.maxCategories] = value + return self + + def getMaxCategories(self): + """ + Gets the value of maxCategories or its default value. + """ + return self.getOrDefault(self.maxCategories) + + def _create_model(self, java_model): + return VectorIndexerModel(java_model) + + +class VectorIndexerModel(JavaModel): + """ + Model fitted by VectorIndexer. + """ + + +@inherit_doc +@ignore_unicode_prefix +class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): + """ + Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further + natural language processing or machine learning process. 
+ + >>> sent = ("a b " * 100 + "a c " * 10).split(" ") + >>> doc = sqlContext.createDataFrame([(sent,), (sent,)], ["sentence"]) + >>> model = Word2Vec(vectorSize=5, seed=42, inputCol="sentence", outputCol="model").fit(doc) + >>> model.transform(doc).head().model + DenseVector([-0.0422, -0.5138, -0.2546, 0.6885, 0.276]) + """ + + # a placeholder to make it appear in the generated doc + vectorSize = Param(Params._dummy(), "vectorSize", + "the dimension of codes after transforming from words") + numPartitions = Param(Params._dummy(), "numPartitions", + "number of partitions for sentences of words") + minCount = Param(Params._dummy(), "minCount", + "the minimum number of times a token must appear to be included in the " + + "word2vec model's vocabulary") + + @keyword_only + def __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=None, inputCol=None, outputCol=None): + """ + __init__(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \ + seed=None, inputCol=None, outputCol=None) + """ + super(Word2Vec, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.feature.Word2Vec", self.uid) + self.vectorSize = Param(self, "vectorSize", + "the dimension of codes after transforming from words") + self.numPartitions = Param(self, "numPartitions", + "number of partitions for sentences of words") + self.minCount = Param(self, "minCount", + "the minimum number of times a token must appear to be included " + + "in the word2vec model's vocabulary") + self._setDefault(vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=None) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, + seed=None, inputCol=None, outputCol=None): + """ + setParams(self, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, seed=None, \ + inputCol=None, outputCol=None) + Sets params for this Word2Vec. """ kwargs = self.setParams._input_kwargs return self._set(**kwargs) + def setVectorSize(self, value): + """ + Sets the value of :py:attr:`vectorSize`. + """ + self._paramMap[self.vectorSize] = value + return self + + def getVectorSize(self): + """ + Gets the value of vectorSize or its default value. + """ + return self.getOrDefault(self.vectorSize) + + def setNumPartitions(self, value): + """ + Sets the value of :py:attr:`numPartitions`. + """ + self._paramMap[self.numPartitions] = value + return self + + def getNumPartitions(self): + """ + Gets the value of numPartitions or its default value. + """ + return self.getOrDefault(self.numPartitions) + + def setMinCount(self, value): + """ + Sets the value of :py:attr:`minCount`. + """ + self._paramMap[self.minCount] = value + return self + + def getMinCount(self): + """ + Gets the value of minCount or its default value. + """ + return self.getOrDefault(self.minCount) + + def _create_model(self, java_model): + return Word2VecModel(java_model) + + +class Word2VecModel(JavaModel): + """ + Model fitted by Word2Vec. 
+ """ + if __name__ == "__main__": import doctest from pyspark.context import SparkContext - from pyspark.sql import SQLContext + from pyspark.sql import Row, SQLContext globs = globals().copy() # The small batch size here ensures that we see multiple batches, # even in these small test examples: @@ -123,8 +972,11 @@ def setParams(self, numFeatures=1 << 18, inputCol=None, outputCol=None): sqlContext = SQLContext(sc) globs['sc'] = sc globs['sqlContext'] = sqlContext - (failure_count, test_count) = doctest.testmod( - globs=globs, optionflags=doctest.ELLIPSIS) + testData = sc.parallelize([Row(id=0, label="a"), Row(id=1, label="b"), + Row(id=2, label="c"), Row(id=3, label="a"), + Row(id=4, label="a"), Row(id=5, label="c")], 2) + globs['stringIndDf'] = sqlContext.createDataFrame(testData) + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) sc.stop() if failure_count: exit(-1) diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 49c20b4cf70cf..7845536161e07 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -16,6 +16,7 @@ # from abc import ABCMeta +import copy from pyspark.ml.util import Identifiable @@ -29,9 +30,9 @@ class Param(object): """ def __init__(self, parent, name, doc): - if not isinstance(parent, Params): - raise TypeError("Parent must be a Params but got type %s." % type(parent)) - self.parent = parent + if not isinstance(parent, Identifiable): + raise TypeError("Parent must be an Identifiable but got type %s." % type(parent)) + self.parent = parent.uid self.name = str(name) self.doc = str(doc) @@ -41,6 +42,15 @@ def __str__(self): def __repr__(self): return "Param(parent=%r, name=%r, doc=%r)" % (self.parent, self.name, self.doc) + def __hash__(self): + return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, Param): + return self.parent == other.parent and self.name == other.name + else: + return False + class Params(Identifiable): """ @@ -51,10 +61,13 @@ class Params(Identifiable): __metaclass__ = ABCMeta #: internal param map for user-supplied values param map - paramMap = {} + _paramMap = {} #: internal param map for default values - defaultParamMap = {} + _defaultParamMap = {} + + #: value returned by :py:func:`params` + _params = None @property def params(self): @@ -63,10 +76,12 @@ def params(self): uses :py:func:`dir` to get all attributes of type :py:class:`Param`. """ - return list(filter(lambda attr: isinstance(attr, Param), - [getattr(self, x) for x in dir(self) if x != "params"])) + if self._params is None: + self._params = list(filter(lambda attr: isinstance(attr, Param), + [getattr(self, x) for x in dir(self) if x != "params"])) + return self._params - def _explain(self, param): + def explainParam(self, param): """ Explains a single param and returns its name, doc, and optional default value and user-supplied value in a string. 
@@ -74,10 +89,10 @@ def _explain(self, param): param = self._resolveParam(param) values = [] if self.isDefined(param): - if param in self.defaultParamMap: - values.append("default: %s" % self.defaultParamMap[param]) - if param in self.paramMap: - values.append("current: %s" % self.paramMap[param]) + if param in self._defaultParamMap: + values.append("default: %s" % self._defaultParamMap[param]) + if param in self._paramMap: + values.append("current: %s" % self._paramMap[param]) else: values.append("undefined") valueStr = "(" + ", ".join(values) + ")" @@ -88,7 +103,7 @@ def explainParams(self): Returns the documentation of all params with their optionally default values and user-supplied values. """ - return "\n".join([self._explain(param) for param in self.params]) + return "\n".join([self.explainParam(param) for param in self.params]) def getParam(self, paramName): """ @@ -105,56 +120,76 @@ def isSet(self, param): Checks whether a param is explicitly set by user. """ param = self._resolveParam(param) - return param in self.paramMap + return param in self._paramMap def hasDefault(self, param): """ Checks whether a param has a default value. """ param = self._resolveParam(param) - return param in self.defaultParamMap + return param in self._defaultParamMap def isDefined(self, param): """ - Checks whether a param is explicitly set by user or has a default value. + Checks whether a param is explicitly set by user or has + a default value. """ return self.isSet(param) or self.hasDefault(param) + def hasParam(self, paramName): + """ + Tests whether this instance contains a param with a given + (string) name. + """ + param = self._resolveParam(paramName) + return param in self.params + def getOrDefault(self, param): """ Gets the value of a param in the user-supplied param map or its - default value. Raises an error if either is set. + default value. Raises an error if neither is set. """ - if isinstance(param, Param): - if param in self.paramMap: - return self.paramMap[param] - else: - return self.defaultParamMap[param] - elif isinstance(param, str): - return self.getOrDefault(self.getParam(param)) + param = self._resolveParam(param) + if param in self._paramMap: + return self._paramMap[param] else: - raise KeyError("Cannot recognize %r as a param." % param) + return self._defaultParamMap[param] - def extractParamMap(self, extraParamMap={}): + def extractParamMap(self, extra={}): """ Extracts the embedded default param values and user-supplied values, and then merges them with extra values from input into a flat param map, where the latter value is used if there exist conflicts, i.e., with ordering: default param values < - user-supplied values < extraParamMap. - :param extraParamMap: extra param values + user-supplied values < extra. + :param extra: extra param values :return: merged param map """ - paramMap = self.defaultParamMap.copy() - paramMap.update(self.paramMap) - paramMap.update(extraParamMap) + paramMap = self._defaultParamMap.copy() + paramMap.update(self._paramMap) + paramMap.update(extra) return paramMap + def copy(self, extra={}): + """ + Creates a copy of this instance with the same uid and some + extra params. The default implementation creates a + shallow copy using :py:func:`copy.copy`, and then copies the + embedded and extra parameters over and returns the copy. + Subclasses should override this method if the default approach + is not sufficient. 
+ :param extra: Extra parameters to copy to the new instance + :return: Copy of this instance + """ + that = copy.copy(self) + that._paramMap = self.extractParamMap(extra) + return that + def _shouldOwn(self, param): """ Validates that the input param belongs to this Params instance. """ - if param.parent is not self: + if not (self.uid == param.parent and self.hasParam(param.name)): raise ValueError("Param %r does not belong to %r." % (param, self)) def _resolveParam(self, param): @@ -175,7 +210,8 @@ def _resolveParam(self, param): @staticmethod def _dummy(): """ - Returns a dummy Params instance used as a placeholder to generate docs. + Returns a dummy Params instance used as a placeholder to + generate docs. """ dummy = Params() dummy.uid = "undefined" @@ -186,7 +222,7 @@ def _set(self, **kwargs): Sets user-supplied params. """ for param, value in kwargs.items(): - self.paramMap[getattr(self, param)] = value + self._paramMap[getattr(self, param)] = value return self def _setDefault(self, **kwargs): @@ -194,5 +230,19 @@ def _setDefault(self, **kwargs): Sets default params. """ for param, value in kwargs.items(): - self.defaultParamMap[getattr(self, param)] = value + self._defaultParamMap[getattr(self, param)] = value return self + + def _copyValues(self, to, extra={}): + """ + Copies param values from this instance to another instance for + params shared by them. + :param to: the target instance + :param extra: extra params to be copied + :return: the target instance with param values copied + """ + paramMap = self.extractParamMap(extra) + for p in self.params: + if p in paramMap and to.hasParam(p.name): + to._set(**{p.name: paramMap[p]}) + return to diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index 6a3192465d66d..69efc424ec4ef 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -38,16 +38,13 @@ # python _shared_params_code_gen.py > shared.py -def _gen_param_code(name, doc, defaultValueStr): +def _gen_param_header(name, doc, defaultValueStr): """ - Generates Python code for a shared param class. + Generates the header part for shared variables :param name: param name :param doc: param doc - :param defaultValueStr: string representation of the default value - :return: code string """ - # TODO: How to correctly inherit instance attributes? template = '''class Has$Name(Params): """ Mixin for param $name: $doc. @@ -59,15 +56,35 @@ def _gen_param_code(name, doc, defaultValueStr): def __init__(self): super(Has$Name, self).__init__() #: param for $doc - self.$name = Param(self, "$name", "$doc") - if $defaultValueStr is not None: - self._setDefault($name=$defaultValueStr) + self.$name = Param(self, "$name", "$doc")''' + if defaultValueStr is not None: + template += ''' + self._setDefault($name=$defaultValueStr)''' + + Name = name[0].upper() + name[1:] + return template \ + .replace("$name", name) \ + .replace("$Name", Name) \ + .replace("$doc", doc) \ + .replace("$defaultValueStr", str(defaultValueStr)) + +def _gen_param_code(name, doc, defaultValueStr): + """ + Generates Python code for a shared param class. + + :param name: param name + :param doc: param doc + :param defaultValueStr: string representation of the default value + :return: code string + """ + # TODO: How to correctly inherit instance attributes? + template = ''' def set$Name(self, value): """ Sets the value of :py:attr:`$name`. 
""" - self.paramMap[self.$name] = value + self._paramMap[self.$name] = value return self def get$Name(self): @@ -88,15 +105,64 @@ def get$Name(self): print("\n# DO NOT MODIFY THIS FILE! It was generated by _shared_params_code_gen.py.\n") print("from pyspark.ml.param import Param, Params\n\n") shared = [ - ("maxIter", "max number of iterations", None), - ("regParam", "regularization constant", None), + ("maxIter", "max number of iterations (>= 0)", None), + ("regParam", "regularization parameter (>= 0)", None), ("featuresCol", "features column name", "'features'"), ("labelCol", "label column name", "'label'"), ("predictionCol", "prediction column name", "'prediction'"), + ("probabilityCol", "Column name for predicted class conditional probabilities. " + + "Note: Not all models output well-calibrated probability estimates! These probabilities " + + "should be treated as confidences, not precise probabilities.", "'probability'"), + ("rawPredictionCol", "raw prediction (a.k.a. confidence) column name", "'rawPrediction'"), ("inputCol", "input column name", None), - ("outputCol", "output column name", None), - ("numFeatures", "number of features", None)] + ("inputCols", "input column names", None), + ("outputCol", "output column name", "self.uid + '__output'"), + ("numFeatures", "number of features", None), + ("checkpointInterval", "checkpoint interval (>= 1)", None), + ("seed", "random seed", "hash(type(self).__name__)"), + ("tol", "the convergence tolerance for iterative algorithms", None), + ("stepSize", "Step size to be used for each iteration of optimization.", None)] code = [] for name, doc, defaultValueStr in shared: - code.append(_gen_param_code(name, doc, defaultValueStr)) + param_code = _gen_param_header(name, doc, defaultValueStr) + code.append(param_code + "\n" + _gen_param_code(name, doc, defaultValueStr)) + + decisionTreeParams = [ + ("maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; " + + "depth 1 means 1 internal node + 2 leaf nodes."), + ("maxBins", "Max number of bins for" + + " discretizing continuous features. Must be >=2 and >= number of categories for any" + + " categorical feature."), + ("minInstancesPerNode", "Minimum number of instances each child must have after split. " + + "If a split causes the left or right child to have fewer than minInstancesPerNode, the " + + "split will be discarded as invalid. Should be >= 1."), + ("minInfoGain", "Minimum information gain for a split to be considered at a tree node."), + ("maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation."), + ("cacheNodeIds", "If false, the algorithm will pass trees to executors to match " + + "instances with nodes. If true, the algorithm will cache node IDs for each instance. " + + "Caching can speed up training of deeper trees.")] + + decisionTreeCode = '''class DecisionTreeParams(Params): + """ + Mixin for Decision Tree parameters. + """ + + # a placeholder to make it appear in the generated doc + $dummyPlaceHolders + + def __init__(self): + super(DecisionTreeParams, self).__init__() + $realParams''' + dtParamMethods = "" + dummyPlaceholders = "" + realParams = "" + paramTemplate = """$name = Param($owner, "$name", "$doc")""" + for name, doc in decisionTreeParams: + variable = paramTemplate.replace("$name", name).replace("$doc", doc) + dummyPlaceholders += variable.replace("$owner", "Params._dummy()") + "\n " + realParams += "#: param for " + doc + "\n " + realParams += "self." 
+ variable.replace("$owner", "self") + "\n " + dtParamMethods += _gen_param_code(name, doc, None) + "\n" + code.append(decisionTreeCode.replace("$dummyPlaceHolders", dummyPlaceholders) + .replace("$realParams", realParams) + dtParamMethods) print("\n\n\n".join(code)) diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 13b6749998ad0..bc088e4c29e26 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -22,24 +22,22 @@ class HasMaxIter(Params): """ - Mixin for param maxIter: max number of iterations. + Mixin for param maxIter: max number of iterations (>= 0). """ # a placeholder to make it appear in the generated doc - maxIter = Param(Params._dummy(), "maxIter", "max number of iterations") + maxIter = Param(Params._dummy(), "maxIter", "max number of iterations (>= 0)") def __init__(self): super(HasMaxIter, self).__init__() - #: param for max number of iterations - self.maxIter = Param(self, "maxIter", "max number of iterations") - if None is not None: - self._setDefault(maxIter=None) + #: param for max number of iterations (>= 0) + self.maxIter = Param(self, "maxIter", "max number of iterations (>= 0)") def setMaxIter(self, value): """ Sets the value of :py:attr:`maxIter`. """ - self.paramMap[self.maxIter] = value + self._paramMap[self.maxIter] = value return self def getMaxIter(self): @@ -51,24 +49,22 @@ def getMaxIter(self): class HasRegParam(Params): """ - Mixin for param regParam: regularization constant. + Mixin for param regParam: regularization parameter (>= 0). """ # a placeholder to make it appear in the generated doc - regParam = Param(Params._dummy(), "regParam", "regularization constant") + regParam = Param(Params._dummy(), "regParam", "regularization parameter (>= 0)") def __init__(self): super(HasRegParam, self).__init__() - #: param for regularization constant - self.regParam = Param(self, "regParam", "regularization constant") - if None is not None: - self._setDefault(regParam=None) + #: param for regularization parameter (>= 0) + self.regParam = Param(self, "regParam", "regularization parameter (>= 0)") def setRegParam(self, value): """ Sets the value of :py:attr:`regParam`. """ - self.paramMap[self.regParam] = value + self._paramMap[self.regParam] = value return self def getRegParam(self): @@ -90,14 +86,13 @@ def __init__(self): super(HasFeaturesCol, self).__init__() #: param for features column name self.featuresCol = Param(self, "featuresCol", "features column name") - if 'features' is not None: - self._setDefault(featuresCol='features') + self._setDefault(featuresCol='features') def setFeaturesCol(self, value): """ Sets the value of :py:attr:`featuresCol`. """ - self.paramMap[self.featuresCol] = value + self._paramMap[self.featuresCol] = value return self def getFeaturesCol(self): @@ -119,14 +114,13 @@ def __init__(self): super(HasLabelCol, self).__init__() #: param for label column name self.labelCol = Param(self, "labelCol", "label column name") - if 'label' is not None: - self._setDefault(labelCol='label') + self._setDefault(labelCol='label') def setLabelCol(self, value): """ Sets the value of :py:attr:`labelCol`. 
""" - self.paramMap[self.labelCol] = value + self._paramMap[self.labelCol] = value return self def getLabelCol(self): @@ -148,14 +142,13 @@ def __init__(self): super(HasPredictionCol, self).__init__() #: param for prediction column name self.predictionCol = Param(self, "predictionCol", "prediction column name") - if 'prediction' is not None: - self._setDefault(predictionCol='prediction') + self._setDefault(predictionCol='prediction') def setPredictionCol(self, value): """ Sets the value of :py:attr:`predictionCol`. """ - self.paramMap[self.predictionCol] = value + self._paramMap[self.predictionCol] = value return self def getPredictionCol(self): @@ -165,6 +158,62 @@ def getPredictionCol(self): return self.getOrDefault(self.predictionCol) +class HasProbabilityCol(Params): + """ + Mixin for param probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.. + """ + + # a placeholder to make it appear in the generated doc + probabilityCol = Param(Params._dummy(), "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") + + def __init__(self): + super(HasProbabilityCol, self).__init__() + #: param for Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities. + self.probabilityCol = Param(self, "probabilityCol", "Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities.") + self._setDefault(probabilityCol='probability') + + def setProbabilityCol(self, value): + """ + Sets the value of :py:attr:`probabilityCol`. + """ + self._paramMap[self.probabilityCol] = value + return self + + def getProbabilityCol(self): + """ + Gets the value of probabilityCol or its default value. + """ + return self.getOrDefault(self.probabilityCol) + + +class HasRawPredictionCol(Params): + """ + Mixin for param rawPredictionCol: raw prediction (a.k.a. confidence) column name. + """ + + # a placeholder to make it appear in the generated doc + rawPredictionCol = Param(Params._dummy(), "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") + + def __init__(self): + super(HasRawPredictionCol, self).__init__() + #: param for raw prediction (a.k.a. confidence) column name + self.rawPredictionCol = Param(self, "rawPredictionCol", "raw prediction (a.k.a. confidence) column name") + self._setDefault(rawPredictionCol='rawPrediction') + + def setRawPredictionCol(self, value): + """ + Sets the value of :py:attr:`rawPredictionCol`. + """ + self._paramMap[self.rawPredictionCol] = value + return self + + def getRawPredictionCol(self): + """ + Gets the value of rawPredictionCol or its default value. + """ + return self.getOrDefault(self.rawPredictionCol) + + class HasInputCol(Params): """ Mixin for param inputCol: input column name. 
@@ -177,14 +226,12 @@ def __init__(self): super(HasInputCol, self).__init__() #: param for input column name self.inputCol = Param(self, "inputCol", "input column name") - if None is not None: - self._setDefault(inputCol=None) def setInputCol(self, value): """ Sets the value of :py:attr:`inputCol`. """ - self.paramMap[self.inputCol] = value + self._paramMap[self.inputCol] = value return self def getInputCol(self): @@ -194,6 +241,33 @@ def getInputCol(self): return self.getOrDefault(self.inputCol) +class HasInputCols(Params): + """ + Mixin for param inputCols: input column names. + """ + + # a placeholder to make it appear in the generated doc + inputCols = Param(Params._dummy(), "inputCols", "input column names") + + def __init__(self): + super(HasInputCols, self).__init__() + #: param for input column names + self.inputCols = Param(self, "inputCols", "input column names") + + def setInputCols(self, value): + """ + Sets the value of :py:attr:`inputCols`. + """ + self._paramMap[self.inputCols] = value + return self + + def getInputCols(self): + """ + Gets the value of inputCols or its default value. + """ + return self.getOrDefault(self.inputCols) + + class HasOutputCol(Params): """ Mixin for param outputCol: output column name. @@ -206,14 +280,13 @@ def __init__(self): super(HasOutputCol, self).__init__() #: param for output column name self.outputCol = Param(self, "outputCol", "output column name") - if None is not None: - self._setDefault(outputCol=None) + self._setDefault(outputCol=self.uid + '__output') def setOutputCol(self, value): """ Sets the value of :py:attr:`outputCol`. """ - self.paramMap[self.outputCol] = value + self._paramMap[self.outputCol] = value return self def getOutputCol(self): @@ -235,14 +308,12 @@ def __init__(self): super(HasNumFeatures, self).__init__() #: param for number of features self.numFeatures = Param(self, "numFeatures", "number of features") - if None is not None: - self._setDefault(numFeatures=None) def setNumFeatures(self, value): """ Sets the value of :py:attr:`numFeatures`. """ - self.paramMap[self.numFeatures] = value + self._paramMap[self.numFeatures] = value return self def getNumFeatures(self): @@ -250,3 +321,221 @@ def getNumFeatures(self): Gets the value of numFeatures or its default value. """ return self.getOrDefault(self.numFeatures) + + +class HasCheckpointInterval(Params): + """ + Mixin for param checkpointInterval: checkpoint interval (>= 1). + """ + + # a placeholder to make it appear in the generated doc + checkpointInterval = Param(Params._dummy(), "checkpointInterval", "checkpoint interval (>= 1)") + + def __init__(self): + super(HasCheckpointInterval, self).__init__() + #: param for checkpoint interval (>= 1) + self.checkpointInterval = Param(self, "checkpointInterval", "checkpoint interval (>= 1)") + + def setCheckpointInterval(self, value): + """ + Sets the value of :py:attr:`checkpointInterval`. + """ + self._paramMap[self.checkpointInterval] = value + return self + + def getCheckpointInterval(self): + """ + Gets the value of checkpointInterval or its default value. + """ + return self.getOrDefault(self.checkpointInterval) + + +class HasSeed(Params): + """ + Mixin for param seed: random seed. 
+ """ + + # a placeholder to make it appear in the generated doc + seed = Param(Params._dummy(), "seed", "random seed") + + def __init__(self): + super(HasSeed, self).__init__() + #: param for random seed + self.seed = Param(self, "seed", "random seed") + self._setDefault(seed=hash(type(self).__name__)) + + def setSeed(self, value): + """ + Sets the value of :py:attr:`seed`. + """ + self._paramMap[self.seed] = value + return self + + def getSeed(self): + """ + Gets the value of seed or its default value. + """ + return self.getOrDefault(self.seed) + + +class HasTol(Params): + """ + Mixin for param tol: the convergence tolerance for iterative algorithms. + """ + + # a placeholder to make it appear in the generated doc + tol = Param(Params._dummy(), "tol", "the convergence tolerance for iterative algorithms") + + def __init__(self): + super(HasTol, self).__init__() + #: param for the convergence tolerance for iterative algorithms + self.tol = Param(self, "tol", "the convergence tolerance for iterative algorithms") + + def setTol(self, value): + """ + Sets the value of :py:attr:`tol`. + """ + self._paramMap[self.tol] = value + return self + + def getTol(self): + """ + Gets the value of tol or its default value. + """ + return self.getOrDefault(self.tol) + + +class HasStepSize(Params): + """ + Mixin for param stepSize: Step size to be used for each iteration of optimization.. + """ + + # a placeholder to make it appear in the generated doc + stepSize = Param(Params._dummy(), "stepSize", "Step size to be used for each iteration of optimization.") + + def __init__(self): + super(HasStepSize, self).__init__() + #: param for Step size to be used for each iteration of optimization. + self.stepSize = Param(self, "stepSize", "Step size to be used for each iteration of optimization.") + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self._paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) + + +class DecisionTreeParams(Params): + """ + Mixin for Decision Tree parameters. + """ + + # a placeholder to make it appear in the generated doc + maxDepth = Param(Params._dummy(), "maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.") + maxBins = Param(Params._dummy(), "maxBins", "Max number of bins for discretizing continuous features. Must be >=2 and >= number of categories for any categorical feature.") + minInstancesPerNode = Param(Params._dummy(), "minInstancesPerNode", "Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1.") + minInfoGain = Param(Params._dummy(), "minInfoGain", "Minimum information gain for a split to be considered at a tree node.") + maxMemoryInMB = Param(Params._dummy(), "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.") + cacheNodeIds = Param(Params._dummy(), "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.") + + + def __init__(self): + super(DecisionTreeParams, self).__init__() + #: param for Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes. 
+ self.maxDepth = Param(self, "maxDepth", "Maximum depth of the tree. (>= 0) E.g., depth 0 means 1 leaf node; depth 1 means 1 internal node + 2 leaf nodes.") + #: param for Max number of bins for discretizing continuous features. Must be >=2 and >= number of categories for any categorical feature. + self.maxBins = Param(self, "maxBins", "Max number of bins for discretizing continuous features. Must be >=2 and >= number of categories for any categorical feature.") + #: param for Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1. + self.minInstancesPerNode = Param(self, "minInstancesPerNode", "Minimum number of instances each child must have after split. If a split causes the left or right child to have fewer than minInstancesPerNode, the split will be discarded as invalid. Should be >= 1.") + #: param for Minimum information gain for a split to be considered at a tree node. + self.minInfoGain = Param(self, "minInfoGain", "Minimum information gain for a split to be considered at a tree node.") + #: param for Maximum memory in MB allocated to histogram aggregation. + self.maxMemoryInMB = Param(self, "maxMemoryInMB", "Maximum memory in MB allocated to histogram aggregation.") + #: param for If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees. + self.cacheNodeIds = Param(self, "cacheNodeIds", "If false, the algorithm will pass trees to executors to match instances with nodes. If true, the algorithm will cache node IDs for each instance. Caching can speed up training of deeper trees.") + + def setMaxDepth(self, value): + """ + Sets the value of :py:attr:`maxDepth`. + """ + self._paramMap[self.maxDepth] = value + return self + + def getMaxDepth(self): + """ + Gets the value of maxDepth or its default value. + """ + return self.getOrDefault(self.maxDepth) + + def setMaxBins(self, value): + """ + Sets the value of :py:attr:`maxBins`. + """ + self._paramMap[self.maxBins] = value + return self + + def getMaxBins(self): + """ + Gets the value of maxBins or its default value. + """ + return self.getOrDefault(self.maxBins) + + def setMinInstancesPerNode(self, value): + """ + Sets the value of :py:attr:`minInstancesPerNode`. + """ + self._paramMap[self.minInstancesPerNode] = value + return self + + def getMinInstancesPerNode(self): + """ + Gets the value of minInstancesPerNode or its default value. + """ + return self.getOrDefault(self.minInstancesPerNode) + + def setMinInfoGain(self, value): + """ + Sets the value of :py:attr:`minInfoGain`. + """ + self._paramMap[self.minInfoGain] = value + return self + + def getMinInfoGain(self): + """ + Gets the value of minInfoGain or its default value. + """ + return self.getOrDefault(self.minInfoGain) + + def setMaxMemoryInMB(self, value): + """ + Sets the value of :py:attr:`maxMemoryInMB`. + """ + self._paramMap[self.maxMemoryInMB] = value + return self + + def getMaxMemoryInMB(self): + """ + Gets the value of maxMemoryInMB or its default value. + """ + return self.getOrDefault(self.maxMemoryInMB) + + def setCacheNodeIds(self, value): + """ + Sets the value of :py:attr:`cacheNodeIds`. + """ + self._paramMap[self.cacheNodeIds] = value + return self + + def getCacheNodeIds(self): + """ + Gets the value of cacheNodeIds or its default value. 
+ """ + return self.getOrDefault(self.cacheNodeIds) + diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index 7c1ec3026da6f..a563024b2cdcb 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -22,9 +22,6 @@ from pyspark.mllib.common import inherit_doc -__all__ = ['Estimator', 'Transformer', 'Pipeline', 'PipelineModel'] - - @inherit_doc class Estimator(Params): """ @@ -34,18 +31,40 @@ class Estimator(Params): __metaclass__ = ABCMeta @abstractmethod - def fit(self, dataset, params={}): + def _fit(self, dataset): """ - Fits a model to the input dataset with optional parameters. + Fits a model to the input dataset. This is called by the + default implementation of fit. :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame` - :param params: an optional param map that overwrites embedded - params :returns: fitted model """ raise NotImplementedError() + def fit(self, dataset, params={}): + """ + Fits a model to the input dataset with optional parameters. + + :param dataset: input dataset, which is an instance of + :py:class:`pyspark.sql.DataFrame` + :param params: an optional param map that overrides embedded + params. If a list/tuple of param maps is given, + this calls fit on each param map and returns a + list of models. + :returns: fitted model(s) + """ + if isinstance(params, (list, tuple)): + return [self.fit(dataset, paramMap) for paramMap in params] + elif isinstance(params, dict): + if params: + return self.copy(params)._fit(dataset) + else: + return self._fit(dataset) + else: + raise ValueError("Params must be either a param map or a list/tuple of param maps, " + "but got %s." % type(params)) + @inherit_doc class Transformer(Params): @@ -57,18 +76,43 @@ class Transformer(Params): __metaclass__ = ABCMeta @abstractmethod - def transform(self, dataset, params={}): + def _transform(self, dataset): """ Transforms the input dataset with optional parameters. :param dataset: input dataset, which is an instance of :py:class:`pyspark.sql.DataFrame` - :param params: an optional param map that overwrites embedded - params :returns: transformed dataset """ raise NotImplementedError() + def transform(self, dataset, params={}): + """ + Transforms the input dataset with optional parameters. + + :param dataset: input dataset, which is an instance of + :py:class:`pyspark.sql.DataFrame` + :param params: an optional param map that overrides embedded + params. + :returns: transformed dataset + """ + if isinstance(params, dict): + if params: + return self.copy(params,)._transform(dataset) + else: + return self._transform(dataset) + else: + raise ValueError("Params must be either a param map but got %s." % type(params)) + + +@inherit_doc +class Model(Transformer): + """ + Abstract class for models that are fitted by estimators. + """ + + __metaclass__ = ABCMeta + @inherit_doc class Pipeline(Estimator): @@ -107,15 +151,15 @@ def setStages(self, value): :param value: a list of transformers or estimators :return: the pipeline instance """ - self.paramMap[self.stages] = value + self._paramMap[self.stages] = value return self def getStages(self): """ Get pipeline stages. 
""" - if self.stages in self.paramMap: - return self.paramMap[self.stages] + if self.stages in self._paramMap: + return self._paramMap[self.stages] @keyword_only def setParams(self, stages=[]): @@ -126,9 +170,8 @@ def setParams(self, stages=[]): kwargs = self.setParams._input_kwargs return self._set(**kwargs) - def fit(self, dataset, params={}): - paramMap = self.extractParamMap(params) - stages = paramMap[self.stages] + def _fit(self, dataset): + stages = self.getStages() for stage in stages: if not (isinstance(stage, Estimator) or isinstance(stage, Transformer)): raise TypeError( @@ -142,29 +185,37 @@ def fit(self, dataset, params={}): if i <= indexOfLastEstimator: if isinstance(stage, Transformer): transformers.append(stage) - dataset = stage.transform(dataset, paramMap) + dataset = stage.transform(dataset) else: # must be an Estimator - model = stage.fit(dataset, paramMap) + model = stage.fit(dataset) transformers.append(model) if i < indexOfLastEstimator: - dataset = model.transform(dataset, paramMap) + dataset = model.transform(dataset) else: transformers.append(stage) return PipelineModel(transformers) + def copy(self, extra={}): + that = Params.copy(self, extra) + stages = [stage.copy(extra) for stage in that.getStages()] + return that.setStages(stages) + @inherit_doc -class PipelineModel(Transformer): +class PipelineModel(Model): """ Represents a compiled pipeline with transformers and fitted models. """ - def __init__(self, transformers): + def __init__(self, stages): super(PipelineModel, self).__init__() - self.transformers = transformers + self.stages = stages - def transform(self, dataset, params={}): - paramMap = self.extractParamMap(params) - for t in self.transformers: - dataset = t.transform(dataset, paramMap) + def _transform(self, dataset): + for t in self.stages: + dataset = t.transform(dataset) return dataset + + def copy(self, extra={}): + stages = [stage.copy(extra) for stage in self.stages] + return PipelineModel(stages) diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py new file mode 100644 index 0000000000000..b06099ac0aee6 --- /dev/null +++ b/python/pyspark/ml/recommendation.py @@ -0,0 +1,306 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.ml.util import keyword_only +from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.param.shared import * +from pyspark.mllib.common import inherit_doc + + +__all__ = ['ALS', 'ALSModel'] + + +@inherit_doc +class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, HasPredictionCol, HasRegParam, HasSeed): + """ + Alternating Least Squares (ALS) matrix factorization. + + ALS attempts to estimate the ratings matrix `R` as the product of + two lower-rank matrices, `X` and `Y`, i.e. `X * Yt = R`. 
Typically + these approximations are called 'factor' matrices. The general + approach is iterative. During each iteration, one of the factor + matrices is held constant, while the other is solved for using least + squares. The newly-solved factor matrix is then held constant while + solving for the other factor matrix. + + This is a blocked implementation of the ALS factorization algorithm + that groups the two sets of factors (referred to as "users" and + "products") into blocks and reduces communication by only sending + one copy of each user vector to each product block on each + iteration, and only for the product blocks that need that user's + feature vector. This is achieved by pre-computing some information + about the ratings matrix to determine the "out-links" of each user + (which blocks of products it will contribute to) and "in-link" + information for each product (which of the feature vectors it + receives from each user block it will depend on). This allows us to + send only an array of feature vectors between each user block and + product block, and have the product block find the users' ratings + and update the products based on these messages. + + For implicit preference data, the algorithm used is based on + "Collaborative Filtering for Implicit Feedback Datasets", available + at `http://dx.doi.org/10.1109/ICDM.2008.22`, adapted for the blocked + approach used here. + + Essentially instead of finding the low-rank approximations to the + rating matrix `R`, this finds the approximations for a preference + matrix `P` where the elements of `P` are 1 if r > 0 and 0 if r <= 0. + The ratings then act as 'confidence' values related to strength of + indicated user preferences rather than explicit ratings given to + items. + + >>> df = sqlContext.createDataFrame( + ... [(0, 0, 4.0), (0, 1, 2.0), (1, 1, 3.0), (1, 2, 4.0), (2, 1, 1.0), (2, 2, 5.0)], + ... ["user", "item", "rating"]) + >>> als = ALS(rank=10, maxIter=5) + >>> model = als.fit(df) + >>> model.rank + 10 + >>> model.userFactors.orderBy("id").collect() + [Row(id=0, features=[...]), Row(id=1, ...), Row(id=2, ...)] + >>> test = sqlContext.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"]) + >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0]) + >>> predictions[0] + Row(user=0, item=2, prediction=0.39...) + >>> predictions[1] + Row(user=1, item=0, prediction=3.19...) + >>> predictions[2] + Row(user=2, item=0, prediction=-1.15...) 
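In the same spirit, a sketch (illustrative, not part of the patch) of the list-of-param-maps overload that `Estimator.fit` gains earlier in this diff, reusing `df` and `als` from the doctest above; `fit` is invoked once per map and returns one model per map:

>>> param_maps = [{als.rank: 5, als.maxIter: 5}, {als.rank: 20, als.maxIter: 5}]
>>> models = als.fit(df, param_maps)
>>> len(models)
2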
+ """ + + # a placeholder to make it appear in the generated doc + rank = Param(Params._dummy(), "rank", "rank of the factorization") + numUserBlocks = Param(Params._dummy(), "numUserBlocks", "number of user blocks") + numItemBlocks = Param(Params._dummy(), "numItemBlocks", "number of item blocks") + implicitPrefs = Param(Params._dummy(), "implicitPrefs", "whether to use implicit preference") + alpha = Param(Params._dummy(), "alpha", "alpha for implicit preference") + userCol = Param(Params._dummy(), "userCol", "column name for user ids") + itemCol = Param(Params._dummy(), "itemCol", "column name for item ids") + ratingCol = Param(Params._dummy(), "ratingCol", "column name for ratings") + nonnegative = Param(Params._dummy(), "nonnegative", + "whether to use nonnegative constraint for least squares") + + @keyword_only + def __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, + ratingCol="rating", nonnegative=False, checkpointInterval=10): + """ + __init__(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \ + implicitPrefs=false, alpha=1.0, userCol="user", itemCol="item", seed=None, \ + ratingCol="rating", nonnegative=false, checkpointInterval=10) + """ + super(ALS, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.recommendation.ALS", self.uid) + self.rank = Param(self, "rank", "rank of the factorization") + self.numUserBlocks = Param(self, "numUserBlocks", "number of user blocks") + self.numItemBlocks = Param(self, "numItemBlocks", "number of item blocks") + self.implicitPrefs = Param(self, "implicitPrefs", "whether to use implicit preference") + self.alpha = Param(self, "alpha", "alpha for implicit preference") + self.userCol = Param(self, "userCol", "column name for user ids") + self.itemCol = Param(self, "itemCol", "column name for item ids") + self.ratingCol = Param(self, "ratingCol", "column name for ratings") + self.nonnegative = Param(self, "nonnegative", + "whether to use nonnegative constraint for least squares") + self._setDefault(rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, + ratingCol="rating", nonnegative=False, checkpointInterval=10) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, + ratingCol="rating", nonnegative=False, checkpointInterval=10): + """ + setParams(self, rank=10, maxIter=10, regParam=0.1, numUserBlocks=10, numItemBlocks=10, \ + implicitPrefs=False, alpha=1.0, userCol="user", itemCol="item", seed=None, \ + ratingCol="rating", nonnegative=False, checkpointInterval=10) + Sets params for ALS. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return ALSModel(java_model) + + def setRank(self, value): + """ + Sets the value of :py:attr:`rank`. + """ + self._paramMap[self.rank] = value + return self + + def getRank(self): + """ + Gets the value of rank or its default value. + """ + return self.getOrDefault(self.rank) + + def setNumUserBlocks(self, value): + """ + Sets the value of :py:attr:`numUserBlocks`. 
+ """ + self._paramMap[self.numUserBlocks] = value + return self + + def getNumUserBlocks(self): + """ + Gets the value of numUserBlocks or its default value. + """ + return self.getOrDefault(self.numUserBlocks) + + def setNumItemBlocks(self, value): + """ + Sets the value of :py:attr:`numItemBlocks`. + """ + self._paramMap[self.numItemBlocks] = value + return self + + def getNumItemBlocks(self): + """ + Gets the value of numItemBlocks or its default value. + """ + return self.getOrDefault(self.numItemBlocks) + + def setNumBlocks(self, value): + """ + Sets both :py:attr:`numUserBlocks` and :py:attr:`numItemBlocks` to the specific value. + """ + self._paramMap[self.numUserBlocks] = value + self._paramMap[self.numItemBlocks] = value + + def setImplicitPrefs(self, value): + """ + Sets the value of :py:attr:`implicitPrefs`. + """ + self._paramMap[self.implicitPrefs] = value + return self + + def getImplicitPrefs(self): + """ + Gets the value of implicitPrefs or its default value. + """ + return self.getOrDefault(self.implicitPrefs) + + def setAlpha(self, value): + """ + Sets the value of :py:attr:`alpha`. + """ + self._paramMap[self.alpha] = value + return self + + def getAlpha(self): + """ + Gets the value of alpha or its default value. + """ + return self.getOrDefault(self.alpha) + + def setUserCol(self, value): + """ + Sets the value of :py:attr:`userCol`. + """ + self._paramMap[self.userCol] = value + return self + + def getUserCol(self): + """ + Gets the value of userCol or its default value. + """ + return self.getOrDefault(self.userCol) + + def setItemCol(self, value): + """ + Sets the value of :py:attr:`itemCol`. + """ + self._paramMap[self.itemCol] = value + return self + + def getItemCol(self): + """ + Gets the value of itemCol or its default value. + """ + return self.getOrDefault(self.itemCol) + + def setRatingCol(self, value): + """ + Sets the value of :py:attr:`ratingCol`. + """ + self._paramMap[self.ratingCol] = value + return self + + def getRatingCol(self): + """ + Gets the value of ratingCol or its default value. + """ + return self.getOrDefault(self.ratingCol) + + def setNonnegative(self, value): + """ + Sets the value of :py:attr:`nonnegative`. + """ + self._paramMap[self.nonnegative] = value + return self + + def getNonnegative(self): + """ + Gets the value of nonnegative or its default value. + """ + return self.getOrDefault(self.nonnegative) + + +class ALSModel(JavaModel): + """ + Model fitted by ALS. 
+ """ + + @property + def rank(self): + """rank of the matrix factorization model""" + return self._call_java("rank") + + @property + def userFactors(self): + """ + a DataFrame that stores user factors in two columns: `id` and + `features` + """ + return self._call_java("userFactors") + + @property + def itemFactors(self): + """ + a DataFrame that stores item factors in two columns: `id` and + `features` + """ + return self._call_java("itemFactors") + + +if __name__ == "__main__": + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + globs = globals().copy() + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + sc = SparkContext("local[2]", "ml.recommendation tests") + sqlContext = SQLContext(sc) + globs['sc'] = sc + globs['sqlContext'] = sqlContext + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + sc.stop() + if failure_count: + exit(-1) diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py new file mode 100644 index 0000000000000..b139e27372d80 --- /dev/null +++ b/python/pyspark/ml/regression.py @@ -0,0 +1,541 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.ml.util import keyword_only +from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.param.shared import * +from pyspark.mllib.common import inherit_doc + + +__all__ = ['DecisionTreeRegressor', 'DecisionTreeRegressionModel', 'GBTRegressor', + 'GBTRegressionModel', 'LinearRegression', 'LinearRegressionModel', + 'RandomForestRegressor', 'RandomForestRegressionModel'] + + +@inherit_doc +class LinearRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, + HasRegParam, HasTol): + """ + Linear regression. + + The learning objective is to minimize the squared error, with regularization. + The specific squared error loss function used is: L = 1/2n ||A weights - y||^2^ + + This support multiple types of regularization: + - none (a.k.a. ordinary least squares) + - L2 (ridge regression) + - L1 (Lasso) + - L2 + L1 (elastic net) + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> lr = LinearRegression(maxIter=5, regParam=0.0) + >>> model = lr.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + -1.0 + >>> model.weights + DenseVector([1.0]) + >>> model.intercept + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + >>> lr.setParams("vector") + Traceback (most recent call last): + ... + TypeError: Method setParams forces keyword arguments. + """ + + # a placeholder to make it appear in the generated doc + elasticNetParam = \ + Param(Params._dummy(), "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, " + + "the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty.") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) + """ + super(LinearRegression, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.LinearRegression", self.uid) + #: param for the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty + # is an L2 penalty. For alpha = 1, it is an L1 penalty. + self.elasticNetParam = \ + Param(self, "elasticNetParam", + "the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty " + + "is an L2 penalty. For alpha = 1, it is an L1 penalty.") + self._setDefault(maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6) + Sets params for linear regression. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return LinearRegressionModel(java_model) + + def setElasticNetParam(self, value): + """ + Sets the value of :py:attr:`elasticNetParam`. + """ + self._paramMap[self.elasticNetParam] = value + return self + + def getElasticNetParam(self): + """ + Gets the value of elasticNetParam or its default value. + """ + return self.getOrDefault(self.elasticNetParam) + + +class LinearRegressionModel(JavaModel): + """ + Model fitted by LinearRegression. + """ + + @property + def weights(self): + """ + Model weights. + """ + return self._call_java("weights") + + @property + def intercept(self): + """ + Model intercept. + """ + return self._call_java("intercept") + + +class TreeRegressorParams(object): + """ + Private class to track supported impurity measures. + """ + supportedImpurities = ["variance"] + + +class RandomForestParams(object): + """ + Private class to track supported random forest parameters. + """ + supportedFeatureSubsetStrategies = ["auto", "all", "onethird", "sqrt", "log2"] + + +class GBTParams(object): + """ + Private class to track supported GBT params. 
+ """ + supportedLossTypes = ["squared", "absolute"] + + +@inherit_doc +class DecisionTreeRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree` + learning algorithm for regression. + It supports both continuous and categorical features. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> dt = DecisionTreeRegressor(maxDepth=2) + >>> model = dt.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance"): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") + """ + super(DecisionTreeRegressor, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.DecisionTreeRegressor", self.uid) + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + impurity="variance"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance") + Sets params for the DecisionTreeRegressor. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return DecisionTreeRegressionModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self._paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + +class DecisionTreeRegressionModel(JavaModel): + """ + Model fitted by DecisionTreeRegressor. 
+ """ + + +@inherit_doc +class RandomForestRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasSeed, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Random_forest Random Forest` + learning algorithm for regression. + It supports both continuous and categorical features. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... (0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> rf = RandomForestRegressor(numTrees=2, maxDepth=2, seed=42) + >>> model = rf.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 0.5 + """ + + # a placeholder to make it appear in the generated doc + impurity = Param(Params._dummy(), "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + numTrees = Param(Params._dummy(), "numTrees", "Number of trees to train (>= 1)") + featureSubsetStrategy = \ + Param(Params._dummy(), "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, impurity="variance", + numTrees=20, featureSubsetStrategy="auto", seed=None): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + impurity="variance", numTrees=20, \ + featureSubsetStrategy="auto", seed=None) + """ + super(RandomForestRegressor, self).__init__() + self._java_obj = self._new_java_obj( + "org.apache.spark.ml.regression.RandomForestRegressor", self.uid) + #: param for Criterion used for information gain calculation (case-insensitive). + self.impurity = \ + Param(self, "impurity", + "Criterion used for information gain calculation (case-insensitive). " + + "Supported options: " + ", ".join(TreeRegressorParams.supportedImpurities)) + #: param for Fraction of the training data used for learning each decision tree, + # in range (0, 1] + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: param for Number of trees to train (>= 1) + self.numTrees = Param(self, "numTrees", "Number of trees to train (>= 1)") + #: param for The number of features to consider for splits at each tree node + self.featureSubsetStrategy = \ + Param(self, "featureSubsetStrategy", + "The number of features to consider for splits at each tree node. 
Supported " + + "options: " + ", ".join(RandomForestParams.supportedFeatureSubsetStrategies)) + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, + impurity="variance", numTrees=20, featureSubsetStrategy="auto") + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, + impurity="variance", numTrees=20, featureSubsetStrategy="auto"): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, seed=None, \ + impurity="variance", numTrees=20, featureSubsetStrategy="auto") + Sets params for linear regression. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return RandomForestRegressionModel(java_model) + + def setImpurity(self, value): + """ + Sets the value of :py:attr:`impurity`. + """ + self._paramMap[self.impurity] = value + return self + + def getImpurity(self): + """ + Gets the value of impurity or its default value. + """ + return self.getOrDefault(self.impurity) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self._paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setNumTrees(self, value): + """ + Sets the value of :py:attr:`numTrees`. + """ + self._paramMap[self.numTrees] = value + return self + + def getNumTrees(self): + """ + Gets the value of numTrees or its default value. + """ + return self.getOrDefault(self.numTrees) + + def setFeatureSubsetStrategy(self, value): + """ + Sets the value of :py:attr:`featureSubsetStrategy`. + """ + self._paramMap[self.featureSubsetStrategy] = value + return self + + def getFeatureSubsetStrategy(self): + """ + Gets the value of featureSubsetStrategy or its default value. + """ + return self.getOrDefault(self.featureSubsetStrategy) + + +class RandomForestRegressionModel(JavaModel): + """ + Model fitted by RandomForestRegressor. + """ + + +@inherit_doc +class GBTRegressor(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasMaxIter, + DecisionTreeParams, HasCheckpointInterval): + """ + `http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)` + learning algorithm for regression. + It supports both continuous and categorical features. + + >>> from pyspark.mllib.linalg import Vectors + >>> df = sqlContext.createDataFrame([ + ... (1.0, Vectors.dense(1.0)), + ... 
(0.0, Vectors.sparse(1, [], []))], ["label", "features"]) + >>> gbt = GBTRegressor(maxIter=5, maxDepth=2) + >>> model = gbt.fit(df) + >>> test0 = sqlContext.createDataFrame([(Vectors.dense(-1.0),)], ["features"]) + >>> model.transform(test0).head().prediction + 0.0 + >>> test1 = sqlContext.createDataFrame([(Vectors.sparse(1, [0], [1.0]),)], ["features"]) + >>> model.transform(test1).head().prediction + 1.0 + """ + + # a placeholder to make it appear in the generated doc + lossType = Param(Params._dummy(), "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + subsamplingRate = Param(Params._dummy(), "subsamplingRate", + "Fraction of the training data used for learning each decision tree, " + + "in range (0, 1].") + stepSize = Param(Params._dummy(), "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the " + + "contribution of each estimator") + + @keyword_only + def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, lossType="squared", + maxIter=20, stepSize=0.1): + """ + __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + lossType="squared", maxIter=20, stepSize=0.1) + """ + super(GBTRegressor, self).__init__() + self._java_obj = self._new_java_obj("org.apache.spark.ml.regression.GBTRegressor", self.uid) + #: param for Loss function which GBT tries to minimize (case-insensitive). + self.lossType = Param(self, "lossType", + "Loss function which GBT tries to minimize (case-insensitive). " + + "Supported options: " + ", ".join(GBTParams.supportedLossTypes)) + #: Fraction of the training data used for learning each decision tree, in range (0, 1]. + self.subsamplingRate = Param(self, "subsamplingRate", + "Fraction of the training data used for learning each " + + "decision tree, in range (0, 1].") + #: Step size (a.k.a. learning rate) in interval (0, 1] for shrinking the contribution of + # each estimator + self.stepSize = Param(self, "stepSize", + "Step size (a.k.a. learning rate) in interval (0, 1] for shrinking " + + "the contribution of each estimator") + self._setDefault(maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="squared", maxIter=20, stepSize=0.1) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, + lossType="squared", maxIter=20, stepSize=0.1): + """ + setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ + maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ + maxMemoryInMB=256, cacheNodeIds=False, checkpointInterval=10, \ + lossType="squared", maxIter=20, stepSize=0.1) + Sets params for Gradient Boosted Tree Regression. 
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def _create_model(self, java_model): + return GBTRegressionModel(java_model) + + def setLossType(self, value): + """ + Sets the value of :py:attr:`lossType`. + """ + self._paramMap[self.lossType] = value + return self + + def getLossType(self): + """ + Gets the value of lossType or its default value. + """ + return self.getOrDefault(self.lossType) + + def setSubsamplingRate(self, value): + """ + Sets the value of :py:attr:`subsamplingRate`. + """ + self._paramMap[self.subsamplingRate] = value + return self + + def getSubsamplingRate(self): + """ + Gets the value of subsamplingRate or its default value. + """ + return self.getOrDefault(self.subsamplingRate) + + def setStepSize(self, value): + """ + Sets the value of :py:attr:`stepSize`. + """ + self._paramMap[self.stepSize] = value + return self + + def getStepSize(self): + """ + Gets the value of stepSize or its default value. + """ + return self.getOrDefault(self.stepSize) + + +class GBTRegressionModel(JavaModel): + """ + Model fitted by GBTRegressor. + """ + + +if __name__ == "__main__": + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + globs = globals().copy() + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + sc = SparkContext("local[2]", "ml.regression tests") + sqlContext = SQLContext(sc) + globs['sc'] = sc + globs['sqlContext'] = sqlContext + (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + sc.stop() + if failure_count: + exit(-1) diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py index 3a42bcf723894..6adbf166f34a8 100644 --- a/python/pyspark/ml/tests.py +++ b/python/pyspark/ml/tests.py @@ -31,10 +31,13 @@ import unittest from pyspark.tests import ReusedPySparkTestCase as PySparkTestCase -from pyspark.sql import DataFrame -from pyspark.ml.param import Param -from pyspark.ml.param.shared import HasMaxIter, HasInputCol -from pyspark.ml.pipeline import Transformer, Estimator, Pipeline +from pyspark.sql import DataFrame, SQLContext +from pyspark.ml.param import Param, Params +from pyspark.ml.param.shared import HasMaxIter, HasInputCol, HasSeed +from pyspark.ml.util import keyword_only +from pyspark.ml import Estimator, Model, Pipeline, Transformer +from pyspark.ml.feature import * +from pyspark.mllib.linalg import DenseVector class MockDataset(DataFrame): @@ -43,44 +46,43 @@ def __init__(self): self.index = 0 -class MockTransformer(Transformer): +class HasFake(Params): + + def __init__(self): + super(HasFake, self).__init__() + self.fake = Param(self, "fake", "fake param") + + def getFake(self): + return self.getOrDefault(self.fake) + + +class MockTransformer(Transformer, HasFake): def __init__(self): super(MockTransformer, self).__init__() - self.fake = Param(self, "fake", "fake") self.dataset_index = None - self.fake_param_value = None - def transform(self, dataset, params={}): + def _transform(self, dataset): self.dataset_index = dataset.index - if self.fake in params: - self.fake_param_value = params[self.fake] dataset.index += 1 return dataset -class MockEstimator(Estimator): +class MockEstimator(Estimator, HasFake): def __init__(self): super(MockEstimator, self).__init__() - self.fake = Param(self, "fake", "fake") self.dataset_index = None - self.fake_param_value = None - self.model = None - def fit(self, dataset, params={}): + def _fit(self, dataset): self.dataset_index = dataset.index - 
if self.fake in params: - self.fake_param_value = params[self.fake] model = MockModel() - self.model = model + self._copyValues(model) return model -class MockModel(MockTransformer, Transformer): - - def __init__(self): - super(MockModel, self).__init__() +class MockModel(MockTransformer, Model, HasFake): + pass class PipelineTests(PySparkTestCase): @@ -91,19 +93,17 @@ def test_pipeline(self): transformer1 = MockTransformer() estimator2 = MockEstimator() transformer3 = MockTransformer() - pipeline = Pipeline() \ - .setStages([estimator0, transformer1, estimator2, transformer3]) + pipeline = Pipeline(stages=[estimator0, transformer1, estimator2, transformer3]) pipeline_model = pipeline.fit(dataset, {estimator0.fake: 0, transformer1.fake: 1}) - self.assertEqual(0, estimator0.dataset_index) - self.assertEqual(0, estimator0.fake_param_value) - model0 = estimator0.model + model0, transformer1, model2, transformer3 = pipeline_model.stages self.assertEqual(0, model0.dataset_index) + self.assertEqual(0, model0.getFake()) self.assertEqual(1, transformer1.dataset_index) - self.assertEqual(1, transformer1.fake_param_value) - self.assertEqual(2, estimator2.dataset_index) - model2 = estimator2.model - self.assertIsNone(model2.dataset_index, "The model produced by the last estimator should " - "not be called during fit.") + self.assertEqual(1, transformer1.getFake()) + self.assertEqual(2, dataset.index) + self.assertIsNone(model2.dataset_index, "The last model shouldn't be called in fit.") + self.assertIsNone(transformer3.dataset_index, + "The last transformer shouldn't be called in fit.") dataset = pipeline_model.transform(dataset) self.assertEqual(2, model0.dataset_index) self.assertEqual(3, transformer1.dataset_index) @@ -112,14 +112,46 @@ def test_pipeline(self): self.assertEqual(6, dataset.index) -class TestParams(HasMaxIter, HasInputCol): +class TestParams(HasMaxIter, HasInputCol, HasSeed): """ - A subclass of Params mixed with HasMaxIter and HasInputCol. + A subclass of Params mixed with HasMaxIter, HasInputCol and HasSeed. """ - - def __init__(self): + @keyword_only + def __init__(self, seed=None): super(TestParams, self).__init__() self._setDefault(maxIter=10) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, seed=None): + """ + setParams(self, seed=None) + Sets params for this test. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + +class OtherTestParams(HasMaxIter, HasInputCol, HasSeed): + """ + A subclass of Params mixed with HasMaxIter, HasInputCol and HasSeed. + """ + @keyword_only + def __init__(self, seed=None): + super(OtherTestParams, self).__init__() + self._setDefault(maxIter=10) + kwargs = self.__init__._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, seed=None): + """ + setParams(self, seed=None) + Sets params for this test. 
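# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the Pipeline behaviour the test
# above relies on, namely stages passed as a keyword argument and fitted models
# exposed through PipelineModel.stages. Tokenizer, HashingTF and
# LogisticRegression are existing pyspark.ml components not modified by this
# hunk; they are used here only as stand-in stages. Assumes `sc`/`sqlContext`.
from pyspark.ml import Pipeline
from pyspark.ml.feature import Tokenizer, HashingTF
from pyspark.ml.classification import LogisticRegression

tokenizer = Tokenizer(inputCol="text", outputCol="words")
hashingTF = HashingTF(inputCol="words", outputCol="features")
lr = LogisticRegression(maxIter=10)
pipeline = Pipeline(stages=[tokenizer, hashingTF, lr])
df = sqlContext.createDataFrame([("a b c", 1.0), ("d e", 0.0)], ["text", "label"])
pipeline_model = pipeline.fit(df)
print(len(pipeline_model.stages))   # one fitted stage per pipeline stage
# ---------------------------------------------------------------------------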
+ """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) class ParamTests(PySparkTestCase): @@ -128,17 +160,19 @@ def test_param(self): testParams = TestParams() maxIter = testParams.maxIter self.assertEqual(maxIter.name, "maxIter") - self.assertEqual(maxIter.doc, "max number of iterations") - self.assertTrue(maxIter.parent is testParams) + self.assertEqual(maxIter.doc, "max number of iterations (>= 0)") + self.assertTrue(maxIter.parent == testParams.uid) def test_params(self): testParams = TestParams() maxIter = testParams.maxIter inputCol = testParams.inputCol + seed = testParams.seed params = testParams.params - self.assertEqual(params, [inputCol, maxIter]) + self.assertEqual(params, [inputCol, maxIter, seed]) + self.assertTrue(testParams.hasParam(maxIter)) self.assertTrue(testParams.hasDefault(maxIter)) self.assertFalse(testParams.isSet(maxIter)) self.assertTrue(testParams.isDefined(maxIter)) @@ -147,16 +181,76 @@ def test_params(self): self.assertTrue(testParams.isSet(maxIter)) self.assertEquals(testParams.getMaxIter(), 100) + self.assertTrue(testParams.hasParam(inputCol)) self.assertFalse(testParams.hasDefault(inputCol)) self.assertFalse(testParams.isSet(inputCol)) self.assertFalse(testParams.isDefined(inputCol)) with self.assertRaises(KeyError): testParams.getInputCol() + # Since the default is normally random, set it to a known number for debug str + testParams._setDefault(seed=41) + testParams.setSeed(43) + self.assertEquals( testParams.explainParams(), "\n".join(["inputCol: input column name (undefined)", - "maxIter: max number of iterations (default: 10, current: 100)"])) + "maxIter: max number of iterations (>= 0) (default: 10, current: 100)", + "seed: random seed (default: 41, current: 43)"])) + + def test_hasseed(self): + noSeedSpecd = TestParams() + withSeedSpecd = TestParams(seed=42) + other = OtherTestParams() + # Check that we no longer use 42 as the magic number + self.assertNotEqual(noSeedSpecd.getSeed(), 42) + origSeed = noSeedSpecd.getSeed() + # Check that we only compute the seed once + self.assertEqual(noSeedSpecd.getSeed(), origSeed) + # Check that a specified seed is honored + self.assertEqual(withSeedSpecd.getSeed(), 42) + # Check that a different class has a different seed + self.assertNotEqual(other.getSeed(), noSeedSpecd.getSeed()) + + +class FeatureTests(PySparkTestCase): + + def test_binarizer(self): + b0 = Binarizer() + self.assertListEqual(b0.params, [b0.inputCol, b0.outputCol, b0.threshold]) + self.assertTrue(all([~b0.isSet(p) for p in b0.params])) + self.assertTrue(b0.hasDefault(b0.threshold)) + self.assertEqual(b0.getThreshold(), 0.0) + b0.setParams(inputCol="input", outputCol="output").setThreshold(1.0) + self.assertTrue(all([b0.isSet(p) for p in b0.params])) + self.assertEqual(b0.getThreshold(), 1.0) + self.assertEqual(b0.getInputCol(), "input") + self.assertEqual(b0.getOutputCol(), "output") + + b0c = b0.copy({b0.threshold: 2.0}) + self.assertEqual(b0c.uid, b0.uid) + self.assertListEqual(b0c.params, b0.params) + self.assertEqual(b0c.getThreshold(), 2.0) + + b1 = Binarizer(threshold=2.0, inputCol="input", outputCol="output") + self.assertNotEqual(b1.uid, b0.uid) + self.assertEqual(b1.getThreshold(), 2.0) + self.assertEqual(b1.getInputCol(), "input") + self.assertEqual(b1.getOutputCol(), "output") + + def test_idf(self): + sqlContext = SQLContext(self.sc) + dataset = sqlContext.createDataFrame([ + (DenseVector([1.0, 2.0]),), + (DenseVector([0.0, 1.0]),), + (DenseVector([3.0, 0.2]),)], ["tf"]) + idf0 = IDF(inputCol="tf") + 
self.assertListEqual(idf0.params, [idf0.inputCol, idf0.minDocFreq, idf0.outputCol]) + idf0m = idf0.fit(dataset, {idf0.outputCol: "idf"}) + self.assertEqual(idf0m.uid, idf0.uid, + "Model should inherit the UID from its parent estimator.") + output = idf0m.transform(dataset) + self.assertIsNotNone(output.head().idf) if __name__ == "__main__": diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 1773ab5bdcdb1..0bf988fd72f14 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -16,29 +16,33 @@ # import itertools +import numpy as np -__all__ = ['ParamGridBuilder'] +from pyspark.ml.param import Params, Param +from pyspark.ml import Estimator, Model +from pyspark.ml.util import keyword_only +from pyspark.sql.functions import rand + +__all__ = ['ParamGridBuilder', 'CrossValidator', 'CrossValidatorModel'] class ParamGridBuilder(object): - """ + r""" Builder for a param grid used in grid search-based model selection. - >>> from classification import LogisticRegression + >>> from pyspark.ml.classification import LogisticRegression >>> lr = LogisticRegression() - >>> output = ParamGridBuilder().baseOn({lr.labelCol: 'l'}) \ - .baseOn([lr.predictionCol, 'p']) \ - .addGrid(lr.regParam, [1.0, 2.0, 3.0]) \ - .addGrid(lr.maxIter, [1, 5]) \ - .addGrid(lr.featuresCol, ['f']) \ - .build() - >>> expected = [ \ -{lr.regParam: 1.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 2.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 3.0, lr.featuresCol: 'f', lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 1.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 2.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, \ -{lr.regParam: 3.0, lr.featuresCol: 'f', lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}] + >>> output = ParamGridBuilder() \ + ... .baseOn({lr.labelCol: 'l'}) \ + ... .baseOn([lr.predictionCol, 'p']) \ + ... .addGrid(lr.regParam, [1.0, 2.0]) \ + ... .addGrid(lr.maxIter, [1, 5]) \ + ... .build() + >>> expected = [ + ... {lr.regParam: 1.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 2.0, lr.maxIter: 1, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 1.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}, + ... {lr.regParam: 2.0, lr.maxIter: 5, lr.labelCol: 'l', lr.predictionCol: 'p'}] >>> len(output) == len(expected) True >>> all([m in expected for m in output]) @@ -79,6 +83,198 @@ def build(self): return [dict(zip(keys, prod)) for prod in itertools.product(*grid_values)] +class CrossValidator(Estimator): + """ + K-fold cross validation. + + >>> from pyspark.ml.classification import LogisticRegression + >>> from pyspark.ml.evaluation import BinaryClassificationEvaluator + >>> from pyspark.mllib.linalg import Vectors + >>> dataset = sqlContext.createDataFrame( + ... [(Vectors.dense([0.0]), 0.0), + ... (Vectors.dense([0.4]), 1.0), + ... (Vectors.dense([0.5]), 0.0), + ... (Vectors.dense([0.6]), 1.0), + ... (Vectors.dense([1.0]), 1.0)] * 10, + ... ["features", "label"]) + >>> lr = LogisticRegression() + >>> grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() + >>> evaluator = BinaryClassificationEvaluator() + >>> cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) + >>> cvModel = cv.fit(dataset) + >>> evaluator.evaluate(cvModel.transform(dataset)) + 0.8333... 
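# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): ParamGridBuilder.build() takes
# the cartesian product of the addGrid() value lists on top of the baseOn()
# values, so 2 regParam values x 2 maxIter values yields 4 param maps.
# Assumes an active SparkContext `sc`, as in the doctests.
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.tuning import ParamGridBuilder

lr = LogisticRegression()
grid = ParamGridBuilder() \
    .baseOn({lr.labelCol: 'l'}) \
    .addGrid(lr.regParam, [1.0, 2.0]) \
    .addGrid(lr.maxIter, [1, 5]) \
    .build()
print(len(grid))                 # 4 = 2 * 2
print(grid[0][lr.labelCol])      # 'l' is carried into every map by baseOn()
# ---------------------------------------------------------------------------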
+ """ + + # a placeholder to make it appear in the generated doc + estimator = Param(Params._dummy(), "estimator", "estimator to be cross-validated") + + # a placeholder to make it appear in the generated doc + estimatorParamMaps = Param(Params._dummy(), "estimatorParamMaps", "estimator param maps") + + # a placeholder to make it appear in the generated doc + evaluator = Param( + Params._dummy(), "evaluator", + "evaluator used to select hyper-parameters that maximize the cross-validated metric") + + # a placeholder to make it appear in the generated doc + numFolds = Param(Params._dummy(), "numFolds", "number of folds for cross validation") + + @keyword_only + def __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3): + """ + __init__(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3) + """ + super(CrossValidator, self).__init__() + #: param for estimator to be cross-validated + self.estimator = Param(self, "estimator", "estimator to be cross-validated") + #: param for estimator param maps + self.estimatorParamMaps = Param(self, "estimatorParamMaps", "estimator param maps") + #: param for the evaluator used to select hyper-parameters that + #: maximize the cross-validated metric + self.evaluator = Param( + self, "evaluator", + "evaluator used to select hyper-parameters that maximize the cross-validated metric") + #: param for number of folds for cross validation + self.numFolds = Param(self, "numFolds", "number of folds for cross validation") + self._setDefault(numFolds=3) + kwargs = self.__init__._input_kwargs + self._set(**kwargs) + + @keyword_only + def setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3): + """ + setParams(self, estimator=None, estimatorParamMaps=None, evaluator=None, numFolds=3): + Sets params for cross validator. + """ + kwargs = self.setParams._input_kwargs + return self._set(**kwargs) + + def setEstimator(self, value): + """ + Sets the value of :py:attr:`estimator`. + """ + self._paramMap[self.estimator] = value + return self + + def getEstimator(self): + """ + Gets the value of estimator or its default value. + """ + return self.getOrDefault(self.estimator) + + def setEstimatorParamMaps(self, value): + """ + Sets the value of :py:attr:`estimatorParamMaps`. + """ + self._paramMap[self.estimatorParamMaps] = value + return self + + def getEstimatorParamMaps(self): + """ + Gets the value of estimatorParamMaps or its default value. + """ + return self.getOrDefault(self.estimatorParamMaps) + + def setEvaluator(self, value): + """ + Sets the value of :py:attr:`evaluator`. + """ + self._paramMap[self.evaluator] = value + return self + + def getEvaluator(self): + """ + Gets the value of evaluator or its default value. + """ + return self.getOrDefault(self.evaluator) + + def setNumFolds(self, value): + """ + Sets the value of :py:attr:`numFolds`. + """ + self._paramMap[self.numFolds] = value + return self + + def getNumFolds(self): + """ + Gets the value of numFolds or its default value. 
+ """ + return self.getOrDefault(self.numFolds) + + def _fit(self, dataset): + est = self.getOrDefault(self.estimator) + epm = self.getOrDefault(self.estimatorParamMaps) + numModels = len(epm) + eva = self.getOrDefault(self.evaluator) + nFolds = self.getOrDefault(self.numFolds) + h = 1.0 / nFolds + randCol = self.uid + "_rand" + df = dataset.select("*", rand(0).alias(randCol)) + metrics = np.zeros(numModels) + for i in range(nFolds): + validateLB = i * h + validateUB = (i + 1) * h + condition = (df[randCol] >= validateLB) & (df[randCol] < validateUB) + validation = df.filter(condition) + train = df.filter(~condition) + for j in range(numModels): + model = est.fit(train, epm[j]) + # TODO: duplicate evaluator to take extra params from input + metric = eva.evaluate(model.transform(validation, epm[j])) + metrics[j] += metric + bestIndex = np.argmax(metrics) + bestModel = est.fit(dataset, epm[bestIndex]) + return CrossValidatorModel(bestModel) + + def copy(self, extra={}): + newCV = Params.copy(self, extra) + if self.isSet(self.estimator): + newCV.setEstimator(self.getEstimator().copy(extra)) + # estimatorParamMaps remain the same + if self.isSet(self.evaluator): + newCV.setEvaluator(self.getEvaluator().copy(extra)) + return newCV + + +class CrossValidatorModel(Model): + """ + Model from k-fold cross validation. + """ + + def __init__(self, bestModel): + super(CrossValidatorModel, self).__init__() + #: best model from cross validation + self.bestModel = bestModel + + def _transform(self, dataset): + return self.bestModel.transform(dataset) + + def copy(self, extra={}): + """ + Creates a copy of this instance with a randomly generated uid + and some extra params. This copies the underlying bestModel, + creates a deep copy of the embedded paramMap, and + copies the embedded and extra parameters over. + :param extra: Extra parameters to copy to the new instance + :return: Copy of this instance + """ + return CrossValidatorModel(self.bestModel.copy(extra)) + + if __name__ == "__main__": import doctest - doctest.testmod() + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + globs = globals().copy() + # The small batch size here ensures that we see multiple batches, + # even in these small test examples: + sc = SparkContext("local[2]", "ml.tuning tests") + sqlContext = SQLContext(sc) + globs['sc'] = sc + globs['sqlContext'] = sqlContext + (failure_count, test_count) = doctest.testmod( + globs=globs, optionflags=doctest.ELLIPSIS) + sc.stop() + if failure_count: + exit(-1) diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index d3cb100a9efa5..cee9d67b05325 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -39,9 +39,16 @@ class Identifiable(object): """ def __init__(self): - #: A unique id for the object. The default implementation - #: concatenates the class name, "_", and 8 random hex chars. - self.uid = type(self).__name__ + "_" + uuid.uuid4().hex[:8] + #: A unique id for the object. + self.uid = self._randomUID() def __repr__(self): return self.uid + + @classmethod + def _randomUID(cls): + """ + Generate a unique id for the object. The default implementation + concatenates the class name, "_", and 12 random hex chars. 
+ """ + return cls.__name__ + "_" + uuid.uuid4().hex[12:] diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index 394f23c5e9b12..7b0893e2cdadc 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -20,8 +20,8 @@ from pyspark import SparkContext from pyspark.sql import DataFrame from pyspark.ml.param import Params -from pyspark.ml.pipeline import Estimator, Transformer -from pyspark.mllib.common import inherit_doc +from pyspark.ml.pipeline import Estimator, Transformer, Model +from pyspark.mllib.common import inherit_doc, _java2py, _py2java def _jvm(): @@ -45,43 +45,61 @@ class JavaWrapper(Params): __metaclass__ = ABCMeta - #: Fully-qualified class name of the wrapped Java component. - _java_class = None + #: The wrapped Java companion object. Subclasses should initialize + #: it properly. The param values in the Java object should be + #: synced with the Python wrapper in fit/transform/evaluate/copy. + _java_obj = None - def _java_obj(self): + @staticmethod + def _new_java_obj(java_class, *args): """ - Returns or creates a Java object. + Construct a new Java object. """ + sc = SparkContext._active_spark_context java_obj = _jvm() - for name in self._java_class.split("."): + for name in java_class.split("."): java_obj = getattr(java_obj, name) - return java_obj() + java_args = [_py2java(sc, arg) for arg in args] + return java_obj(*java_args) - def _transfer_params_to_java(self, params, java_obj): + def _make_java_param_pair(self, param, value): """ - Transforms the embedded params and additional params to the - input Java object. - :param params: additional params (overwriting embedded values) - :param java_obj: Java object to receive the params + Makes a Java parm pair. + """ + sc = SparkContext._active_spark_context + param = self._resolveParam(param) + java_param = self._java_obj.getParam(param.name) + java_value = _py2java(sc, value) + return java_param.w(java_value) + + def _transfer_params_to_java(self): + """ + Transforms the embedded params to the companion Java object. """ - paramMap = self.extractParamMap(params) + paramMap = self.extractParamMap() for param in self.params: if param in paramMap: - java_obj.set(param.name, paramMap[param]) + pair = self._make_java_param_pair(param, paramMap[param]) + self._java_obj.set(pair) + + def _transfer_params_from_java(self): + """ + Transforms the embedded params from the companion Java object. + """ + sc = SparkContext._active_spark_context + for param in self.params: + if self._java_obj.hasParam(param.name): + java_param = self._java_obj.getParam(param.name) + value = _java2py(sc, self._java_obj.getOrDefault(java_param)) + self._paramMap[param] = value - def _empty_java_param_map(self): + @staticmethod + def _empty_java_param_map(): """ Returns an empty Java ParamMap reference. """ return _jvm().org.apache.spark.ml.param.ParamMap() - def _create_java_param_map(self, params, java_obj): - paramMap = self._empty_java_param_map() - for param, value in params.items(): - if param.parent is self: - paramMap.put(java_obj.getParam(param.name), value) - return paramMap - @inherit_doc class JavaEstimator(Estimator, JavaWrapper): @@ -96,9 +114,9 @@ def _create_model(self, java_model): """ Creates a model from the input Java model reference. """ - return JavaModel(java_model) + raise NotImplementedError() - def _fit_java(self, dataset, params={}): + def _fit_java(self, dataset): """ Fits a Java model to the input dataset. 
:param dataset: input dataset, which is an instance of @@ -106,12 +124,11 @@ def _fit_java(self, dataset, params={}): :param params: additional params (overwriting embedded values) :return: fitted Java model """ - java_obj = self._java_obj() - self._transfer_params_to_java(params, java_obj) - return java_obj.fit(dataset._jdf, self._empty_java_param_map()) + self._transfer_params_to_java() + return self._java_obj.fit(dataset._jdf) - def fit(self, dataset, params={}): - java_model = self._fit_java(dataset, params) + def _fit(self, dataset): + java_model = self._fit_java(dataset) return self._create_model(java_model) @@ -124,26 +141,47 @@ class JavaTransformer(Transformer, JavaWrapper): __metaclass__ = ABCMeta - def transform(self, dataset, params={}): - java_obj = self._java_obj() - self._transfer_params_to_java({}, java_obj) - java_param_map = self._create_java_param_map(params, java_obj) - return DataFrame(java_obj.transform(dataset._jdf, java_param_map), - dataset.sql_ctx) + def _transform(self, dataset): + self._transfer_params_to_java() + return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) @inherit_doc -class JavaModel(JavaTransformer): +class JavaModel(Model, JavaTransformer): """ Base class for :py:class:`Model`s that wrap Java/Scala - implementations. + implementations. Subclasses should inherit this class before + param mix-ins, because this sets the UID from the Java model. """ __metaclass__ = ABCMeta def __init__(self, java_model): - super(JavaTransformer, self).__init__() - self._java_model = java_model + """ + Initialize this instance with a Java model object. + Subclasses should call this constructor, initialize params, + and then call _transformer_params_from_java. + """ + super(JavaModel, self).__init__() + self._java_obj = java_model + self.uid = java_model.uid() - def _java_obj(self): - return self._java_model + def copy(self, extra={}): + """ + Creates a copy of this instance with the same uid and some + extra params. This implementation first calls Params.copy and + then make a copy of the companion Java model with extra params. + So both the Python wrapper and the Java model get copied. + :param extra: Extra parameters to copy to the new instance + :return: Copy of this instance + """ + that = super(JavaModel, self).copy(extra) + that._java_obj = self._java_obj.copy(self._empty_java_param_map()) + that._transfer_params_to_java() + return that + + def _call_java(self, name, *args): + m = getattr(self._java_obj, name) + sc = SparkContext._active_spark_context + java_args = [_py2java(sc, arg) for arg in args] + return _java2py(sc, m(*java_args)) diff --git a/python/pyspark/mllib/__init__.py b/python/pyspark/mllib/__init__.py index 07507b2ad0d05..b11aed2c3afda 100644 --- a/python/pyspark/mllib/__init__.py +++ b/python/pyspark/mllib/__init__.py @@ -28,11 +28,3 @@ __all__ = ['classification', 'clustering', 'feature', 'fpm', 'linalg', 'random', 'recommendation', 'regression', 'stat', 'tree', 'util'] - -import sys -from . import rand as random -modname = __name__ + '.random' -random.__name__ = modname -random.RandomRDDs.__module__ = modname -sys.modules[modname] = random -del modname, sys diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index abbb7cf60eece..b55583f82223f 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -40,11 +40,16 @@ class KMeansModel(Saveable, Loader): >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) >>> model = KMeans.train( - ... 
sc.parallelize(data), 2, maxIterations=10, runs=30, initializationMode="random") + ... sc.parallelize(data), 2, maxIterations=10, runs=30, initializationMode="random", + ... seed=50, initializationSteps=5, epsilon=1e-4) >>> model.predict(array([0.0, 0.0])) == model.predict(array([1.0, 1.0])) True >>> model.predict(array([8.0, 9.0])) == model.predict(array([9.0, 8.0])) True + >>> model.k + 2 + >>> model.computeCost(sc.parallelize(data)) + 2.0000000000000004 >>> model = KMeans.train(sc.parallelize(data), 2) >>> sparse_data = [ ... SparseVector(3, {1: 1.0}), @@ -52,7 +57,8 @@ class KMeansModel(Saveable, Loader): ... SparseVector(3, {2: 1.0}), ... SparseVector(3, {2: 1.1}) ... ] - >>> model = KMeans.train(sc.parallelize(sparse_data), 2, initializationMode="k-means||") + >>> model = KMeans.train(sc.parallelize(sparse_data), 2, initializationMode="k-means||", + ... seed=50, initializationSteps=5, epsilon=1e-4) >>> model.predict(array([0., 1., 0.])) == model.predict(array([0, 1.1, 0.])) True >>> model.predict(array([0., 0., 1.])) == model.predict(array([0, 0, 1.1])) @@ -83,6 +89,11 @@ def clusterCenters(self): """Get the cluster centers, represented as a list of NumPy arrays.""" return self.centers + @property + def k(self): + """Total number of clusters.""" + return len(self.centers) + def predict(self, x): """Find the cluster to which x belongs in this model.""" best = 0 @@ -95,6 +106,15 @@ def predict(self, x): best_distance = distance return best + def computeCost(self, rdd): + """ + Return the K-means cost (sum of squared distances of points to + their nearest center) for this model on the given data. + """ + cost = callMLlibFunc("computeCostKmeansModel", rdd.map(_convert_to_vector), + [_convert_to_vector(c) for c in self.centers]) + return cost + def save(self, sc, path): java_centers = _py2java(sc, [_convert_to_vector(c) for c in self.centers]) java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel(java_centers) @@ -109,10 +129,11 @@ def load(cls, sc, path): class KMeans(object): @classmethod - def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||", seed=None): + def train(cls, rdd, k, maxIterations=100, runs=1, initializationMode="k-means||", + seed=None, initializationSteps=5, epsilon=1e-4): """Train a k-means clustering model.""" model = callMLlibFunc("trainKMeansModel", rdd.map(_convert_to_vector), k, maxIterations, - runs, initializationMode, seed) + runs, initializationMode, seed, initializationSteps, epsilon) centers = callJavaFunc(rdd.context, model.clusterCenters) return KMeansModel([c.toArray() for c in centers]) @@ -121,6 +142,7 @@ class GaussianMixtureModel(object): """A clustering model derived from the Gaussian Mixture Model method. + >>> from pyspark.mllib.linalg import Vectors, DenseMatrix >>> clusterdata_1 = sc.parallelize(array([-0.1,-0.05,-0.01,-0.1, ... 0.9,0.8,0.75,0.935, ... -0.83,-0.68,-0.91,-0.76 ]).reshape(6, 2)) @@ -133,11 +155,12 @@ class GaussianMixtureModel(object): True >>> labels[4]==labels[5] True - >>> clusterdata_2 = sc.parallelize(array([-5.1971, -2.5359, -3.8220, - ... -5.2211, -5.0602, 4.7118, - ... 6.8989, 3.4592, 4.6322, - ... 5.7048, 4.6567, 5.5026, - ... 4.5605, 5.2043, 6.2734]).reshape(5, 3)) + >>> data = array([-5.1971, -2.5359, -3.8220, + ... -5.2211, -5.0602, 4.7118, + ... 6.8989, 3.4592, 4.6322, + ... 5.7048, 4.6567, 5.5026, + ... 4.5605, 5.2043, 6.2734]) + >>> clusterdata_2 = sc.parallelize(data.reshape(5,3)) >>> model = GaussianMixture.train(clusterdata_2, 2, convergenceTol=0.0001, ... 
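# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the new KMeans knobs
# (initializationSteps, epsilon) and the new KMeansModel.k / computeCost API,
# mirroring the doctest above. Assumes an active SparkContext `sc`.
from numpy import array
from pyspark.mllib.clustering import KMeans

data = array([0.0, 0.0, 1.0, 1.0, 9.0, 8.0, 8.0, 9.0]).reshape(4, 2)
model = KMeans.train(sc.parallelize(data), 2, maxIterations=10, runs=30,
                     initializationMode="random", seed=50,
                     initializationSteps=5, epsilon=1e-4)
print(model.k)                                  # 2
print(model.computeCost(sc.parallelize(data)))  # sum of squared distances to centers
# ---------------------------------------------------------------------------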
maxIterations=150, seed=10) >>> labels = model.predict(clusterdata_2).collect() @@ -145,12 +168,38 @@ class GaussianMixtureModel(object): True >>> labels[3]==labels[4] True + >>> clusterdata_3 = sc.parallelize(data.reshape(15, 1)) + >>> im = GaussianMixtureModel([0.5, 0.5], + ... [MultivariateGaussian(Vectors.dense([-1.0]), DenseMatrix(1, 1, [1.0])), + ... MultivariateGaussian(Vectors.dense([1.0]), DenseMatrix(1, 1, [1.0]))]) + >>> model = GaussianMixture.train(clusterdata_3, 2, initialModel=im) """ def __init__(self, weights, gaussians): - self.weights = weights - self.gaussians = gaussians - self.k = len(self.weights) + self._weights = weights + self._gaussians = gaussians + self._k = len(self._weights) + + @property + def weights(self): + """ + Weights for each Gaussian distribution in the mixture, where weights[i] is + the weight for Gaussian i, and weights.sum == 1. + """ + return self._weights + + @property + def gaussians(self): + """ + Array of MultivariateGaussian where gaussians[i] represents + the Multivariate Gaussian (Normal) Distribution for Gaussian i. + """ + return self._gaussians + + @property + def k(self): + """Number of gaussians in mixture.""" + return self._k def predict(self, x): """ @@ -163,6 +212,9 @@ def predict(self, x): if isinstance(x, RDD): cluster_labels = self.predictSoft(x).map(lambda z: z.index(max(z))) return cluster_labels + else: + raise TypeError("x should be represented by an RDD, " + "but got %s." % type(x)) def predictSoft(self, x): """ @@ -172,10 +224,13 @@ def predictSoft(self, x): :return: membership_matrix. RDD of array of double values. """ if isinstance(x, RDD): - means, sigmas = zip(*[(g.mu, g.sigma) for g in self.gaussians]) + means, sigmas = zip(*[(g.mu, g.sigma) for g in self._gaussians]) membership_matrix = callMLlibFunc("predictSoftGMM", x.map(_convert_to_vector), - _convert_to_vector(self.weights), means, sigmas) + _convert_to_vector(self._weights), means, sigmas) return membership_matrix.map(lambda x: pyarray.array('d', x)) + else: + raise TypeError("x should be represented by an RDD, " + "but got %s." % type(x)) class GaussianMixture(object): @@ -187,13 +242,24 @@ class GaussianMixture(object): :param convergenceTol: Threshold value to check the convergence criteria. Defaults to 1e-3 :param maxIterations: Number of iterations. 
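# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): seeding GaussianMixture.train
# with an explicit starting point via the new initialModel argument, mirroring
# the clusterdata_3 doctest above. The MultivariateGaussian import path below
# (pyspark.mllib.stat.distribution) is assumed from clustering.py's own import
# in this release. Assumes an active SparkContext `sc`.
from numpy import array
from pyspark.mllib.clustering import GaussianMixture, GaussianMixtureModel
from pyspark.mllib.linalg import Vectors, DenseMatrix
from pyspark.mllib.stat.distribution import MultivariateGaussian

data = array([-5.1971, -2.5359, -3.8220, -5.2211, -5.0602, 4.7118,
              6.8989, 3.4592, 4.6322, 5.7048, 4.6567, 5.5026,
              4.5605, 5.2043, 6.2734])
clusterdata = sc.parallelize(data.reshape(15, 1))
im = GaussianMixtureModel(
    [0.5, 0.5],
    [MultivariateGaussian(Vectors.dense([-1.0]), DenseMatrix(1, 1, [1.0])),
     MultivariateGaussian(Vectors.dense([1.0]), DenseMatrix(1, 1, [1.0]))])
model = GaussianMixture.train(clusterdata, 2, initialModel=im)
print(model.k)          # 2 components
print(model.weights)    # learned mixture weights (read-only property)
# ---------------------------------------------------------------------------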
Default to 100 :param seed: Random Seed + :param initialModel: GaussianMixtureModel for initializing learning """ @classmethod - def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None): + def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None): """Train a Gaussian Mixture clustering model.""" - weight, mu, sigma = callMLlibFunc("trainGaussianMixture", - rdd.map(_convert_to_vector), k, - convergenceTol, maxIterations, seed) + initialModelWeights = None + initialModelMu = None + initialModelSigma = None + if initialModel is not None: + if initialModel.k != k: + raise Exception("Mismatched cluster count, initialModel.k = %s, however k = %s" + % (initialModel.k, k)) + initialModelWeights = initialModel.weights + initialModelMu = [initialModel.gaussians[i].mu for i in range(initialModel.k)] + initialModelSigma = [initialModel.gaussians[i].sigma for i in range(initialModel.k)] + weight, mu, sigma = callMLlibFunc("trainGaussianMixture", rdd.map(_convert_to_vector), k, + convergenceTol, maxIterations, seed, initialModelWeights, + initialModelMu, initialModelSigma) mvg_obj = [MultivariateGaussian(mu[i], sigma[i]) for i in range(k)] return GaussianMixtureModel(weight, mvg_obj) diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index ba6058978880a..855e85f57155e 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -27,7 +27,7 @@ from pyspark import RDD, SparkContext from pyspark.serializers import PickleSerializer, AutoBatchedSerializer - +from pyspark.sql import DataFrame, SQLContext # Hack for support float('inf') in Py4j _old_smart_decode = py4j.protocol.smart_decode @@ -99,6 +99,9 @@ def _java2py(sc, r, encoding="bytes"): jrdd = sc._jvm.SerDe.javaToPython(r) return RDD(jrdd, sc) + if clsName == 'DataFrame': + return DataFrame(r, SQLContext(sc)) + if clsName in _picklable_classes: r = sc._jvm.SerDe.dumps(r) elif isinstance(r, (JavaArray, JavaList)): diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 16cb49cc0cfff..c5cf3a4e7ff22 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -15,29 +15,31 @@ # limitations under the License. # -from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc from pyspark.sql import SQLContext -from pyspark.sql.types import StructField, StructType, DoubleType +from pyspark.sql.types import StructField, StructType, DoubleType, IntegerType, ArrayType + +__all__ = ['BinaryClassificationMetrics', 'RegressionMetrics', + 'MulticlassMetrics', 'RankingMetrics'] class BinaryClassificationMetrics(JavaModelWrapper): """ Evaluator for binary classification. + :param scoreAndLabels: an RDD of (score, label) pairs + >>> scoreAndLabels = sc.parallelize([ ... (0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2) >>> metrics = BinaryClassificationMetrics(scoreAndLabels) - >>> metrics.areaUnderROC() + >>> metrics.areaUnderROC 0.70... - >>> metrics.areaUnderPR() + >>> metrics.areaUnderPR 0.83... 
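# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): areaUnderROC and areaUnderPR are
# now properties rather than methods, so the trailing parentheses that older
# callers used are dropped. Assumes an active SparkContext `sc`.
from pyspark.mllib.evaluation import BinaryClassificationMetrics

scoreAndLabels = sc.parallelize(
    [(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2)
metrics = BinaryClassificationMetrics(scoreAndLabels)
print(metrics.areaUnderROC)   # was metrics.areaUnderROC() before this change
print(metrics.areaUnderPR)
metrics.unpersist()
# ---------------------------------------------------------------------------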
>>> metrics.unpersist() """ def __init__(self, scoreAndLabels): - """ - :param scoreAndLabels: an RDD of (score, label) pairs - """ sc = scoreAndLabels.ctx sql_ctx = SQLContext(sc) df = sql_ctx.createDataFrame(scoreAndLabels, schema=StructType([ @@ -47,6 +49,7 @@ def __init__(self, scoreAndLabels): java_model = java_class(df._jdf) super(BinaryClassificationMetrics, self).__init__(java_model) + @property def areaUnderROC(self): """ Computes the area under the receiver operating characteristic @@ -54,6 +57,7 @@ def areaUnderROC(self): """ return self.call("areaUnderROC") + @property def areaUnderPR(self): """ Computes the area under the precision-recall curve. @@ -67,6 +71,397 @@ def unpersist(self): self.call("unpersist") +class RegressionMetrics(JavaModelWrapper): + """ + Evaluator for regression. + + :param predictionAndObservations: an RDD of (prediction, + observation) pairs. + + >>> predictionAndObservations = sc.parallelize([ + ... (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)]) + >>> metrics = RegressionMetrics(predictionAndObservations) + >>> metrics.explainedVariance + 0.95... + >>> metrics.meanAbsoluteError + 0.5... + >>> metrics.meanSquaredError + 0.37... + >>> metrics.rootMeanSquaredError + 0.61... + >>> metrics.r2 + 0.94... + """ + + def __init__(self, predictionAndObservations): + sc = predictionAndObservations.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndObservations, schema=StructType([ + StructField("prediction", DoubleType(), nullable=False), + StructField("observation", DoubleType(), nullable=False)])) + java_class = sc._jvm.org.apache.spark.mllib.evaluation.RegressionMetrics + java_model = java_class(df._jdf) + super(RegressionMetrics, self).__init__(java_model) + + @property + def explainedVariance(self): + """ + Returns the explained variance regression score. + explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + """ + return self.call("explainedVariance") + + @property + def meanAbsoluteError(self): + """ + Returns the mean absolute error, which is a risk function corresponding to the + expected value of the absolute error loss or l1-norm loss. + """ + return self.call("meanAbsoluteError") + + @property + def meanSquaredError(self): + """ + Returns the mean squared error, which is a risk function corresponding to the + expected value of the squared error loss or quadratic loss. + """ + return self.call("meanSquaredError") + + @property + def rootMeanSquaredError(self): + """ + Returns the root mean squared error, which is defined as the square root of + the mean squared error. + """ + return self.call("rootMeanSquaredError") + + @property + def r2(self): + """ + Returns R^2^, the coefficient of determination. + """ + return self.call("r2") + + +class MulticlassMetrics(JavaModelWrapper): + """ + Evaluator for multiclass classification. + + :param predictionAndLabels an RDD of (prediction, label) pairs. + + >>> predictionAndLabels = sc.parallelize([(0.0, 0.0), (0.0, 1.0), (0.0, 0.0), + ... (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]) + >>> metrics = MulticlassMetrics(predictionAndLabels) + >>> metrics.falsePositiveRate(0.0) + 0.2... + >>> metrics.precision(1.0) + 0.75... + >>> metrics.recall(2.0) + 1.0... + >>> metrics.fMeasure(0.0, 2.0) + 0.52... + >>> metrics.precision() + 0.66... + >>> metrics.recall() + 0.66... + >>> metrics.weightedFalsePositiveRate + 0.19... + >>> metrics.weightedPrecision + 0.68... + >>> metrics.weightedRecall + 0.66... + >>> metrics.weightedFMeasure() + 0.66... 
+ >>> metrics.weightedFMeasure(2.0) + 0.65... + """ + + def __init__(self, predictionAndLabels): + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, schema=StructType([ + StructField("prediction", DoubleType(), nullable=False), + StructField("label", DoubleType(), nullable=False)])) + java_class = sc._jvm.org.apache.spark.mllib.evaluation.MulticlassMetrics + java_model = java_class(df._jdf) + super(MulticlassMetrics, self).__init__(java_model) + + def truePositiveRate(self, label): + """ + Returns true positive rate for a given label (category). + """ + return self.call("truePositiveRate", label) + + def falsePositiveRate(self, label): + """ + Returns false positive rate for a given label (category). + """ + return self.call("falsePositiveRate", label) + + def precision(self, label=None): + """ + Returns precision or precision for a given label (category) if specified. + """ + if label is None: + return self.call("precision") + else: + return self.call("precision", float(label)) + + def recall(self, label=None): + """ + Returns recall or recall for a given label (category) if specified. + """ + if label is None: + return self.call("recall") + else: + return self.call("recall", float(label)) + + def fMeasure(self, label=None, beta=None): + """ + Returns f-measure or f-measure for a given label (category) if specified. + """ + if beta is None: + if label is None: + return self.call("fMeasure") + else: + return self.call("fMeasure", label) + else: + if label is None: + raise Exception("If the beta parameter is specified, label can not be none") + else: + return self.call("fMeasure", label, beta) + + @property + def weightedTruePositiveRate(self): + """ + Returns weighted true positive rate. + (equals to precision, recall and f-measure) + """ + return self.call("weightedTruePositiveRate") + + @property + def weightedFalsePositiveRate(self): + """ + Returns weighted false positive rate. + """ + return self.call("weightedFalsePositiveRate") + + @property + def weightedRecall(self): + """ + Returns weighted averaged recall. + (equals to precision, recall and f-measure) + """ + return self.call("weightedRecall") + + @property + def weightedPrecision(self): + """ + Returns weighted averaged precision. + """ + return self.call("weightedPrecision") + + def weightedFMeasure(self, beta=None): + """ + Returns weighted averaged f-measure. + """ + if beta is None: + return self.call("weightedFMeasure") + else: + return self.call("weightedFMeasure", beta) + + +class RankingMetrics(JavaModelWrapper): + """ + Evaluator for ranking algorithms. + + :param predictionAndLabels: an RDD of (predicted ranking, + ground truth set) pairs. + + >>> predictionAndLabels = sc.parallelize([ + ... ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]), + ... ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]), + ... ([1, 2, 3, 4, 5], [])]) + >>> metrics = RankingMetrics(predictionAndLabels) + >>> metrics.precisionAt(1) + 0.33... + >>> metrics.precisionAt(5) + 0.26... + >>> metrics.precisionAt(15) + 0.17... + >>> metrics.meanAveragePrecision + 0.35... + >>> metrics.ndcgAt(3) + 0.33... + >>> metrics.ndcgAt(10) + 0.48... 
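# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): what precisionAt(k) in the
# RankingMetrics doctest above measures, computed by hand for the same three
# queries. An empty ground-truth set contributes 0 (with a log warning in the
# real implementation), and the denominator is always k.
queries = [
    ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], {1, 2, 3, 4, 5}),
    ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], {1, 2, 3}),
    ([1, 2, 3, 4, 5], set()),
]

def precision_at(k):
    per_query = [len(set(pred[:k]) & truth) / float(k) for pred, truth in queries]
    return sum(per_query) / len(queries)

print(precision_at(1))   # (1.0 + 0.0 + 0.0) / 3 = 0.33...
print(precision_at(5))   # (0.4 + 0.4 + 0.0) / 3 = 0.26...
# ---------------------------------------------------------------------------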
+ + """ + + def __init__(self, predictionAndLabels): + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, + schema=sql_ctx._inferSchema(predictionAndLabels)) + java_model = callMLlibFunc("newRankingMetrics", df._jdf) + super(RankingMetrics, self).__init__(java_model) + + def precisionAt(self, k): + """ + Compute the average precision of all the queries, truncated at ranking position k. + + If for a query, the ranking algorithm returns n (n < k) results, the precision value + will be computed as #(relevant items retrieved) / k. This formula also applies when + the size of the ground truth set is less than k. + + If a query has an empty ground truth set, zero will be used as precision together + with a log warning. + """ + return self.call("precisionAt", int(k)) + + @property + def meanAveragePrecision(self): + """ + Returns the mean average precision (MAP) of all the queries. + If a query has an empty ground truth set, the average precision will be zero and + a log warining is generated. + """ + return self.call("meanAveragePrecision") + + def ndcgAt(self, k): + """ + Compute the average NDCG value of all the queries, truncated at ranking position k. + The discounted cumulative gain at position k is computed as: + sum,,i=1,,^k^ (2^{relevance of ''i''th item}^ - 1) / log(i + 1), + and the NDCG is obtained by dividing the DCG value on the ground truth set. + In the current implementation, the relevance value is binary. + If a query has an empty ground truth set, zero will be used as NDCG together with + a log warning. + """ + return self.call("ndcgAt", int(k)) + + +class MultilabelMetrics(JavaModelWrapper): + """ + Evaluator for multilabel classification. + + :param predictionAndLabels: an RDD of (predictions, labels) pairs, + both are non-null Arrays, each with + unique elements. + + >>> predictionAndLabels = sc.parallelize([([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]), + ... ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]), + ... ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])]) + >>> metrics = MultilabelMetrics(predictionAndLabels) + >>> metrics.precision(0.0) + 1.0 + >>> metrics.recall(1.0) + 0.66... + >>> metrics.f1Measure(2.0) + 0.5 + >>> metrics.precision() + 0.66... + >>> metrics.recall() + 0.64... + >>> metrics.f1Measure() + 0.63... + >>> metrics.microPrecision + 0.72... + >>> metrics.microRecall + 0.66... + >>> metrics.microF1Measure + 0.69... + >>> metrics.hammingLoss + 0.33... + >>> metrics.subsetAccuracy + 0.28... + >>> metrics.accuracy + 0.54... + """ + + def __init__(self, predictionAndLabels): + sc = predictionAndLabels.ctx + sql_ctx = SQLContext(sc) + df = sql_ctx.createDataFrame(predictionAndLabels, + schema=sql_ctx._inferSchema(predictionAndLabels)) + java_class = sc._jvm.org.apache.spark.mllib.evaluation.MultilabelMetrics + java_model = java_class(df._jdf) + super(MultilabelMetrics, self).__init__(java_model) + + def precision(self, label=None): + """ + Returns precision or precision for a given label (category) if specified. + """ + if label is None: + return self.call("precision") + else: + return self.call("precision", float(label)) + + def recall(self, label=None): + """ + Returns recall or recall for a given label (category) if specified. + """ + if label is None: + return self.call("recall") + else: + return self.call("recall", float(label)) + + def f1Measure(self, label=None): + """ + Returns f1Measure or f1Measure for a given label (category) if specified. 
+ """ + if label is None: + return self.call("f1Measure") + else: + return self.call("f1Measure", float(label)) + + @property + def microPrecision(self): + """ + Returns micro-averaged label-based precision. + (equals to micro-averaged document-based precision) + """ + return self.call("microPrecision") + + @property + def microRecall(self): + """ + Returns micro-averaged label-based recall. + (equals to micro-averaged document-based recall) + """ + return self.call("microRecall") + + @property + def microF1Measure(self): + """ + Returns micro-averaged label-based f1-measure. + (equals to micro-averaged document-based f1-measure) + """ + return self.call("microF1Measure") + + @property + def hammingLoss(self): + """ + Returns Hamming-loss. + """ + return self.call("hammingLoss") + + @property + def subsetAccuracy(self): + """ + Returns subset accuracy. + (for equal sets of labels) + """ + return self.call("subsetAccuracy") + + @property + def accuracy(self): + """ + Returns accuracy. + """ + return self.call("accuracy") + + def _test(): import doctest from pyspark import SparkContext diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 1140539a24e95..da90554f41437 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -33,10 +33,12 @@ from pyspark import SparkContext from pyspark.rdd import RDD, ignore_unicode_prefix from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import Vectors, _convert_to_vector +from pyspark.mllib.linalg import Vectors, DenseVector, SparseVector, _convert_to_vector +from pyspark.mllib.regression import LabeledPoint __all__ = ['Normalizer', 'StandardScalerModel', 'StandardScaler', - 'HashingTF', 'IDFModel', 'IDF', 'Word2Vec', 'Word2VecModel'] + 'HashingTF', 'IDFModel', 'IDF', 'Word2Vec', 'Word2VecModel', + 'ChiSqSelector', 'ChiSqSelectorModel'] class VectorTransformer(object): @@ -66,6 +68,8 @@ class Normalizer(VectorTransformer): For `p` = float('inf'), max(abs(vector)) will be used as norm for normalization. + :param p: Normalization in L^p^ space, p = 2 by default. + >>> v = Vectors.dense(range(3)) >>> nor = Normalizer(1) >>> nor.transform(v) @@ -80,9 +84,6 @@ class Normalizer(VectorTransformer): DenseVector([0.0, 0.5, 1.0]) """ def __init__(self, p=2.0): - """ - :param p: Normalization in L^p^ space, p = 2 by default. - """ assert p >= 1.0, "p should be greater than 1.0" self.p = float(p) @@ -92,7 +93,7 @@ def transform(self, vector): :param vector: vector or RDD of vector to be normalized. :return: normalized vector. If the norm of the input is zero, it - will return the input vector. + will return the input vector. """ sc = SparkContext._active_spark_context assert sc is not None, "SparkContext should be initialized first" @@ -162,6 +163,13 @@ class StandardScaler(object): variance using column summary statistics on the samples in the training set. + :param withMean: False by default. Centers the data with mean + before scaling. It will build a dense output, so this + does not work on sparse input and will raise an + exception. + :param withStd: True by default. Scales the data to unit + standard deviation. + >>> vs = [Vectors.dense([-2.0, 2.3, 0]), Vectors.dense([3.8, 0.0, 1.9])] >>> dataset = sc.parallelize(vs) >>> standardizer = StandardScaler(True, True) @@ -172,14 +180,6 @@ class StandardScaler(object): DenseVector([0.7071, -0.7071, 0.7071]) """ def __init__(self, withMean=False, withStd=True): - """ - :param withMean: False by default. 
Centers the data with mean - before scaling. It will build a dense output, so this - does not work on sparse input and will raise an - exception. - :param withStd: True by default. Scales the data to unit - standard deviation. - """ if not (withMean or withStd): warnings.warn("Both withMean and withStd are false. The model does nothing.") self.withMean = withMean @@ -191,7 +191,7 @@ def fit(self, dataset): for later scaling. :param data: The data used to compute the mean and variance - to build the transformation model. + to build the transformation model. :return: a StandardScalarModel """ dataset = dataset.map(_convert_to_vector) @@ -199,6 +199,58 @@ def fit(self, dataset): return StandardScalerModel(jmodel) +class ChiSqSelectorModel(JavaVectorTransformer): + """ + .. note:: Experimental + + Represents a Chi Squared selector model. + """ + def transform(self, vector): + """ + Applies transformation on a vector. + + :param vector: Vector or RDD of Vector to be transformed. + :return: transformed vector. + """ + return JavaVectorTransformer.transform(self, vector) + + +class ChiSqSelector(object): + """ + .. note:: Experimental + + Creates a ChiSquared feature selector. + + :param numTopFeatures: number of features that selector will select. + + >>> data = [ + ... LabeledPoint(0.0, SparseVector(3, {0: 8.0, 1: 7.0})), + ... LabeledPoint(1.0, SparseVector(3, {1: 9.0, 2: 6.0})), + ... LabeledPoint(1.0, [0.0, 9.0, 8.0]), + ... LabeledPoint(2.0, [8.0, 9.0, 5.0]) + ... ] + >>> model = ChiSqSelector(1).fit(sc.parallelize(data)) + >>> model.transform(SparseVector(3, {1: 9.0, 2: 6.0})) + SparseVector(1, {0: 6.0}) + >>> model.transform(DenseVector([8.0, 9.0, 5.0])) + DenseVector([5.0]) + """ + def __init__(self, numTopFeatures): + self.numTopFeatures = int(numTopFeatures) + + def fit(self, data): + """ + Returns a ChiSquared feature selector. + + :param data: an `RDD[LabeledPoint]` containing the labeled dataset + with categorical features. Real-valued features will be + treated as categorical for each distinct value. + Apply feature discretizer before using this function. + """ + jmodel = callMLlibFunc("fitChiSqSelector", self.numTopFeatures, data) + return ChiSqSelectorModel(jmodel) + + class HashingTF(object): """ .. note:: Experimental @@ -208,15 +260,14 @@ class HashingTF(object): Note: the terms must be hashable (can not be dict/set/list...). + :param numFeatures: number of features (default: 2^20) + >>> htf = HashingTF(100) >>> doc = "a a b b c d".split(" ") >>> htf.transform(doc) SparseVector(100, {...}) """ def __init__(self, numFeatures=1 << 20): - """ - :param numFeatures: number of features (default: 2^20) - """ self.numFeatures = numFeatures def indexOf(self, term): @@ -256,7 +307,7 @@ def transform(self, x): Call transform directly on the RDD instead. :param x: an RDD of term frequency vectors or a term frequency - vector + vector :return: an RDD of TF-IDF vectors or a TF-IDF vector """ if isinstance(x, RDD): @@ -287,6 +338,9 @@ class IDF(object): `minDocFreq`). For terms that are not in at least `minDocFreq` documents, the IDF is found as 0, resulting in TF-IDFs of 0. + :param minDocFreq: minimum of documents in which a term + should appear for filtering + >>> n = 4 >>> freqs = [Vectors.sparse(n, (1, 3), (1.0, 2.0)), ... 
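# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the patch): the new ChiSqSelector /
# ChiSqSelectorModel API above, mirroring its doctest. Real-valued features are
# treated as categorical, one category per distinct value. Assumes an active
# SparkContext `sc`.
from pyspark.mllib.feature import ChiSqSelector
from pyspark.mllib.linalg import SparseVector, DenseVector
from pyspark.mllib.regression import LabeledPoint

data = [
    LabeledPoint(0.0, SparseVector(3, {0: 8.0, 1: 7.0})),
    LabeledPoint(1.0, SparseVector(3, {1: 9.0, 2: 6.0})),
    LabeledPoint(1.0, [0.0, 9.0, 8.0]),
    LabeledPoint(2.0, [8.0, 9.0, 5.0]),
]
model = ChiSqSelector(numTopFeatures=1).fit(sc.parallelize(data))
print(model.transform(SparseVector(3, {1: 9.0, 2: 6.0})))   # SparseVector(1, {0: 6.0})
print(model.transform(DenseVector([8.0, 9.0, 5.0])))        # DenseVector([5.0])
# ---------------------------------------------------------------------------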
Vectors.dense([0.0, 1.0, 2.0, 3.0]), @@ -307,10 +361,6 @@ class IDF(object): SparseVector(4, {1: 0.0, 3: 0.5754}) """ def __init__(self, minDocFreq=0): - """ - :param minDocFreq: minimum of documents in which a term - should appear for filtering - """ self.minDocFreq = minDocFreq def fit(self, dataset): diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index d8df02bdbaba9..bdc4a132b1b18 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -61,12 +61,12 @@ class FPGrowth(object): def train(cls, data, minSupport=0.3, numPartitions=-1): """ Computes an FP-Growth model that contains frequent itemsets. - :param data: The input data set, each element - contains a transaction. - :param minSupport: The minimal support level - (default: `0.3`). - :param numPartitions: The number of partitions used by parallel - FP-growth (default: same as input data). + + :param data: The input data set, each element contains a + transaction. + :param minSupport: The minimal support level (default: `0.3`). + :param numPartitions: The number of partitions used by + parallel FP-growth (default: same as input data). """ model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions)) return FPGrowthModel(model) diff --git a/python/pyspark/mllib/linalg.py b/python/pyspark/mllib/linalg.py index a57c0b3ae0d00..23d1a79ffe511 100644 --- a/python/pyspark/mllib/linalg.py +++ b/python/pyspark/mllib/linalg.py @@ -208,9 +208,46 @@ def __init__(self, ar): ar = ar.astype(np.float64) self.array = ar + @staticmethod + def parse(s): + """ + Parse string representation back into the DenseVector. + + >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]') + DenseVector([0.0, 1.0, 2.0, 3.0]) + """ + start = s.find('[') + if start == -1: + raise ValueError("Array should start with '['.") + end = s.find(']') + if end == -1: + raise ValueError("Array should end with ']'.") + s = s[start + 1: end] + + try: + values = [float(val) for val in s.split(',')] + except ValueError: + raise ValueError("Unable to parse values from %s" % s) + return DenseVector(values) + def __reduce__(self): return DenseVector, (self.array.tostring(),) + def numNonzeros(self): + return np.count_nonzero(self.array) + + def norm(self, p): + """ + Calculte the norm of a DenseVector. + + >>> a = DenseVector([0, -1, 2, -3]) + >>> a.norm(2) + 3.7... + >>> a.norm(1) + 6.0 + """ + return np.linalg.norm(self.array, p) + def dot(self, other): """ Compute the dot product of two Vectors. We support @@ -387,8 +424,74 @@ def __init__(self, size, *args): if self.indices[i] >= self.indices[i + 1]: raise TypeError("indices array must be sorted") + def numNonzeros(self): + return np.count_nonzero(self.values) + + def norm(self, p): + """ + Calculte the norm of a SparseVector. + + >>> a = SparseVector(4, [0, 1], [3., -4.]) + >>> a.norm(1) + 7.0 + >>> a.norm(2) + 5.0 + """ + return np.linalg.norm(self.values, p) + def __reduce__(self): - return (SparseVector, (self.size, self.indices.tostring(), self.values.tostring())) + return ( + SparseVector, + (self.size, self.indices.tostring(), self.values.tostring())) + + @staticmethod + def parse(s): + """ + Parse string representation back into the DenseVector. 
+ + >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )') + SparseVector(4, {0: 4.0, 1: 5.0}) + """ + start = s.find('(') + if start == -1: + raise ValueError("Tuple should start with '('") + end = s.find(')') + if start == -1: + raise ValueError("Tuple should end with ')'") + s = s[start + 1: end].strip() + + size = s[: s.find(',')] + try: + size = int(size) + except ValueError: + raise ValueError("Cannot parse size %s." % size) + + ind_start = s.find('[') + if ind_start == -1: + raise ValueError("Indices array should start with '['.") + ind_end = s.find(']') + if ind_end == -1: + raise ValueError("Indices array should end with ']'") + new_s = s[ind_start + 1: ind_end] + ind_list = new_s.split(',') + try: + indices = [int(ind) for ind in ind_list] + except ValueError: + raise ValueError("Unable to parse indices from %s." % new_s) + s = s[ind_end + 1:].strip() + + val_start = s.find('[') + if val_start == -1: + raise ValueError("Values array should start with '['.") + val_end = s.find(']') + if val_end == -1: + raise ValueError("Values array should end with ']'.") + val_list = s[val_start + 1: val_end].split(',') + try: + values = [float(val) for val in val_list] + except ValueError: + raise ValueError("Unable to parse values from %s." % s) + return SparseVector(size, indices, values) def dot(self, other): """ @@ -633,6 +736,49 @@ def stringify(vector): """ return str(vector) + @staticmethod + def squared_distance(v1, v2): + """ + Squared distance between two vectors. + a and b can be of type SparseVector, DenseVector, np.ndarray + or array.array. + + >>> a = Vectors.sparse(4, [(0, 1), (3, 4)]) + >>> b = Vectors.dense([2, 5, 4, 1]) + >>> a.squared_distance(b) + 51.0 + """ + v1, v2 = _convert_to_vector(v1), _convert_to_vector(v2) + return v1.squared_distance(v2) + + @staticmethod + def norm(vector, p): + """ + Find norm of the given vector. + """ + return _convert_to_vector(vector).norm(p) + + @staticmethod + def parse(s): + """Parse a string representation back into the Vector. 
+ + >>> Vectors.parse('[2,1,2 ]') + DenseVector([2.0, 1.0, 2.0]) + >>> Vectors.parse(' ( 100, [0], [2])') + SparseVector(100, {0: 2.0}) + """ + if s.find('(') == -1 and s.find('[') != -1: + return DenseVector.parse(s) + elif s.find('(') != -1: + return SparseVector.parse(s) + else: + raise ValueError( + "Cannot find tokens '[' or '(' from the input string.") + + @staticmethod + def zeros(size): + return DenseVector(np.zeros(size)) + class Matrix(object): """ @@ -755,7 +901,7 @@ def __reduce__(self): return SparseMatrix, ( self.numRows, self.numCols, self.colPtrs.tostring(), self.rowIndices.tostring(), self.values.tostring(), - self.isTransposed) + int(self.isTransposed)) def __getitem__(self, indices): i, j = indices @@ -801,7 +947,7 @@ def toDense(self): # TODO: More efficient implementation: def __eq__(self, other): - return np.all(self.toArray == other.toArray) + return np.all(self.toArray() == other.toArray()) class Matrices(object): diff --git a/python/pyspark/mllib/rand.py b/python/pyspark/mllib/random.py similarity index 100% rename from python/pyspark/mllib/rand.py rename to python/pyspark/mllib/random.py diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 4bc6351bdf02f..41bde2ce3e60b 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -18,14 +18,16 @@ import numpy as np from numpy import array +from pyspark import RDD from pyspark.mllib.common import callMLlibFunc, _py2java, _java2py, inherit_doc -from pyspark.mllib.linalg import SparseVector, _convert_to_vector +from pyspark.mllib.linalg import SparseVector, Vectors, _convert_to_vector from pyspark.mllib.util import Saveable, Loader __all__ = ['LabeledPoint', 'LinearModel', 'LinearRegressionModel', 'LinearRegressionWithSGD', 'RidgeRegressionModel', 'RidgeRegressionWithSGD', - 'LassoModel', 'LassoWithSGD'] + 'LassoModel', 'LassoWithSGD', 'IsotonicRegressionModel', + 'IsotonicRegression'] class LabeledPoint(object): @@ -396,6 +398,73 @@ def train(rdd, i): return _regression_train_wrapper(train, RidgeRegressionModel, data, initialWeights) +class IsotonicRegressionModel(Saveable, Loader): + + """Regression model for isotonic regression. + + >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)] + >>> irm = IsotonicRegression.train(sc.parallelize(data)) + >>> irm.predict(3) + 2.0 + >>> irm.predict(5) + 16.5 + >>> irm.predict(sc.parallelize([3, 5])).collect() + [2.0, 16.5] + >>> import os, tempfile + >>> path = tempfile.mkdtemp() + >>> irm.save(sc, path) + >>> sameModel = IsotonicRegressionModel.load(sc, path) + >>> sameModel.predict(3) + 2.0 + >>> sameModel.predict(5) + 16.5 + >>> try: + ... os.removedirs(path) + ... except OSError: + ... 
pass + """ + + def __init__(self, boundaries, predictions, isotonic): + self.boundaries = boundaries + self.predictions = predictions + self.isotonic = isotonic + + def predict(self, x): + if isinstance(x, RDD): + return x.map(lambda v: self.predict(v)) + return np.interp(x, self.boundaries, self.predictions) + + def save(self, sc, path): + java_boundaries = _py2java(sc, self.boundaries.tolist()) + java_predictions = _py2java(sc, self.predictions.tolist()) + java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel( + java_boundaries, java_predictions, self.isotonic) + java_model.save(sc._jsc.sc(), path) + + @classmethod + def load(cls, sc, path): + java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load( + sc._jsc.sc(), path) + py_boundaries = _java2py(sc, java_model.boundaryVector()).toArray() + py_predictions = _java2py(sc, java_model.predictionVector()).toArray() + return IsotonicRegressionModel(py_boundaries, py_predictions, java_model.isotonic) + + +class IsotonicRegression(object): + """ + Run IsotonicRegression algorithm to obtain isotonic regression model. + + :param data: RDD of (label, feature, weight) tuples. + :param isotonic: Whether this is isotonic or antitonic. + """ + @classmethod + def train(cls, data, isotonic=True): + """Train a isotonic regression model on the given data.""" + boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel", + data.map(_convert_to_vector), bool(isotonic)) + return IsotonicRegressionModel(boundaries.toArray(), predictions.toArray(), isotonic) + + def _test(): import doctest from pyspark import SparkContext diff --git a/python/pyspark/mllib/tests.py b/python/pyspark/mllib/tests.py index 1b008b93bc137..36a4c7a5408c6 100644 --- a/python/pyspark/mllib/tests.py +++ b/python/pyspark/mllib/tests.py @@ -24,7 +24,7 @@ import tempfile import array as pyarray -from numpy import array, array_equal, zeros +from numpy import array, array_equal, zeros, inf from py4j.protocol import Py4JJavaError if sys.version_info[:2] <= (2, 6): @@ -92,6 +92,9 @@ def test_serialize(self): self._test_serialize(SparseVector(4, {1: 1, 3: 2})) self._test_serialize(SparseVector(3, {})) self._test_serialize(DenseMatrix(2, 3, range(6))) + sm1 = SparseMatrix( + 3, 4, [0, 2, 2, 4, 4], [1, 2, 1, 2], [1.0, 2.0, 4.0, 5.0]) + self._test_serialize(sm1) def test_dot(self): sv = SparseVector(4, {1: 1, 3: 2}) @@ -217,6 +220,29 @@ def test_dense_matrix_is_transposed(self): self.assertTrue(array_equal(sm.colPtrs, [0, 2, 5])) self.assertTrue(array_equal(sm.values, [1, 3, 4, 6, 9])) + def test_parse_vector(self): + a = DenseVector([3, 4, 6, 7]) + self.assertTrue(str(a), '[3.0,4.0,6.0,7.0]') + self.assertTrue(Vectors.parse(str(a)), a) + a = SparseVector(4, [0, 2], [3, 4]) + self.assertTrue(str(a), '(4,[0,2],[3.0,4.0])') + self.assertTrue(Vectors.parse(str(a)), a) + a = SparseVector(10, [0, 1], [4, 5]) + self.assertTrue(SparseVector.parse(' (10, [0,1 ],[ 4.0,5.0] )'), a) + + def test_norms(self): + a = DenseVector([0, 2, 3, -1]) + self.assertAlmostEqual(a.norm(2), 3.742, 3) + self.assertTrue(a.norm(1), 6) + self.assertTrue(a.norm(inf), 3) + a = SparseVector(4, [0, 2], [3, -4]) + self.assertAlmostEqual(a.norm(2), 5) + self.assertTrue(a.norm(1), 7) + self.assertTrue(a.norm(inf), 4) + + tmp = SparseVector(4, [0, 2], [3, 0]) + self.assertEqual(tmp.numNonzeros(), 1) + class ListTests(MLlibTestCase): @@ -233,7 +259,8 @@ def test_kmeans(self): [1.1, 0], [1.2, 0], ] - clusters = KMeans.train(self.sc.parallelize(data), 2, 
initializationMode="k-means||") + clusters = KMeans.train(self.sc.parallelize(data), 2, initializationMode="k-means||", + initializationSteps=7, epsilon=1e-4) self.assertEquals(clusters.predict(data[0]), clusters.predict(data[1])) self.assertEquals(clusters.predict(data[2]), clusters.predict(data[3])) @@ -243,9 +270,11 @@ def test_kmeans_deterministic(self): Y = range(0, 100, 10) data = [[x, y] for x, y in zip(X, Y)] clusters1 = KMeans.train(self.sc.parallelize(data), - 3, initializationMode="k-means||", seed=42) + 3, initializationMode="k-means||", + seed=42, initializationSteps=7, epsilon=1e-4) clusters2 = KMeans.train(self.sc.parallelize(data), - 3, initializationMode="k-means||", seed=42) + 3, initializationMode="k-means||", + seed=42, initializationSteps=7, epsilon=1e-4) centers1 = clusters1.centers centers2 = clusters2.centers for c1, c2 in zip(centers1, centers2): diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index d254deb527d10..98a8ff8606366 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -813,13 +813,21 @@ def op(x, y): def fold(self, zeroValue, op): """ Aggregate the elements of each partition, and then the results for all - the partitions, using a given associative function and a neutral "zero - value." + the partitions, using a given associative and commutative function and + a neutral "zero value." The function C{op(t1, t2)} is allowed to modify C{t1} and return it as its result value to avoid object allocation; however, it should not modify C{t2}. + This behaves somewhat differently from fold operations implemented + for non-distributed collections in functional languages like Scala. + This fold operation may be applied to partitions individually, and then + fold those results into the final result, rather than apply the fold + to each element sequentially in some defined ordering. For functions + that are not commutative, the result may differ from that of a fold + applied to a non-distributed collection. 
+ >>> from operator import add >>> sc.parallelize([1, 2, 3, 4, 5]).fold(0, add) 15 @@ -2239,8 +2247,6 @@ def countApproxDistinct(self, relativeSD=0.05): """ if relativeSD < 0.000017: raise ValueError("relativeSD should be greater than 0.000017") - if relativeSD > 0.37: - raise ValueError("relativeSD should be smaller than 0.37") # the hash space in Java is 2^32 hashRDD = self.map(lambda x: portable_hash(x) & 0xFFFFFFFF) return hashRDD._to_java_object_rdd().countApproxDistinct(relativeSD) @@ -2262,7 +2268,7 @@ def toLocalIterator(self): def _prepare_for_python_RDD(sc, command, obj=None): # the serialized command will be compressed by broadcast ser = CloudPickleSerializer() - pickled_command = ser.dumps((command, sys.version_info[:2])) + pickled_command = ser.dumps(command) if len(pickled_command) > (1 << 20): # 1M # The broadcast will have same life cycle as created PythonRDD broadcast = sc.broadcast(pickled_command) @@ -2346,7 +2352,7 @@ def _jrdd(self): python_rdd = self.ctx._jvm.PythonRDD(self._prev_jrdd.rdd(), bytearray(pickled_cmd), env, includes, self.preservesPartitioning, - self.ctx.pythonExec, + self.ctx.pythonExec, self.ctx.pythonVer, bvars, self.ctx._javaAccumulator) self._jrdd_val = python_rdd.asJavaRDD() diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 1d0b16cade8bb..81c420ce16541 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -362,7 +362,7 @@ def _spill(self): self.spills += 1 gc.collect() # release the memory as much as possible - MemoryBytesSpilled += (used_memory - get_used_memory()) << 20 + MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20 def items(self): """ Return all merged items as iterator """ @@ -515,7 +515,7 @@ def load(f): gc.collect() batch //= 2 limit = self._next_limit() - MemoryBytesSpilled += (used_memory - get_used_memory()) << 20 + MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20 DiskBytesSpilled += os.path.getsize(path) os.unlink(path) # data will be deleted after close @@ -630,7 +630,7 @@ def _spill(self): self.values = [] gc.collect() DiskBytesSpilled += self._file.tell() - pos - MemoryBytesSpilled += (used_memory - get_used_memory()) << 20 + MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20 class ExternalListOfList(ExternalList): @@ -794,7 +794,7 @@ def _spill(self): self.spills += 1 gc.collect() # release the memory as much as possible - MemoryBytesSpilled += (used_memory - get_used_memory()) << 20 + MemoryBytesSpilled += max(used_memory - get_used_memory(), 0) << 20 def _merged_items(self, index): size = sum(os.path.getsize(os.path.join(self._get_spill_dir(j), str(index))) diff --git a/python/pyspark/sql/__init__.py b/python/pyspark/sql/__init__.py index b60b991dd4d8b..726d288d97b2e 100644 --- a/python/pyspark/sql/__init__.py +++ b/python/pyspark/sql/__init__.py @@ -18,45 +18,47 @@ """ Important classes of Spark SQL and DataFrames: - - L{SQLContext} + - :class:`pyspark.sql.SQLContext` Main entry point for :class:`DataFrame` and SQL functionality. - - L{DataFrame} + - :class:`pyspark.sql.DataFrame` A distributed collection of data grouped into named columns. - - L{Column} + - :class:`pyspark.sql.Column` A column expression in a :class:`DataFrame`. - - L{Row} + - :class:`pyspark.sql.Row` A row of data in a :class:`DataFrame`. - - L{HiveContext} + - :class:`pyspark.sql.HiveContext` Main entry point for accessing data stored in Apache Hive. 
- - L{GroupedData} + - :class:`pyspark.sql.GroupedData` Aggregation methods, returned by :func:`DataFrame.groupBy`. - - L{DataFrameNaFunctions} + - :class:`pyspark.sql.DataFrameNaFunctions` Methods for handling missing data (null values). - - L{functions} + - :class:`pyspark.sql.DataFrameStatFunctions` + Methods for statistics functionality. + - :class:`pyspark.sql.functions` List of built-in functions available for :class:`DataFrame`. - - L{types} + - :class:`pyspark.sql.types` List of data types available. + - :class:`pyspark.sql.Window` + For working with window functions. """ from __future__ import absolute_import -# fix the module name conflict for Python 3+ -import sys -from . import _types as types -modname = __name__ + '.types' -types.__name__ = modname -# update the __module__ for all objects, make them picklable -for v in types.__dict__.values(): - if hasattr(v, "__module__") and v.__module__.endswith('._types'): - v.__module__ = modname -sys.modules[modname] = types -del modname, sys + +def since(version): + def deco(f): + f.__doc__ = f.__doc__.rstrip() + "\n\n.. versionadded:: %s" % version + return f + return deco from pyspark.sql.types import Row from pyspark.sql.context import SQLContext, HiveContext -from pyspark.sql.dataframe import DataFrame, GroupedData, Column, SchemaRDD, DataFrameNaFunctions -from pyspark.sql.dataframe import DataFrameStatFunctions +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame, SchemaRDD, DataFrameNaFunctions, DataFrameStatFunctions +from pyspark.sql.group import GroupedData +from pyspark.sql.readwriter import DataFrameReader, DataFrameWriter +from pyspark.sql.window import Window, WindowSpec __all__ = [ 'SQLContext', 'HiveContext', 'DataFrame', 'GroupedData', 'Column', 'Row', - 'DataFrameNaFunctions', 'DataFrameStatFunctions' + 'DataFrameNaFunctions', 'DataFrameStatFunctions', 'Window', 'WindowSpec', ] diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py new file mode 100644 index 0000000000000..1ecec5b126505 --- /dev/null +++ b/python/pyspark/sql/column.py @@ -0,0 +1,425 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys + +if sys.version >= '3': + basestring = str + long = int + +from pyspark.context import SparkContext +from pyspark.rdd import ignore_unicode_prefix +from pyspark.sql import since +from pyspark.sql.types import * + +__all__ = ["DataFrame", "Column", "SchemaRDD", "DataFrameNaFunctions", + "DataFrameStatFunctions"] + + +def _create_column_from_literal(literal): + sc = SparkContext._active_spark_context + return sc._jvm.functions.lit(literal) + + +def _create_column_from_name(name): + sc = SparkContext._active_spark_context + return sc._jvm.functions.col(name) + + +def _to_java_column(col): + if isinstance(col, Column): + jcol = col._jc + else: + jcol = _create_column_from_name(col) + return jcol + + +def _to_seq(sc, cols, converter=None): + """ + Convert a list of Column (or names) into a JVM Seq of Column. + + An optional `converter` could be used to convert items in `cols` + into JVM Column objects. + """ + if converter: + cols = [converter(c) for c in cols] + return sc._jvm.PythonUtils.toSeq(cols) + + +def _unary_op(name, doc="unary operator"): + """ Create a method for given unary operator """ + def _(self): + jc = getattr(self._jc, name)() + return Column(jc) + _.__doc__ = doc + return _ + + +def _func_op(name, doc=''): + def _(self): + sc = SparkContext._active_spark_context + jc = getattr(sc._jvm.functions, name)(self._jc) + return Column(jc) + _.__doc__ = doc + return _ + + +def _bin_op(name, doc="binary operator"): + """ Create a method for given binary operator + """ + def _(self, other): + jc = other._jc if isinstance(other, Column) else other + njc = getattr(self._jc, name)(jc) + return Column(njc) + _.__doc__ = doc + return _ + + +def _reverse_op(name, doc="binary operator"): + """ Create a method for binary operator (this object is on right side) + """ + def _(self, other): + jother = _create_column_from_literal(other) + jc = getattr(jother, name)(self._jc) + return Column(jc) + _.__doc__ = doc + return _ + + +class Column(object): + + """ + A column in a DataFrame. + + :class:`Column` instances can be created by:: + + # 1. Select a column out of a DataFrame + + df.colName + df["colName"] + + # 2. Create from an expression + df.colName + 1 + 1 / df.colName + + .. note:: Experimental + + .. 
versionadded:: 1.3 + """ + + def __init__(self, jc): + self._jc = jc + + # arithmetic operators + __neg__ = _func_op("negate") + __add__ = _bin_op("plus") + __sub__ = _bin_op("minus") + __mul__ = _bin_op("multiply") + __div__ = _bin_op("divide") + __truediv__ = _bin_op("divide") + __mod__ = _bin_op("mod") + __radd__ = _bin_op("plus") + __rsub__ = _reverse_op("minus") + __rmul__ = _bin_op("multiply") + __rdiv__ = _reverse_op("divide") + __rtruediv__ = _reverse_op("divide") + __rmod__ = _reverse_op("mod") + + # logistic operators + __eq__ = _bin_op("equalTo") + __ne__ = _bin_op("notEqual") + __lt__ = _bin_op("lt") + __le__ = _bin_op("leq") + __ge__ = _bin_op("geq") + __gt__ = _bin_op("gt") + + # `and`, `or`, `not` cannot be overloaded in Python, + # so use bitwise operators as boolean operators + __and__ = _bin_op('and') + __or__ = _bin_op('or') + __invert__ = _func_op('not') + __rand__ = _bin_op("and") + __ror__ = _bin_op("or") + + # container operators + __contains__ = _bin_op("contains") + __getitem__ = _bin_op("apply") + + # bitwise operators + bitwiseOR = _bin_op("bitwiseOR") + bitwiseAND = _bin_op("bitwiseAND") + bitwiseXOR = _bin_op("bitwiseXOR") + + @since(1.3) + def getItem(self, key): + """ + An expression that gets an item at position ``ordinal`` out of a list, + or gets an item by key out of a dict. + + >>> df = sc.parallelize([([1, 2], {"key": "value"})]).toDF(["l", "d"]) + >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() + +----+------+ + |l[0]|d[key]| + +----+------+ + | 1| value| + +----+------+ + >>> df.select(df.l[0], df.d["key"]).show() + +----+------+ + |l[0]|d[key]| + +----+------+ + | 1| value| + +----+------+ + """ + return self[key] + + @since(1.3) + def getField(self, name): + """ + An expression that gets a field by name in a StructField. + + >>> from pyspark.sql import Row + >>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF() + >>> df.select(df.r.getField("b")).show() + +----+ + |r[b]| + +----+ + | b| + +----+ + >>> df.select(df.r.a).show() + +----+ + |r[a]| + +----+ + | 1| + +----+ + """ + return self[name] + + def __getattr__(self, item): + if item.startswith("__"): + raise AttributeError(item) + return self.getField(item) + + # string methods + rlike = _bin_op("rlike") + like = _bin_op("like") + startswith = _bin_op("startsWith") + endswith = _bin_op("endsWith") + + @ignore_unicode_prefix + @since(1.3) + def substr(self, startPos, length): + """ + Return a :class:`Column` which is a substring of the column. + + :param startPos: start position (int or Column) + :param length: length of the substring (int or Column) + + >>> df.select(df.name.substr(1, 3).alias("col")).collect() + [Row(col=u'Ali'), Row(col=u'Bob')] + """ + if type(startPos) != type(length): + raise TypeError("Can not mix the type") + if isinstance(startPos, (int, long)): + jc = self._jc.substr(startPos, length) + elif isinstance(startPos, Column): + jc = self._jc.substr(startPos._jc, length._jc) + else: + raise TypeError("Unexpected type: %s" % type(startPos)) + return Column(jc) + + __getslice__ = substr + + @ignore_unicode_prefix + @since(1.3) + def inSet(self, *cols): + """ + A boolean expression that is evaluated to true if the value of this + expression is contained by the evaluated values of the arguments. 
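[Editor's sketch, not part of the patch] Because `and`, `or` and `not` cannot be overloaded, the bitwise operators defined above are how `Column` predicates are combined. The example assumes the two-row doctest `df` (integer `age`, string `name`); note the parentheses, since `&`, `|` and `~` bind tighter than the comparisons.

    df.filter((df.age > 3) & (df.name == 'Bob')).collect()   # [Row(age=5, name=u'Bob')]
    df.filter((df.age < 3) | (df.name == 'Bob')).collect()   # both rows
    df.filter(~(df.age > 3)).collect()                       # [Row(age=2, name=u'Alice')]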
+ + >>> df[df.name.inSet("Bob", "Mike")].collect() + [Row(age=5, name=u'Bob')] + >>> df[df.age.inSet([1, 2, 3])].collect() + [Row(age=2, name=u'Alice')] + """ + if len(cols) == 1 and isinstance(cols[0], (list, set)): + cols = cols[0] + cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols] + sc = SparkContext._active_spark_context + jc = getattr(self._jc, "in")(_to_seq(sc, cols)) + return Column(jc) + + # order + asc = _unary_op("asc", "Returns a sort expression based on the" + " ascending order of the given column name.") + desc = _unary_op("desc", "Returns a sort expression based on the" + " descending order of the given column name.") + + isNull = _unary_op("isNull", "True if the current expression is null.") + isNotNull = _unary_op("isNotNull", "True if the current expression is not null.") + + @since(1.3) + def alias(self, *alias): + """ + Returns this column aliased with a new name or names (in the case of expressions that + return more than one column, such as explode). + + >>> df.select(df.age.alias("age2")).collect() + [Row(age2=2), Row(age2=5)] + """ + + if len(alias) == 1: + return Column(getattr(self._jc, "as")(alias[0])) + else: + sc = SparkContext._active_spark_context + return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) + + @ignore_unicode_prefix + @since(1.3) + def cast(self, dataType): + """ Convert the column into type ``dataType``. + + >>> df.select(df.age.cast("string").alias('ages')).collect() + [Row(ages=u'2'), Row(ages=u'5')] + >>> df.select(df.age.cast(StringType()).alias('ages')).collect() + [Row(ages=u'2'), Row(ages=u'5')] + """ + if isinstance(dataType, basestring): + jc = self._jc.cast(dataType) + elif isinstance(dataType, DataType): + sc = SparkContext._active_spark_context + ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc()) + jdt = ssql_ctx.parseDataType(dataType.json()) + jc = self._jc.cast(jdt) + else: + raise TypeError("unexpected type: %s" % type(dataType)) + return Column(jc) + + astype = cast + + @since(1.3) + def between(self, lowerBound, upperBound): + """ + A boolean expression that is evaluated to true if the value of this + expression is between the given columns. + + >>> df.select(df.name, df.age.between(2, 4)).show() + +-----+--------------------------+ + | name|((age >= 2) && (age <= 4))| + +-----+--------------------------+ + |Alice| true| + | Bob| false| + +-----+--------------------------+ + """ + return (self >= lowerBound) & (self <= upperBound) + + @since(1.4) + def when(self, condition, value): + """ + Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + See :func:`pyspark.sql.functions.when` for example usage. + + :param condition: a boolean :class:`Column` expression. + :param value: a literal value, or a :class:`Column` expression. 
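[Editor's sketch, not part of the patch] A small usage example for the `cast`, `alias` and `between` helpers annotated above, again against the doctest `df`; the expected results in the comments are illustrative.

    df.select(df.age.cast("double").alias('age_d')).collect()   # [Row(age_d=2.0), Row(age_d=5.0)]
    df.filter(df.age.between(2, 4)).collect()                   # [Row(age=2, name=u'Alice')]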
+ + >>> from pyspark.sql import functions as F + >>> df.select(df.name, F.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show() + +-----+--------------------------------------------------------+ + | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0| + +-----+--------------------------------------------------------+ + |Alice| -1| + | Bob| 1| + +-----+--------------------------------------------------------+ + """ + if not isinstance(condition, Column): + raise TypeError("condition should be a Column") + v = value._jc if isinstance(value, Column) else value + jc = self._jc.when(condition._jc, v) + return Column(jc) + + @since(1.4) + def otherwise(self, value): + """ + Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + See :func:`pyspark.sql.functions.when` for example usage. + + :param value: a literal value, or a :class:`Column` expression. + + >>> from pyspark.sql import functions as F + >>> df.select(df.name, F.when(df.age > 3, 1).otherwise(0)).show() + +-----+---------------------------------+ + | name|CASE WHEN (age > 3) THEN 1 ELSE 0| + +-----+---------------------------------+ + |Alice| 0| + | Bob| 1| + +-----+---------------------------------+ + """ + v = value._jc if isinstance(value, Column) else value + jc = self._jc.otherwise(v) + return Column(jc) + + @since(1.4) + def over(self, window): + """ + Define a windowing column. + + :param window: a :class:`WindowSpec` + :return: a Column + + >>> from pyspark.sql import Window + >>> window = Window.partitionBy("name").orderBy("age").rowsBetween(-1, 1) + >>> from pyspark.sql.functions import rank, min + >>> # df.select(rank().over(window), min('age').over(window)) + + .. 
note:: Window functions is only supported with HiveContext in 1.4 + """ + from pyspark.sql.window import WindowSpec + if not isinstance(window, WindowSpec): + raise TypeError("window should be WindowSpec") + jc = self._jc.over(window._jspec) + return Column(jc) + + def __repr__(self): + return 'Column<%s>' % self._jc.toString().encode('utf8') + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.sql import SQLContext + import pyspark.sql.column + globs = pyspark.sql.column.__dict__.copy() + sc = SparkContext('local[4]', 'PythonTest') + globs['sc'] = sc + globs['sqlContext'] = SQLContext(sc) + globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ + .toDF(StructType([StructField('age', IntegerType()), + StructField('name', StringType())])) + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.column, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + globs['sc'].stop() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index f6f107ca32d2f..22f6257dfe02d 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -28,9 +28,11 @@ from pyspark.rdd import RDD, _prepare_for_python_RDD, ignore_unicode_prefix from pyspark.serializers import AutoBatchedSerializer, PickleSerializer +from pyspark.sql import since from pyspark.sql.types import Row, StringType, StructType, _verify_type, \ _infer_schema, _has_nulltype, _merge_type, _create_converter, _python_to_sql_converter from pyspark.sql.dataframe import DataFrame +from pyspark.sql.readwriter import DataFrameReader try: import pandas @@ -105,11 +107,13 @@ def _ssql_ctx(self): self._scala_SQLContext = self._jvm.SQLContext(self._jsc.sc()) return self._scala_SQLContext + @since(1.3) def setConf(self, key, value): """Sets the given Spark SQL configuration property. """ self._ssql_ctx.setConf(key, value) + @since(1.3) def getConf(self, key, defaultValue): """Returns the value of Spark SQL configuration property for the given key. @@ -118,11 +122,34 @@ def getConf(self, key, defaultValue): return self._ssql_ctx.getConf(key, defaultValue) @property + @since("1.3.1") def udf(self): """Returns a :class:`UDFRegistration` for UDF registration.""" return UDFRegistration(self) + @since(1.4) + def range(self, start, end, step=1, numPartitions=None): + """ + Create a :class:`DataFrame` with single LongType column named `id`, + containing elements in a range from `start` to `end` (exclusive) with + step value `step`. + + :param start: the start value + :param end: the end value (exclusive) + :param step: the incremental step (default: 1) + :param numPartitions: the number of partitions of the DataFrame + :return: A new DataFrame + + >>> sqlContext.range(1, 7, 2).collect() + [Row(id=1), Row(id=3), Row(id=5)] + """ + if numPartitions is None: + numPartitions = self._sc.defaultParallelism + jdf = self._ssql_ctx.range(int(start), int(end), int(step), int(numPartitions)) + return DataFrame(jdf, self) + @ignore_unicode_prefix + @since(1.2) def registerFunction(self, name, f, returnType=StringType()): """Registers a lambda function as a UDF so it can be used in SQL statements. 
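[Editor's sketch, not part of the patch] How the `range()` and `registerFunction()` entry points touched above fit together; it assumes a live `sqlContext`, and the UDF name `strlen` and the alias `n` are invented for the example.

    from pyspark.sql.types import IntegerType

    sqlContext.range(1, 7, 2).collect()          # [Row(id=1), Row(id=3), Row(id=5)]
    sqlContext.registerFunction("strlen", lambda s: len(s), IntegerType())
    sqlContext.sql("SELECT strlen('test') AS n").collect()   # [Row(n=4)]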
@@ -157,6 +184,7 @@ def registerFunction(self, name, f, returnType=StringType()): env, includes, self._sc.pythonExec, + self._sc.pythonVer, bvars, self._sc._javaAccumulator, returnType.json()) @@ -188,7 +216,8 @@ def _inferSchema(self, rdd, samplingRatio=None): @ignore_unicode_prefix def inferSchema(self, rdd, samplingRatio=None): - """::note: Deprecated in 1.3, use :func:`createDataFrame` instead. + """ + .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead. """ warnings.warn("inferSchema is deprecated, please use createDataFrame instead") @@ -199,7 +228,8 @@ def inferSchema(self, rdd, samplingRatio=None): @ignore_unicode_prefix def applySchema(self, rdd, schema): - """::note: Deprecated in 1.3, use :func:`createDataFrame` instead. + """ + .. note:: Deprecated in 1.3, use :func:`createDataFrame` instead. """ warnings.warn("applySchema is deprecated, please use createDataFrame instead") @@ -211,6 +241,7 @@ def applySchema(self, rdd, schema): return self.createDataFrame(rdd, schema) + @since(1.3) @ignore_unicode_prefix def createDataFrame(self, data, schema=None, samplingRatio=None): """ @@ -315,6 +346,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None): df = self._ssql_ctx.applySchemaToPythonRDD(jrdd.rdd(), schema.json()) return DataFrame(df, self) + @since(1.3) def registerDataFrameAsTable(self, df, tableName): """Registers the given :class:`DataFrame` as a temporary table in the catalog. @@ -327,6 +359,7 @@ def registerDataFrameAsTable(self, df, tableName): else: raise ValueError("Can only register DataFrame as table") + @since(1.0) def parquetFile(self, *paths): """Loads a Parquet file, returning the result as a :class:`DataFrame`. @@ -345,6 +378,7 @@ def parquetFile(self, *paths): jdf = self._ssql_ctx.parquetFile(jpaths) return DataFrame(jdf, self) + @since(1.0) def jsonFile(self, path, schema=None, samplingRatio=1.0): """Loads a text file storing one JSON object per line as a :class:`DataFrame`. @@ -385,6 +419,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0): return DataFrame(df, self) @ignore_unicode_prefix + @since(1.0) def jsonRDD(self, rdd, schema=None, samplingRatio=1.0): """Loads an RDD storing one JSON object per string as a :class:`DataFrame`. @@ -427,6 +462,7 @@ def func(iterator): df = self._ssql_ctx.jsonRDD(jrdd.rdd(), scala_datatype) return DataFrame(df, self) + @since(1.3) def load(self, path=None, source=None, schema=None, **options): """Returns the dataset in a data source as a :class:`DataFrame`. @@ -436,20 +472,9 @@ def load(self, path=None, source=None, schema=None, **options): Optionally, a schema can be provided as the schema of the returned DataFrame. """ - if path is not None: - options["path"] = path - if source is None: - source = self.getConf("spark.sql.sources.default", - "org.apache.spark.sql.parquet") - if schema is None: - df = self._ssql_ctx.load(source, options) - else: - if not isinstance(schema, StructType): - raise TypeError("schema should be StructType") - scala_datatype = self._ssql_ctx.parseDataType(schema.json()) - df = self._ssql_ctx.load(source, scala_datatype, options) - return DataFrame(df, self) + return self.read.load(path, source, schema, **options) + @since(1.3) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): """Creates an external table based on the dataset in a data source. 
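[Editor's sketch, not part of the patch] Since `load()` above now just delegates to the new reader API, the two calls below should be equivalent; the parquet path is a placeholder, not something from the patch.

    df_old = sqlContext.load(path="/tmp/people.parquet", source="parquet")   # legacy entry point
    df_new = sqlContext.read.format("parquet").load("/tmp/people.parquet")   # DataFrameReader API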
@@ -479,6 +504,7 @@ def createExternalTable(self, tableName, path=None, source=None, return DataFrame(df, self) @ignore_unicode_prefix + @since(1.0) def sql(self, sqlQuery): """Returns a :class:`DataFrame` representing the result of the given query. @@ -489,6 +515,7 @@ def sql(self, sqlQuery): """ return DataFrame(self._ssql_ctx.sql(sqlQuery), self) + @since(1.0) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. @@ -500,6 +527,7 @@ def table(self, tableName): return DataFrame(self._ssql_ctx.table(tableName), self) @ignore_unicode_prefix + @since(1.3) def tables(self, dbName=None): """Returns a :class:`DataFrame` containing names of tables in the given database. @@ -518,6 +546,7 @@ def tables(self, dbName=None): else: return DataFrame(self._ssql_ctx.tables(dbName), self) + @since(1.3) def tableNames(self, dbName=None): """Returns a list of names of tables in the database ``dbName``. @@ -534,18 +563,33 @@ def tableNames(self, dbName=None): else: return [name for name in self._ssql_ctx.tableNames(dbName)] + @since(1.0) def cacheTable(self, tableName): """Caches the specified table in-memory.""" self._ssql_ctx.cacheTable(tableName) + @since(1.0) def uncacheTable(self, tableName): """Removes the specified table from the in-memory cache.""" self._ssql_ctx.uncacheTable(tableName) + @since(1.3) def clearCache(self): """Removes all cached tables from the in-memory cache. """ self._ssql_ctx.clearCache() + @property + @since(1.4) + def read(self): + """ + Returns a :class:`DataFrameReader` that can be used to read data + in as a :class:`DataFrame`. + + >>> sqlContext.read + + """ + return DataFrameReader(self) + class HiveContext(SQLContext): """A variant of Spark SQL that integrates with data stored in Hive. diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index aac5b8c4c5770..a82b6b87c413e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -25,17 +25,17 @@ else: from itertools import imap as map -from pyspark.context import SparkContext from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer from pyspark.storagelevel import StorageLevel from pyspark.traceback_utils import SCCallSiteSync -from pyspark.sql.types import * +from pyspark.sql import since from pyspark.sql.types import _create_cls, _parse_datatype_json_string +from pyspark.sql.column import Column, _to_seq, _to_java_column +from pyspark.sql.readwriter import DataFrameWriter +from pyspark.sql.types import * - -__all__ = ["DataFrame", "GroupedData", "Column", "SchemaRDD", "DataFrameNaFunctions", - "DataFrameStatFunctions"] +__all__ = ["DataFrame", "SchemaRDD", "DataFrameNaFunctions", "DataFrameStatFunctions"] class DataFrame(object): @@ -61,6 +61,10 @@ class DataFrame(object): people.filter(people.age > 30).join(department, people.deptId == department.id)) \ .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"}) + + .. note:: Experimental + + .. versionadded:: 1.3 """ def __init__(self, jdf, sql_ctx): @@ -72,6 +76,7 @@ def __init__(self, jdf, sql_ctx): self._lazy_rdd = None @property + @since(1.3) def rdd(self): """Returns the content as an :class:`pyspark.RDD` of :class:`Row`. """ @@ -89,18 +94,21 @@ def applySchema(it): return self._lazy_rdd @property + @since("1.3.1") def na(self): """Returns a :class:`DataFrameNaFunctions` for handling missing values. 
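[Editor's sketch, not part of the patch] Tying together the catalog helpers annotated above (`registerDataFrameAsTable`, `tableNames`, `cacheTable`, `sql`), assuming the usual two-row doctest `df`; the table name `people` is invented.

    sqlContext.registerDataFrameAsTable(df, "people")
    "people" in sqlContext.tableNames()                            # True
    sqlContext.cacheTable("people")
    sqlContext.sql("SELECT count(*) AS n FROM people").collect()   # [Row(n=2)]
    sqlContext.uncacheTable("people")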
""" return DataFrameNaFunctions(self) @property + @since(1.4) def stat(self): """Returns a :class:`DataFrameStatFunctions` for statistic functions. """ return DataFrameStatFunctions(self) @ignore_unicode_prefix + @since(1.3) def toJSON(self, use_unicode=True): """Converts a :class:`DataFrame` into a :class:`RDD` of string. @@ -112,6 +120,7 @@ def toJSON(self, use_unicode=True): rdd = self._jdf.toJSON() return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) + @since(1.3) def saveAsParquetFile(self, path): """Saves the contents as a Parquet file, preserving the schema. @@ -128,6 +137,7 @@ def saveAsParquetFile(self, path): """ self._jdf.saveAsParquetFile(path) + @since(1.3) def registerTempTable(self, name): """Registers this RDD as a temporary table using the given name. @@ -141,37 +151,21 @@ def registerTempTable(self, name): """ self._jdf.registerTempTable(name) + @since(1.3) def registerAsTable(self, name): """DEPRECATED: use :func:`registerTempTable` instead""" warnings.warn("Use registerTempTable instead of registerAsTable.", DeprecationWarning) self.registerTempTable(name) + @since(1.3) def insertInto(self, tableName, overwrite=False): """Inserts the contents of this :class:`DataFrame` into the specified table. Optionally overwriting any existing data. """ - self._jdf.insertInto(tableName, overwrite) - - def _java_save_mode(self, mode): - """Returns the Java save mode based on the Python save mode represented by a string. - """ - jSaveMode = self._sc._jvm.org.apache.spark.sql.SaveMode - jmode = jSaveMode.ErrorIfExists - mode = mode.lower() - if mode == "append": - jmode = jSaveMode.Append - elif mode == "overwrite": - jmode = jSaveMode.Overwrite - elif mode == "ignore": - jmode = jSaveMode.Ignore - elif mode == "error": - pass - else: - raise ValueError( - "Only 'append', 'overwrite', 'ignore', and 'error' are acceptable save mode.") - return jmode + self.write.insertInto(tableName, overwrite) + @since(1.3) def saveAsTable(self, tableName, source=None, mode="error", **options): """Saves the contents of this :class:`DataFrame` to a data source as a table. @@ -187,12 +181,9 @@ def saveAsTable(self, tableName, source=None, mode="error", **options): * `error`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. """ - if source is None: - source = self.sql_ctx.getConf("spark.sql.sources.default", - "org.apache.spark.sql.parquet") - jmode = self._java_save_mode(mode) - self._jdf.saveAsTable(tableName, source, jmode, options) + self.write.saveAsTable(tableName, source, mode, **options) + @since(1.3) def save(self, path=None, source=None, mode="error", **options): """Saves the contents of the :class:`DataFrame` to a data source. @@ -208,15 +199,26 @@ def save(self, path=None, source=None, mode="error", **options): * `error`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. """ - if path is not None: - options["path"] = path - if source is None: - source = self.sql_ctx.getConf("spark.sql.sources.default", - "org.apache.spark.sql.parquet") - jmode = self._java_save_mode(mode) - self._jdf.save(source, jmode, options) + return self.write.save(path, source, mode, **options) + + @property + @since(1.4) + def write(self): + """ + Interface for saving the content of the :class:`DataFrame` out + into external storage. + + :return :class:`DataFrameWriter` + + .. 
note:: Experimental + + >>> df.write + + """ + return DataFrameWriter(self) @property + @since(1.3) def schema(self): """Returns the schema of this :class:`DataFrame` as a :class:`types.StructType`. @@ -227,6 +229,7 @@ def schema(self): self._schema = _parse_datatype_json_string(self._jdf.schema().json()) return self._schema + @since(1.3) def printSchema(self): """Prints out the schema in the tree format. @@ -238,6 +241,7 @@ def printSchema(self): """ print(self._jdf.schema().treeString()) + @since(1.3) def explain(self, extended=False): """Prints the (logical and physical) plans to the console for debugging purpose. @@ -263,27 +267,33 @@ def explain(self, extended=False): else: print(self._jdf.queryExecution().executedPlan().toString()) + @since(1.3) def isLocal(self): """Returns ``True`` if the :func:`collect` and :func:`take` methods can be run locally (without any Spark executors). """ return self._jdf.isLocal() + @since(1.3) def show(self, n=20): """Prints the first ``n`` rows to the console. >>> df DataFrame[age: int, name: string] >>> df.show() - age name - 2 Alice - 5 Bob + +---+-----+ + |age| name| + +---+-----+ + | 2|Alice| + | 5| Bob| + +---+-----+ """ print(self._jdf.showString(n)) def __repr__(self): return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) + @since(1.3) def count(self): """Returns the number of rows in this :class:`DataFrame`. @@ -293,6 +303,7 @@ def count(self): return int(self._jdf.count()) @ignore_unicode_prefix + @since(1.3) def collect(self): """Returns all the records as a list of :class:`Row`. @@ -306,6 +317,7 @@ def collect(self): return [cls(r) for r in rs] @ignore_unicode_prefix + @since(1.3) def limit(self, num): """Limits the result count to the number specified. @@ -318,6 +330,7 @@ def limit(self, num): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def take(self, num): """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. @@ -327,6 +340,7 @@ def take(self, num): return self.limit(num).collect() @ignore_unicode_prefix + @since(1.3) def map(self, f): """ Returns a new :class:`RDD` by applying a the ``f`` function to each :class:`Row`. @@ -338,6 +352,7 @@ def map(self, f): return self.rdd.map(f) @ignore_unicode_prefix + @since(1.3) def flatMap(self, f): """ Returns a new :class:`RDD` by first applying the ``f`` function to each :class:`Row`, and then flattening the results. @@ -349,6 +364,7 @@ def flatMap(self, f): """ return self.rdd.flatMap(f) + @since(1.3) def mapPartitions(self, f, preservesPartitioning=False): """Returns a new :class:`RDD` by applying the ``f`` function to each partition. @@ -361,6 +377,7 @@ def mapPartitions(self, f, preservesPartitioning=False): """ return self.rdd.mapPartitions(f, preservesPartitioning) + @since(1.3) def foreach(self, f): """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`. @@ -372,6 +389,7 @@ def foreach(self, f): """ return self.rdd.foreach(f) + @since(1.3) def foreachPartition(self, f): """Applies the ``f`` function to each partition of this :class:`DataFrame`. @@ -384,6 +402,7 @@ def foreachPartition(self, f): """ return self.rdd.foreachPartition(f) + @since(1.3) def cache(self): """ Persists with the default storage level (C{MEMORY_ONLY_SER}). """ @@ -391,6 +410,7 @@ def cache(self): self._jdf.cache() return self + @since(1.3) def persist(self, storageLevel=StorageLevel.MEMORY_ONLY_SER): """Sets the storage level to persist its values across operations after the first time it is computed. 
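[Editor's sketch, not part of the patch] The caching calls decorated above, against the same two-row `df`; the storage level is just an example.

    from pyspark.storagelevel import StorageLevel

    df.persist(StorageLevel.MEMORY_AND_DISK)   # returns the DataFrame itself
    df.count()                                 # 2; the first action populates the cache
    df.take(1)                                 # [Row(age=2, name=u'Alice')]
    df.unpersist()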
This can only be used to assign @@ -402,6 +422,7 @@ def persist(self, storageLevel=StorageLevel.MEMORY_ONLY_SER): self._jdf.persist(javaStorageLevel) return self + @since(1.3) def unpersist(self, blocking=True): """Marks the :class:`DataFrame` as non-persistent, and remove all blocks for it from memory and disk. @@ -410,10 +431,22 @@ def unpersist(self, blocking=True): self._jdf.unpersist(blocking) return self - # def coalesce(self, numPartitions, shuffle=False): - # rdd = self._jdf.coalesce(numPartitions, shuffle, None) - # return DataFrame(rdd, self.sql_ctx) + @since(1.4) + def coalesce(self, numPartitions): + """ + Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions. + + Similar to coalesce defined on an :class:`RDD`, this operation results in a + narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, + there will not be a shuffle, instead each of the 100 new partitions will + claim 10 of the current partitions. + + >>> df.coalesce(1).rdd.getNumPartitions() + 1 + """ + return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx) + @since(1.3) def repartition(self, numPartitions): """Returns a new :class:`DataFrame` that has exactly ``numPartitions`` partitions. @@ -422,6 +455,7 @@ def repartition(self, numPartitions): """ return DataFrame(self._jdf.repartition(numPartitions), self.sql_ctx) + @since(1.3) def distinct(self): """Returns a new :class:`DataFrame` containing the distinct rows in this :class:`DataFrame`. @@ -430,6 +464,7 @@ def distinct(self): """ return DataFrame(self._jdf.distinct(), self.sql_ctx) + @since(1.3) def sample(self, withReplacement, fraction, seed=None): """Returns a sampled subset of this :class:`DataFrame`. @@ -441,6 +476,7 @@ def sample(self, withReplacement, fraction, seed=None): rdd = self._jdf.sample(withReplacement, fraction, long(seed)) return DataFrame(rdd, self.sql_ctx) + @since(1.4) def randomSplit(self, weights, seed=None): """Randomly splits this :class:`DataFrame` with the provided weights. @@ -463,6 +499,7 @@ def randomSplit(self, weights, seed=None): return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array] @property + @since(1.3) def dtypes(self): """Returns all column names and their data types as a list. @@ -473,6 +510,7 @@ def dtypes(self): @property @ignore_unicode_prefix + @since(1.3) def columns(self): """Returns all column names as a list. @@ -482,6 +520,7 @@ def columns(self): return [f.name for f in self.schema.fields] @ignore_unicode_prefix + @since(1.3) def alias(self, alias): """Returns a new :class:`DataFrame` with an alias set. @@ -496,6 +535,7 @@ def alias(self, alias): return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def join(self, other, joinExprs=None, joinType=None): """Joins with another :class:`DataFrame`, using the given join expression. @@ -529,6 +569,7 @@ def join(self, other, joinExprs=None, joinType=None): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def sort(self, *cols, **kwargs): """Returns a new :class:`DataFrame` sorted by the specified column(s). 
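[Editor's sketch, not part of the patch] The partitioning helpers above, in particular the newly added `coalesce()`; illustrative only, against the doctest `df`.

    df.repartition(4).rdd.getNumPartitions()               # 4
    df.repartition(4).coalesce(1).rdd.getNumPartitions()   # 1; narrow dependency, no shuffle
    train, test = df.randomSplit([0.75, 0.25], seed=17)    # disjoint split of the rows
    train.count() + test.count()                           # 2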
@@ -575,6 +616,10 @@ def _jseq(self, cols, converter=None): """Return a JVM Seq of Columns from a list of Column or names""" return _to_seq(self.sql_ctx._sc, cols, converter) + def _jmap(self, jm): + """Return a JVM Scala Map from a dict""" + return _to_scala_map(self.sql_ctx._sc, jm) + def _jcols(self, *cols): """Return a JVM Seq of Columns from a list of Column or column names @@ -584,6 +629,7 @@ def _jcols(self, *cols): cols = cols[0] return self._jseq(cols, _to_java_column) + @since("1.3.1") def describe(self, *cols): """Computes statistics for numeric columns. @@ -591,17 +637,21 @@ def describe(self, *cols): given, this function computes statistics for all numerical columns. >>> df.describe().show() - summary age - count 2 - mean 3.5 - stddev 1.5 - min 2 - max 5 + +-------+---+ + |summary|age| + +-------+---+ + | count| 2| + | mean|3.5| + | stddev|1.5| + | min| 2| + | max| 5| + +-------+---+ """ jdf = self._jdf.describe(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def head(self, n=None): """ Returns the first ``n`` rows as a list of :class:`Row`, @@ -618,6 +668,7 @@ def head(self, n=None): return self.take(n) @ignore_unicode_prefix + @since(1.3) def first(self): """Returns the first row as a :class:`Row`. @@ -627,6 +678,7 @@ def first(self): return self.head() @ignore_unicode_prefix + @since(1.3) def __getitem__(self, item): """Returns the column as a :class:`Column`. @@ -654,6 +706,7 @@ def __getitem__(self, item): else: raise TypeError("unexpected item type: %s" % type(item)) + @since(1.3) def __getattr__(self, name): """Returns the :class:`Column` denoted by ``name``. @@ -667,6 +720,7 @@ def __getattr__(self, name): return Column(jc) @ignore_unicode_prefix + @since(1.3) def select(self, *cols): """Projects a set of expressions and returns a new :class:`DataFrame`. @@ -684,6 +738,7 @@ def select(self, *cols): jdf = self._jdf.select(self._jcols(*cols)) return DataFrame(jdf, self.sql_ctx) + @since(1.3) def selectExpr(self, *expr): """Projects a set of SQL expressions and returns a new :class:`DataFrame`. @@ -698,6 +753,7 @@ def selectExpr(self, *expr): return DataFrame(jdf, self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def filter(self, condition): """Filters rows using the given condition. @@ -727,6 +783,7 @@ def filter(self, condition): where = filter @ignore_unicode_prefix + @since(1.3) def groupBy(self, *cols): """Groups the :class:`DataFrame` using the specified columns, so we can run aggregation on them. See :class:`GroupedData` @@ -746,11 +803,55 @@ def groupBy(self, *cols): >>> df.groupBy(['name', df.age]).count().collect() [Row(name=u'Bob', age=5, count=1), Row(name=u'Alice', age=2, count=1)] """ - jdf = self._jdf.groupBy(self._jcols(*cols)) - return GroupedData(jdf, self.sql_ctx) - - groupby = groupBy - + jgd = self._jdf.groupBy(self._jcols(*cols)) + from pyspark.sql.group import GroupedData + return GroupedData(jgd, self.sql_ctx) + + @since(1.4) + def rollup(self, *cols): + """ + Create a multi-dimensional rollup for the current :class:`DataFrame` using + the specified columns, so we can run aggregation on them. 
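[Editor's sketch, not part of the patch] Rough intuition for the `groupBy` / `rollup` / `cube` trio above, phrased as row counts against the two-row doctest `df`; the 5 and 7 match the `rollup` and `cube` doctests that follow.

    df.groupBy('name', df.age).count().count()   # 2: one row per distinct (name, age) pair
    df.rollup('name', df.age).count().count()    # 5: adds per-name subtotals and a grand total
    df.cube('name', df.age).count().count()      # 7: additionally adds per-age subtotals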
+ + >>> df.rollup('name', df.age).count().show() + +-----+----+-----+ + | name| age|count| + +-----+----+-----+ + |Alice|null| 1| + | Bob| 5| 1| + | Bob|null| 1| + | null|null| 2| + |Alice| 2| 1| + +-----+----+-----+ + """ + jgd = self._jdf.rollup(self._jcols(*cols)) + from pyspark.sql.group import GroupedData + return GroupedData(jgd, self.sql_ctx) + + @since(1.4) + def cube(self, *cols): + """ + Create a multi-dimensional cube for the current :class:`DataFrame` using + the specified columns, so we can run aggregation on them. + + >>> df.cube('name', df.age).count().show() + +-----+----+-----+ + | name| age|count| + +-----+----+-----+ + | null| 2| 1| + |Alice|null| 1| + | Bob| 5| 1| + | Bob|null| 1| + | null| 5| 1| + | null|null| 2| + |Alice| 2| 1| + +-----+----+-----+ + """ + jgd = self._jdf.cube(self._jcols(*cols)) + from pyspark.sql.group import GroupedData + return GroupedData(jgd, self.sql_ctx) + + @since(1.3) def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy.agg()``). @@ -763,6 +864,7 @@ def agg(self, *exprs): """ return self.groupBy().agg(*exprs) + @since(1.3) def unionAll(self, other): """ Return a new :class:`DataFrame` containing union of rows in this frame and another frame. @@ -771,6 +873,7 @@ def unionAll(self, other): """ return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx) + @since(1.3) def intersect(self, other): """ Return a new :class:`DataFrame` containing rows only in both this frame and another frame. @@ -779,6 +882,7 @@ def intersect(self, other): """ return DataFrame(self._jdf.intersect(other._jdf), self.sql_ctx) + @since(1.3) def subtract(self, other): """ Return a new :class:`DataFrame` containing rows in this frame but not in another frame. @@ -787,6 +891,38 @@ def subtract(self, other): """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) + @since(1.4) + def dropDuplicates(self, subset=None): + """Return a new :class:`DataFrame` with duplicate rows removed, + optionally only considering certain columns. + + >>> from pyspark.sql import Row + >>> df = sc.parallelize([ \ + Row(name='Alice', age=5, height=80), \ + Row(name='Alice', age=5, height=80), \ + Row(name='Alice', age=10, height=80)]).toDF() + >>> df.dropDuplicates().show() + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 5| 80|Alice| + | 10| 80|Alice| + +---+------+-----+ + + >>> df.dropDuplicates(['name', 'height']).show() + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 5| 80|Alice| + +---+------+-----+ + """ + if subset is None: + jdf = self._jdf.dropDuplicates() + else: + jdf = self._jdf.dropDuplicates(self._jseq(subset)) + return DataFrame(jdf, self.sql_ctx) + + @since("1.3.1") def dropna(self, how='any', thresh=None, subset=None): """Returns a new :class:`DataFrame` omitting rows with null values. @@ -801,12 +937,18 @@ def dropna(self, how='any', thresh=None, subset=None): :param subset: optional list of column names to consider. 
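[Editor's sketch, not part of the patch] The `thresh` and `subset` options of `dropna` described above have no doctest of their own, so a hedged example against the `df4` frame used by the surrounding doctests (rows containing nulls).

    df4.dropna(thresh=2).show()                              # keep rows with >= 2 non-null values
    df4.na.drop(how='all', subset=['age', 'height']).show()  # drop rows where both are null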
>>> df4.dropna().show() - age height name - 10 80 Alice + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 10| 80|Alice| + +---+------+-----+ >>> df4.na.drop().show() - age height name - 10 80 Alice + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 10| 80|Alice| + +---+------+-----+ """ if how is not None and how not in ['any', 'all']: raise ValueError("how ('" + how + "') should be 'any' or 'all'") @@ -823,6 +965,7 @@ def dropna(self, how='any', thresh=None, subset=None): return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sql_ctx) + @since("1.3.1") def fillna(self, value, subset=None): """Replace null values, alias for ``na.fill()``. @@ -837,25 +980,34 @@ def fillna(self, value, subset=None): then the non-string column is simply ignored. >>> df4.fillna(50).show() - age height name - 10 80 Alice - 5 50 Bob - 50 50 Tom - 50 50 null + +---+------+-----+ + |age|height| name| + +---+------+-----+ + | 10| 80|Alice| + | 5| 50| Bob| + | 50| 50| Tom| + | 50| 50| null| + +---+------+-----+ >>> df4.fillna({'age': 50, 'name': 'unknown'}).show() - age height name - 10 80 Alice - 5 null Bob - 50 null Tom - 50 null unknown + +---+------+-------+ + |age|height| name| + +---+------+-------+ + | 10| 80| Alice| + | 5| null| Bob| + | 50| null| Tom| + | 50| null|unknown| + +---+------+-------+ >>> df4.na.fill({'age': 50, 'name': 'unknown'}).show() - age height name - 10 80 Alice - 5 null Bob - 50 null Tom - 50 null unknown + +---+------+-------+ + |age|height| name| + +---+------+-------+ + | 10| 80| Alice| + | 5| null| Bob| + | 50| null| Tom| + | 50| null|unknown| + +---+------+-------+ """ if not isinstance(value, (float, int, long, basestring, dict)): raise ValueError("value should be a float, int, long, string, or dict") @@ -875,6 +1027,83 @@ def fillna(self, value, subset=None): return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) + @since(1.4) + def replace(self, to_replace, value, subset=None): + """Returns a new :class:`DataFrame` replacing a value with another value. + + :param to_replace: int, long, float, string, or list. + Value to be replaced. + If the value is a dict, then `value` is ignored and `to_replace` must be a + mapping from column name (string) to replacement value. The value to be + replaced must be an int, long, float, or string. + :param value: int, long, float, string, or list. + Value to use to replace holes. + The replacement value must be an int, long, float, or string. If `value` is a + list or tuple, `value` should be of the same length with `to_replace`. + :param subset: optional list of column names to consider. + Columns specified in subset that do not have matching data type are ignored. + For example, if `value` is a string, and subset contains a non-string column, + then the non-string column is simply ignored. 
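[Editor's sketch, not part of the patch] The dict form of `replace()` described above is not exercised by the doctests that follow; an illustrative call against the same `df4`. Per the added type checks, `value` is ignored when a dict is passed but must still be a valid scalar, hence the placeholder string.

    df4.replace({'Alice': 'A', 'Bob': 'B'}, 'ignored', 'name').show()
    # name column becomes A / B / Tom / null; age and height are untouched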
+ + >>> df4.replace(10, 20).show() + +----+------+-----+ + | age|height| name| + +----+------+-----+ + | 20| 80|Alice| + | 5| null| Bob| + |null| null| Tom| + |null| null| null| + +----+------+-----+ + + >>> df4.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show() + +----+------+----+ + | age|height|name| + +----+------+----+ + | 10| 80| A| + | 5| null| B| + |null| null| Tom| + |null| null|null| + +----+------+----+ + """ + if not isinstance(to_replace, (float, int, long, basestring, list, tuple, dict)): + raise ValueError( + "to_replace should be a float, int, long, string, list, tuple, or dict") + + if not isinstance(value, (float, int, long, basestring, list, tuple)): + raise ValueError("value should be a float, int, long, string, list, or tuple") + + rep_dict = dict() + + if isinstance(to_replace, (float, int, long, basestring)): + to_replace = [to_replace] + + if isinstance(to_replace, tuple): + to_replace = list(to_replace) + + if isinstance(value, tuple): + value = list(value) + + if isinstance(to_replace, list) and isinstance(value, list): + if len(to_replace) != len(value): + raise ValueError("to_replace and value lists should be of the same length") + rep_dict = dict(zip(to_replace, value)) + elif isinstance(to_replace, list) and isinstance(value, (float, int, long, basestring)): + rep_dict = dict([(tr, value) for tr in to_replace]) + elif isinstance(to_replace, dict): + rep_dict = to_replace + + if subset is None: + return DataFrame(self._jdf.na().replace('*', rep_dict), self.sql_ctx) + elif isinstance(subset, basestring): + subset = [subset] + + if not isinstance(subset, (list, tuple)): + raise ValueError("subset should be a list or tuple of column names") + + return DataFrame( + self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx) + + @since(1.4) def corr(self, col1, col2, method=None): """ Calculates the correlation of two columns of a DataFrame as a double value. Currently only @@ -896,6 +1125,7 @@ def corr(self, col1, col2, method=None): "coefficient is supported.") return self._jdf.stat().corr(col1, col2, method) + @since(1.4) def cov(self, col1, col2): """ Calculate the sample covariance for the given columns, specified by their names, as a @@ -910,6 +1140,29 @@ def cov(self, col1, col2): raise ValueError("col2 should be a string.") return self._jdf.stat().cov(col1, col2) + @since(1.4) + def crosstab(self, col1, col2): + """ + Computes a pair-wise frequency table of the given columns. Also known as a contingency + table. The number of distinct values for each column should be less than 1e4. At most 1e6 + non-zero pair frequencies will be returned. + The first column of each row will be the distinct values of `col1` and the column names + will be the distinct values of `col2`. The name of the first column will be `$col1_$col2`. + Pairs that have no occurrences will have `null` as their counts. + :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases. + + :param col1: The name of the first column. Distinct items will make the first item of + each row. + :param col2: The name of the second column. Distinct items will make the column names + of the DataFrame. + """ + if not isinstance(col1, str): + raise ValueError("col1 should be a string.") + if not isinstance(col2, str): + raise ValueError("col2 should be a string.") + return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx) + + @since(1.4) def freqItems(self, cols, support=None): """ Finding frequent items for columns, possibly with false positives. 
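
The replace implementation above normalises scalar, list and dict arguments into a single map before delegating to na().replace, and corr, cov and crosstab forward to the JVM stat() accessor. A sketch of both, assuming the usual `sqlContext`:

    from pyspark.sql import Row

    df4 = sqlContext.createDataFrame([Row(name='Alice', age=10, height=80),
                                      Row(name='Bob', age=5, height=None)])

    # Scalar, paired-list and dict forms of replace.
    df4.replace(10, 20).show()
    df4.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
    df4.replace({'Alice': 'A'}, 1, subset='name').show()   # dict form: the value argument is ignored

    people = sqlContext.createDataFrame([Row(name='Alice', age=2, height=80),
                                         Row(name='Bob', age=5, height=85)])
    print(people.stat.corr('age', 'height'))
    print(people.stat.cov('age', 'height'))
    people.stat.crosstab('name', 'age').show()
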
Using the @@ -917,6 +1170,9 @@ def freqItems(self, cols, support=None): "http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". :func:`DataFrame.freqItems` and :func:`DataFrameStatFunctions.freqItems` are aliases. + This function is meant for exploratory data analysis, as we make no guarantee about the + backward compatibility of the schema of the resulting DataFrame. + :param cols: Names of the columns to calculate frequent items for as a list or tuple of strings. :param support: The frequency with which to consider an item 'frequent'. Default is 1%. @@ -931,6 +1187,7 @@ def freqItems(self, cols, support=None): return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx) @ignore_unicode_prefix + @since(1.3) def withColumn(self, colName, col): """Returns a new :class:`DataFrame` by adding a column. @@ -943,8 +1200,9 @@ def withColumn(self, colName, col): return self.select('*', col.alias(colName)) @ignore_unicode_prefix + @since(1.3) def withColumnRenamed(self, existing, new): - """REturns a new :class:`DataFrame` by renaming an existing column. + """Returns a new :class:`DataFrame` by renaming an existing column. :param existing: string, name of the existing column to rename. :param col: string, new name of the column. @@ -957,6 +1215,20 @@ def withColumnRenamed(self, existing, new): for c in self.columns] return self.select(*cols) + @since(1.4) + @ignore_unicode_prefix + def drop(self, colName): + """Returns a new :class:`DataFrame` that drops the specified column. + + :param colName: string, name of the column to drop. + + >>> df.drop('age').collect() + [Row(name=u'Alice'), Row(name=u'Bob')] + """ + jdf = self._jdf.drop(colName) + return DataFrame(jdf, self.sql_ctx) + + @since(1.3) def toPandas(self): """Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. @@ -970,6 +1242,10 @@ def toPandas(self): import pandas as pd return pd.DataFrame.from_records(self.collect(), columns=self.columns) + # Pandas compatibility + groupby = groupBy + drop_duplicates = dropDuplicates + # Having SchemaRDD for backward compatibility (for docs) class SchemaRDD(DataFrame): @@ -977,382 +1253,11 @@ class SchemaRDD(DataFrame): """ -def dfapi(f): - def _api(self): - name = f.__name__ - jdf = getattr(self._jdf, name)() - return DataFrame(jdf, self.sql_ctx) - _api.__name__ = f.__name__ - _api.__doc__ = f.__doc__ - return _api - - -def df_varargs_api(f): - def _api(self, *args): - name = f.__name__ - jdf = getattr(self._jdf, name)(_to_seq(self.sql_ctx._sc, args)) - return DataFrame(jdf, self.sql_ctx) - _api.__name__ = f.__name__ - _api.__doc__ = f.__doc__ - return _api - - -class GroupedData(object): +def _to_scala_map(sc, jm): """ - A set of methods for aggregations on a :class:`DataFrame`, - created by :func:`DataFrame.groupBy`. + Convert a dict into a JVM Map. """ - - def __init__(self, jdf, sql_ctx): - self._jdf = jdf - self.sql_ctx = sql_ctx - - @ignore_unicode_prefix - def agg(self, *exprs): - """Compute aggregates and returns the result as a :class:`DataFrame`. - - The available aggregate functions are `avg`, `max`, `min`, `sum`, `count`. - - If ``exprs`` is a single :class:`dict` mapping from string to string, then the key - is the column to perform aggregation on, and the value is the aggregate function. - - Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions. - - :param exprs: a dict mapping from column name (string) to aggregate functions (string), - or a list of :class:`Column`. 
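
Between the new drop method, freqItems and the pandas-style aliases registered at the end of the class, column-level reshaping now reads much like pandas. An illustrative sketch, assuming `sqlContext`:

    from pyspark.sql import Row

    df = sqlContext.createDataFrame([Row(name='Alice', age=2),
                                     Row(name='Bob', age=5)])

    # Add, rename, then drop a column.
    (df.withColumn('age2', df.age + 1)
       .withColumnRenamed('age2', 'age_next_year')
       .drop('age')
       .show())

    # Pandas-compatible aliases.
    df.groupby('name').count().show()     # same as groupBy
    df.drop_duplicates().show()           # same as dropDuplicates

    # freqItems is approximate and meant for exploration only.
    df.freqItems(['age'], support=0.5).show()
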
- - >>> gdf = df.groupBy(df.name) - >>> gdf.agg({"*": "count"}).collect() - [Row(name=u'Alice', COUNT(1)=1), Row(name=u'Bob', COUNT(1)=1)] - - >>> from pyspark.sql import functions as F - >>> gdf.agg(F.min(df.age)).collect() - [Row(MIN(age)=2), Row(MIN(age)=5)] - """ - assert exprs, "exprs should not be empty" - if len(exprs) == 1 and isinstance(exprs[0], dict): - jdf = self._jdf.agg(exprs[0]) - else: - # Columns - assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column" - jdf = self._jdf.agg(exprs[0]._jc, - _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]])) - return DataFrame(jdf, self.sql_ctx) - - @dfapi - def count(self): - """Counts the number of records for each group. - - >>> df.groupBy(df.age).count().collect() - [Row(age=2, count=1), Row(age=5, count=1)] - """ - - @df_varargs_api - def mean(self, *cols): - """Computes average values for each numeric columns for each group. - - :func:`mean` is an alias for :func:`avg`. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().mean('age').collect() - [Row(AVG(age)=3.5)] - >>> df3.groupBy().mean('age', 'height').collect() - [Row(AVG(age)=3.5, AVG(height)=82.5)] - """ - - @df_varargs_api - def avg(self, *cols): - """Computes average values for each numeric columns for each group. - - :func:`mean` is an alias for :func:`avg`. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().avg('age').collect() - [Row(AVG(age)=3.5)] - >>> df3.groupBy().avg('age', 'height').collect() - [Row(AVG(age)=3.5, AVG(height)=82.5)] - """ - - @df_varargs_api - def max(self, *cols): - """Computes the max value for each numeric columns for each group. - - >>> df.groupBy().max('age').collect() - [Row(MAX(age)=5)] - >>> df3.groupBy().max('age', 'height').collect() - [Row(MAX(age)=5, MAX(height)=85)] - """ - - @df_varargs_api - def min(self, *cols): - """Computes the min value for each numeric column for each group. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().min('age').collect() - [Row(MIN(age)=2)] - >>> df3.groupBy().min('age', 'height').collect() - [Row(MIN(age)=2, MIN(height)=80)] - """ - - @df_varargs_api - def sum(self, *cols): - """Compute the sum for each numeric columns for each group. - - :param cols: list of column names (string). Non-numeric columns are ignored. - - >>> df.groupBy().sum('age').collect() - [Row(SUM(age)=7)] - >>> df3.groupBy().sum('age', 'height').collect() - [Row(SUM(age)=7, SUM(height)=165)] - """ - - -def _create_column_from_literal(literal): - sc = SparkContext._active_spark_context - return sc._jvm.functions.lit(literal) - - -def _create_column_from_name(name): - sc = SparkContext._active_spark_context - return sc._jvm.functions.col(name) - - -def _to_java_column(col): - if isinstance(col, Column): - jcol = col._jc - else: - jcol = _create_column_from_name(col) - return jcol - - -def _to_seq(sc, cols, converter=None): - """ - Convert a list of Column (or names) into a JVM Seq of Column. - - An optional `converter` could be used to convert items in `cols` - into JVM Column objects. 
- """ - if converter: - cols = [converter(c) for c in cols] - return sc._jvm.PythonUtils.toSeq(cols) - - -def _unary_op(name, doc="unary operator"): - """ Create a method for given unary operator """ - def _(self): - jc = getattr(self._jc, name)() - return Column(jc) - _.__doc__ = doc - return _ - - -def _func_op(name, doc=''): - def _(self): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)(self._jc) - return Column(jc) - _.__doc__ = doc - return _ - - -def _bin_op(name, doc="binary operator"): - """ Create a method for given binary operator - """ - def _(self, other): - jc = other._jc if isinstance(other, Column) else other - njc = getattr(self._jc, name)(jc) - return Column(njc) - _.__doc__ = doc - return _ - - -def _reverse_op(name, doc="binary operator"): - """ Create a method for binary operator (this object is on right side) - """ - def _(self, other): - jother = _create_column_from_literal(other) - jc = getattr(jother, name)(self._jc) - return Column(jc) - _.__doc__ = doc - return _ - - -class Column(object): - - """ - A column in a DataFrame. - - :class:`Column` instances can be created by:: - - # 1. Select a column out of a DataFrame - - df.colName - df["colName"] - - # 2. Create from an expression - df.colName + 1 - 1 / df.colName - """ - - def __init__(self, jc): - self._jc = jc - - # arithmetic operators - __neg__ = _func_op("negate") - __add__ = _bin_op("plus") - __sub__ = _bin_op("minus") - __mul__ = _bin_op("multiply") - __div__ = _bin_op("divide") - __truediv__ = _bin_op("divide") - __mod__ = _bin_op("mod") - __radd__ = _bin_op("plus") - __rsub__ = _reverse_op("minus") - __rmul__ = _bin_op("multiply") - __rdiv__ = _reverse_op("divide") - __rtruediv__ = _reverse_op("divide") - __rmod__ = _reverse_op("mod") - - # logistic operators - __eq__ = _bin_op("equalTo") - __ne__ = _bin_op("notEqual") - __lt__ = _bin_op("lt") - __le__ = _bin_op("leq") - __ge__ = _bin_op("geq") - __gt__ = _bin_op("gt") - - # `and`, `or`, `not` cannot be overloaded in Python, - # so use bitwise operators as boolean operators - __and__ = _bin_op('and') - __or__ = _bin_op('or') - __invert__ = _func_op('not') - __rand__ = _bin_op("and") - __ror__ = _bin_op("or") - - # container operators - __contains__ = _bin_op("contains") - __getitem__ = _bin_op("getItem") - - def getItem(self, key): - """An expression that gets an item at position `ordinal` out of a list, - or gets an item by key out of a dict. - - >>> df = sc.parallelize([([1, 2], {"key": "value"})]).toDF(["l", "d"]) - >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() - l[0] d[key] - 1 value - >>> df.select(df.l[0], df.d["key"]).show() - l[0] d[key] - 1 value - """ - return self[key] - - def getField(self, name): - """An expression that gets a field by name in a StructField. 
- - >>> from pyspark.sql import Row - >>> df = sc.parallelize([Row(r=Row(a=1, b="b"))]).toDF() - >>> df.select(df.r.getField("b")).show() - r.b - b - >>> df.select(df.r.a).show() - r.a - 1 - """ - return Column(self._jc.getField(name)) - - def __getattr__(self, item): - if item.startswith("__"): - raise AttributeError(item) - return self.getField(item) - - # string methods - rlike = _bin_op("rlike") - like = _bin_op("like") - startswith = _bin_op("startsWith") - endswith = _bin_op("endsWith") - - @ignore_unicode_prefix - def substr(self, startPos, length): - """ - Return a :class:`Column` which is a substring of the column - - :param startPos: start position (int or Column) - :param length: length of the substring (int or Column) - - >>> df.select(df.name.substr(1, 3).alias("col")).collect() - [Row(col=u'Ali'), Row(col=u'Bob')] - """ - if type(startPos) != type(length): - raise TypeError("Can not mix the type") - if isinstance(startPos, (int, long)): - jc = self._jc.substr(startPos, length) - elif isinstance(startPos, Column): - jc = self._jc.substr(startPos._jc, length._jc) - else: - raise TypeError("Unexpected type: %s" % type(startPos)) - return Column(jc) - - __getslice__ = substr - - @ignore_unicode_prefix - def inSet(self, *cols): - """ A boolean expression that is evaluated to true if the value of this - expression is contained by the evaluated values of the arguments. - - >>> df[df.name.inSet("Bob", "Mike")].collect() - [Row(age=5, name=u'Bob')] - >>> df[df.age.inSet([1, 2, 3])].collect() - [Row(age=2, name=u'Alice')] - """ - if len(cols) == 1 and isinstance(cols[0], (list, set)): - cols = cols[0] - cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols] - sc = SparkContext._active_spark_context - jc = getattr(self._jc, "in")(_to_seq(sc, cols)) - return Column(jc) - - # order - asc = _unary_op("asc", "Returns a sort expression based on the" - " ascending order of the given column name.") - desc = _unary_op("desc", "Returns a sort expression based on the" - " descending order of the given column name.") - - isNull = _unary_op("isNull", "True if the current expression is null.") - isNotNull = _unary_op("isNotNull", "True if the current expression is not null.") - - def alias(self, alias): - """Return a alias for this column - - >>> df.select(df.age.alias("age2")).collect() - [Row(age2=2), Row(age2=5)] - """ - return Column(getattr(self._jc, "as")(alias)) - - @ignore_unicode_prefix - def cast(self, dataType): - """ Convert the column into type `dataType` - - >>> df.select(df.age.cast("string").alias('ages')).collect() - [Row(ages=u'2'), Row(ages=u'5')] - >>> df.select(df.age.cast(StringType()).alias('ages')).collect() - [Row(ages=u'2'), Row(ages=u'5')] - """ - if isinstance(dataType, basestring): - jc = self._jc.cast(dataType) - elif isinstance(dataType, DataType): - sc = SparkContext._active_spark_context - ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc()) - jdt = ssql_ctx.parseDataType(dataType.json()) - jc = self._jc.cast(jdt) - else: - raise TypeError("unexpected type: %s" % type(dataType)) - return Column(jc) - - def __repr__(self): - return 'Column<%s>' % self._jc.toString().encode('utf8') + return sc._jvm.PythonUtils.toScalaMap(jm) class DataFrameNaFunctions(object): @@ -1390,6 +1295,11 @@ def cov(self, col1, col2): cov.__doc__ = DataFrame.cov.__doc__ + def crosstab(self, col1, col2): + return self.df.crosstab(col1, col2) + + crosstab.__doc__ = DataFrame.crosstab.__doc__ + def freqItems(self, cols, support=None): return self.df.freqItems(cols, 
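
The Column class removed above moves to pyspark.sql.column (note the new import in functions.py further down); its operator table is unchanged. One convention worth a sketch: because `and`, `or` and `not` cannot be overloaded in Python, boolean logic on columns uses the bitwise operators, which also bind tighter than comparisons. Assuming `sqlContext`:

    from pyspark.sql import Row

    df = sqlContext.createDataFrame([Row(name='Alice', age=2),
                                     Row(name='Bob', age=5)])

    # Use & | ~ for boolean logic and parenthesize the comparisons.
    df.filter((df.age > 1) & ~(df.name == 'Bob')).show()

    # getItem / bracket access on complex types.
    cdf = sqlContext.createDataFrame([Row(l=[1, 2], d={'key': 'value'})])
    cdf.select(cdf.l.getItem(0), cdf.d['key']).show()

    # substr, cast and alias.
    df.select(df.name.substr(1, 3).alias('prefix'),
              df.age.cast('string').alias('age_str')).show()
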
support) @@ -1409,9 +1319,6 @@ def _test(): .toDF(StructType([StructField('age', IntegerType()), StructField('name', StringType())])) globs['df2'] = sc.parallelize([Row(name='Tom', height=80), Row(name='Bob', height=85)]).toDF() - globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80), - Row(name='Bob', age=5, height=85)]).toDF() - globs['df4'] = sc.parallelize([Row(name='Alice', age=10, height=80), Row(name='Bob', age=5, height=None), Row(name='Tom', age=None, height=None), diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 641220a264295..bbf465aca8d4d 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -26,18 +26,26 @@ from pyspark import SparkContext from pyspark.rdd import _prepare_for_python_RDD, ignore_unicode_prefix from pyspark.serializers import PickleSerializer, AutoBatchedSerializer +from pyspark.sql import since from pyspark.sql.types import StringType -from pyspark.sql.dataframe import Column, _to_java_column, _to_seq +from pyspark.sql.column import Column, _to_java_column, _to_seq __all__ = [ + 'array', 'approxCountDistinct', + 'coalesce', 'countDistinct', + 'explode', 'monotonicallyIncreasingId', 'rand', 'randn', 'sparkPartitionId', - 'udf'] + 'struct', + 'udf', + 'when'] + +__all__ += ['lag', 'lead', 'ntile'] def _create_function(name, doc=""): @@ -51,6 +59,30 @@ def _(col): return _ +def _create_binary_mathfunction(name, doc=""): + """ Create a binary mathfunction by name""" + def _(col1, col2): + sc = SparkContext._active_spark_context + # users might write ints for simplicity. This would throw an error on the JVM side. + jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else float(col1), + col2._jc if isinstance(col2, Column) else float(col2)) + return Column(jc) + _.__name__ = name + _.__doc__ = doc + return _ + + +def _create_window_function(name, doc=''): + """ Create a window function by name """ + def _(): + sc = SparkContext._active_spark_context + jc = getattr(sc._jvm.functions, name)() + return Column(jc) + _.__name__ = name + _.__doc__ = 'Window function: ' + doc + return _ + + _functions = { 'lit': 'Creates a :class:`Column` of literal value.', 'col': 'Returns a :class:`Column` based on the given column name.', @@ -74,13 +106,97 @@ def _(col): 'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.', } +_functions_1_4 = { + # unary math functions + 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' + + '0.0 through pi.', + 'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' + + '-pi/2 through pi/2.', + 'atan': 'Computes the tangent inverse of the given value.', + 'cbrt': 'Computes the cube-root of the given value.', + 'ceil': 'Computes the ceiling of the given value.', + 'cos': 'Computes the cosine of the given value.', + 'cosh': 'Computes the hyperbolic cosine of the given value.', + 'exp': 'Computes the exponential of the given value.', + 'expm1': 'Computes the exponential of the given value minus one.', + 'floor': 'Computes the floor of the given value.', + 'log': 'Computes the natural logarithm of the given value.', + 'log10': 'Computes the logarithm of the given value in Base 10.', + 'log1p': 'Computes the natural logarithm of the given value plus one.', + 'rint': 'Returns the double value that is closest in value to the argument and' + + ' is equal to a mathematical integer.', + 'signum': 'Computes the signum of the given value.', + 'sin': 
'Computes the sine of the given value.', + 'sinh': 'Computes the hyperbolic sine of the given value.', + 'tan': 'Computes the tangent of the given value.', + 'tanh': 'Computes the hyperbolic tangent of the given value.', + 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' + + 'measured in degrees.', + 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' + + 'measured in radians.', + + 'bitwiseNOT': 'Computes bitwise not.', +} + +# math functions that take two arguments as input +_binary_mathfunctions = { + 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' + + 'polar coordinates (r, theta).', + 'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.', + 'pow': 'Returns the value of the first argument raised to the power of the second argument.' +} + +_window_functions = { + 'rowNumber': + """returns a sequential number starting at 1 within a window partition. + + This is equivalent to the ROW_NUMBER function in SQL.""", + 'denseRank': + """returns the rank of rows within a window partition, without any gaps. + + The difference between rank and denseRank is that denseRank leaves no gaps in ranking + sequence when there are ties. That is, if you were ranking a competition using denseRank + and had three people tie for second place, you would say that all three were in second + place and that the next person came in third. + + This is equivalent to the DENSE_RANK function in SQL.""", + 'rank': + """returns the rank of rows within a window partition. + + The difference between rank and denseRank is that denseRank leaves no gaps in ranking + sequence when there are ties. That is, if you were ranking a competition using denseRank + and had three people tie for second place, you would say that all three were in second + place and that the next person came in third. + + This is equivalent to the RANK function in SQL.""", + 'cumeDist': + """returns the cumulative distribution of values within a window partition, + i.e. the fraction of rows that are below the current row. + + This is equivalent to the CUME_DIST function in SQL.""", + 'percentRank': + """returns the relative rank (i.e. percentile) of rows within a window partition. + + This is equivalent to the PERCENT_RANK function in SQL.""", +} + for _name, _doc in _functions.items(): - globals()[_name] = _create_function(_name, _doc) + globals()[_name] = since(1.3)(_create_function(_name, _doc)) +for _name, _doc in _functions_1_4.items(): + globals()[_name] = since(1.4)(_create_function(_name, _doc)) +for _name, _doc in _binary_mathfunctions.items(): + globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc)) +for _name, _doc in _window_functions.items(): + globals()[_name] = since(1.4)(_create_window_function(_name, _doc)) del _name, _doc __all__ += _functions.keys() +__all__ += _functions_1_4.keys() +__all__ += _binary_mathfunctions.keys() +__all__ += _window_functions.keys() __all__.sort() +@since(1.4) def array(*cols): """Creates a new array column. @@ -99,6 +215,7 @@ def array(*cols): return Column(jc) +@since(1.3) def approxCountDistinct(col, rsd=None): """Returns a new :class:`Column` for approximate distinct count of ``col``. @@ -113,6 +230,44 @@ def approxCountDistinct(col, rsd=None): return Column(jc) +@since(1.4) +def coalesce(*cols): + """Returns the first column that is not null. 
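
The math helpers relocated here from pyspark.sql.mathfunctions accept either Columns or plain numbers, because _create_binary_mathfunction coerces non-Column arguments to float literals. A sketch, assuming `sqlContext`:

    from pyspark.sql import Row
    from pyspark.sql import functions as F

    df = sqlContext.createDataFrame([Row(x=3.0, y=4.0)])

    # Mixing a column with a plain constant works for the binary functions.
    df.select(F.hypot(df.x, df.y).alias('h'),
              F.atan2(df.y, df.x).alias('theta'),
              F.pow(df.x, 2).alias('x_squared')).show()

    # array builds a complex column; approxCountDistinct estimates cardinality.
    df.select(F.array(df.x, df.y).alias('xy'),
              F.approxCountDistinct(df.x).alias('approx')).show()
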
+ + >>> cDf = sqlContext.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b")) + >>> cDf.show() + +----+----+ + | a| b| + +----+----+ + |null|null| + | 1|null| + |null| 2| + +----+----+ + + >>> cDf.select(coalesce(cDf["a"], cDf["b"])).show() + +-------------+ + |Coalesce(a,b)| + +-------------+ + | null| + | 1| + | 2| + +-------------+ + + >>> cDf.select('*', coalesce(cDf["a"], lit(0.0))).show() + +----+----+---------------+ + | a| b|Coalesce(a,0.0)| + +----+----+---------------+ + |null|null| 0.0| + | 1|null| 1.0| + |null| 2| 0.0| + +----+----+---------------+ + """ + sc = SparkContext._active_spark_context + jc = sc._jvm.functions.coalesce(_to_seq(sc, cols, _to_java_column)) + return Column(jc) + + +@since(1.3) def countDistinct(col, *cols): """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. @@ -127,6 +282,28 @@ def countDistinct(col, *cols): return Column(jc) +@since(1.4) +def explode(col): + """Returns a new row for each element in the given array or map. + + >>> from pyspark.sql import Row + >>> eDF = sqlContext.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) + >>> eDF.select(explode(eDF.intlist).alias("anInt")).collect() + [Row(anInt=1), Row(anInt=2), Row(anInt=3)] + + >>> eDF.select(explode(eDF.mapfield).alias("key", "value")).show() + +---+-----+ + |key|value| + +---+-----+ + | a| b| + +---+-----+ + """ + sc = SparkContext._active_spark_context + jc = sc._jvm.functions.explode(_to_java_column(col)) + return Column(jc) + + +@since(1.4) def monotonicallyIncreasingId(): """A column that generates monotonically increasing 64-bit integers. @@ -135,7 +312,7 @@ def monotonicallyIncreasingId(): within each partition in the lower 33 bits. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records. - As an example, consider a [[DataFrame]] with two partitions, each with 3 records. + As an example, consider a :class:`DataFrame` with two partitions, each with 3 records. This expression would return the following IDs: 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. @@ -147,6 +324,7 @@ def monotonicallyIncreasingId(): return Column(sc._jvm.functions.monotonicallyIncreasingId()) +@since(1.4) def rand(seed=None): """Generates a random column with i.i.d. samples from U[0.0, 1.0]. """ @@ -158,6 +336,7 @@ def rand(seed=None): return Column(jc) +@since(1.4) def randn(seed=None): """Generates a column with i.i.d. samples from the standard normal distribution. """ @@ -169,6 +348,7 @@ def randn(seed=None): return Column(jc) +@since(1.4) def sparkPartitionId(): """A column for partition ID of the Spark task. @@ -182,6 +362,7 @@ def sparkPartitionId(): @ignore_unicode_prefix +@since(1.4) def struct(*cols): """Creates a new struct column. @@ -200,9 +381,82 @@ def struct(*cols): return Column(jc) +@since(1.4) +def when(condition, value): + """Evaluates a list of conditions and returns one of multiple possible result expressions. + If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. + + :param condition: a boolean :class:`Column` expression. + :param value: a literal value, or a :class:`Column` expression. 
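
coalesce, when/otherwise and explode cover the most common null- and collection-handling expressions added here. A sketch, assuming `sqlContext`:

    from pyspark.sql import Row
    from pyspark.sql import functions as F

    df = sqlContext.createDataFrame([Row(a=1, b=10, tags=['x', 'y']),
                                     Row(a=None, b=20, tags=['z'])])

    # coalesce returns the first non-null column; when/otherwise builds a
    # conditional expression.
    df.select(F.coalesce(df.a, df.b).alias('first_non_null'),
              F.when(df.a.isNull(), 0).otherwise(df.a).alias('a_or_zero')).show()

    # explode emits one output row per array element.
    df.select(F.explode(df.tags).alias('tag')).show()
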
+ + >>> df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect() + [Row(age=3), Row(age=4)] + + >>> df.select(when(df.age == 2, df.age + 1).alias("age")).collect() + [Row(age=3), Row(age=None)] + """ + sc = SparkContext._active_spark_context + if not isinstance(condition, Column): + raise TypeError("condition should be a Column") + v = value._jc if isinstance(value, Column) else value + jc = sc._jvm.functions.when(condition._jc, v) + return Column(jc) + + +@since(1.4) +def lag(col, count=1, default=None): + """ + Window function: returns the value that is `offset` rows before the current row, and + `defaultValue` if there is less than `offset` rows before the current row. For example, + an `offset` of one will return the previous row at any given point in the window partition. + + This is equivalent to the LAG function in SQL. + + :param col: name of column or expression + :param count: number of row to extend + :param default: default value + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.lag(_to_java_column(col), count, default)) + + +@since(1.4) +def lead(col, count=1, default=None): + """ + Window function: returns the value that is `offset` rows after the current row, and + `defaultValue` if there is less than `offset` rows after the current row. For example, + an `offset` of one will return the next row at any given point in the window partition. + + This is equivalent to the LEAD function in SQL. + + :param col: name of column or expression + :param count: number of row to extend + :param default: default value + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.lead(_to_java_column(col), count, default)) + + +@since(1.4) +def ntile(n): + """ + Window function: returns a group id from 1 to `n` (inclusive) in a round-robin fashion in + a window partition. Fow example, if `n` is 3, the first row will get 1, the second row will + get 2, the third row will get 3, and the fourth row will get 1... + + This is equivalent to the NTILE function in SQL. + + :param n: an integer + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.ntile(int(n))) + + class UserDefinedFunction(object): """ User defined function in Python + + .. versionadded:: 1.3 """ def __init__(self, func, returnType): self.func = func @@ -220,8 +474,8 @@ def _create_judf(self): ssql_ctx = sc._jvm.SQLContext(sc._jsc.sc()) jdt = ssql_ctx.parseDataType(self.returnType.json()) fname = f.__name__ if hasattr(f, '__name__') else f.__class__.__name__ - judf = sc._jvm.UserDefinedPythonFunction(fname, bytearray(pickled_command), env, - includes, sc.pythonExec, broadcast_vars, + judf = sc._jvm.UserDefinedPythonFunction(fname, bytearray(pickled_command), env, includes, + sc.pythonExec, sc.pythonVer, broadcast_vars, sc._javaAccumulator, jdt) return judf @@ -236,6 +490,7 @@ def __call__(self, *cols): return Column(jc) +@since(1.3) def udf(f, returnType=StringType()): """Creates a :class:`Column` expression representing a user defined function (UDF). diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py new file mode 100644 index 0000000000000..5a37a673ee80c --- /dev/null +++ b/python/pyspark/sql/group.py @@ -0,0 +1,195 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
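
lag, lead, ntile and the rank-style helpers defined above are applied with Column.over and a window specification. The sketch below is illustrative only: it assumes the Window class from pyspark.sql.window (imported by the new tests further down) and a Hive-enabled build, since window functions in this release are resolved through Hive:

    from pyspark.sql import Row, HiveContext
    from pyspark.sql import functions as F
    from pyspark.sql.window import Window

    hc = HiveContext(sc)           # window functions need Hive support here
    df = hc.createDataFrame([Row(key='a', value=1),
                             Row(key='a', value=2),
                             Row(key='b', value=3)])

    w = Window.partitionBy('key').orderBy('value')

    # lag/lead look backwards/forwards inside each partition; rowNumber and
    # ntile number the rows within it.
    df.select(df.key, df.value,
              F.lag(df.value, 1).over(w).alias('prev'),
              F.lead(df.value, 1).over(w).alias('next'),
              F.rowNumber().over(w).alias('rn'),
              F.ntile(2).over(w).alias('bucket')).show()
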
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.rdd import ignore_unicode_prefix +from pyspark.sql import since +from pyspark.sql.column import Column, _to_seq +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import * + +__all__ = ["GroupedData"] + + +def dfapi(f): + def _api(self): + name = f.__name__ + jdf = getattr(self._jdf, name)() + return DataFrame(jdf, self.sql_ctx) + _api.__name__ = f.__name__ + _api.__doc__ = f.__doc__ + return _api + + +def df_varargs_api(f): + def _api(self, *args): + name = f.__name__ + jdf = getattr(self._jdf, name)(_to_seq(self.sql_ctx._sc, args)) + return DataFrame(jdf, self.sql_ctx) + _api.__name__ = f.__name__ + _api.__doc__ = f.__doc__ + return _api + + +class GroupedData(object): + """ + A set of methods for aggregations on a :class:`DataFrame`, + created by :func:`DataFrame.groupBy`. + + .. note:: Experimental + + .. versionadded:: 1.3 + """ + + def __init__(self, jdf, sql_ctx): + self._jdf = jdf + self.sql_ctx = sql_ctx + + @ignore_unicode_prefix + @since(1.3) + def agg(self, *exprs): + """Compute aggregates and returns the result as a :class:`DataFrame`. + + The available aggregate functions are `avg`, `max`, `min`, `sum`, `count`. + + If ``exprs`` is a single :class:`dict` mapping from string to string, then the key + is the column to perform aggregation on, and the value is the aggregate function. + + Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions. + + :param exprs: a dict mapping from column name (string) to aggregate functions (string), + or a list of :class:`Column`. + + >>> gdf = df.groupBy(df.name) + >>> gdf.agg({"*": "count"}).collect() + [Row(name=u'Alice', COUNT(1)=1), Row(name=u'Bob', COUNT(1)=1)] + + >>> from pyspark.sql import functions as F + >>> gdf.agg(F.min(df.age)).collect() + [Row(name=u'Alice', MIN(age)=2), Row(name=u'Bob', MIN(age)=5)] + """ + assert exprs, "exprs should not be empty" + if len(exprs) == 1 and isinstance(exprs[0], dict): + jdf = self._jdf.agg(exprs[0]) + else: + # Columns + assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column" + jdf = self._jdf.agg(exprs[0]._jc, + _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]])) + return DataFrame(jdf, self.sql_ctx) + + @dfapi + @since(1.3) + def count(self): + """Counts the number of records for each group. + + >>> df.groupBy(df.age).count().collect() + [Row(age=2, count=1), Row(age=5, count=1)] + """ + + @df_varargs_api + @since(1.3) + def mean(self, *cols): + """Computes average values for each numeric columns for each group. + + :func:`mean` is an alias for :func:`avg`. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().mean('age').collect() + [Row(AVG(age)=3.5)] + >>> df3.groupBy().mean('age', 'height').collect() + [Row(AVG(age)=3.5, AVG(height)=82.5)] + """ + + @df_varargs_api + @since(1.3) + def avg(self, *cols): + """Computes average values for each numeric columns for each group. 
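
GroupedData.agg accepts either a dict mapping column names to aggregate-function names or a list of aggregate Column expressions, while the dfapi/df_varargs_api decorators generate the convenience methods that follow. A sketch, assuming `sqlContext`:

    from pyspark.sql import Row
    from pyspark.sql import functions as F

    df = sqlContext.createDataFrame([Row(name='Alice', age=2),
                                     Row(name='Bob', age=5)])
    gdf = df.groupBy(df.name)

    # Dict form and Column-expression form of agg.
    gdf.agg({'*': 'count'}).show()
    gdf.agg(F.min(df.age).alias('min_age'), F.max(df.age)).show()

    # Generated varargs helpers.
    df.groupBy().mean('age').show()
    df.groupBy().sum('age').show()
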
+ + :func:`mean` is an alias for :func:`avg`. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().avg('age').collect() + [Row(AVG(age)=3.5)] + >>> df3.groupBy().avg('age', 'height').collect() + [Row(AVG(age)=3.5, AVG(height)=82.5)] + """ + + @df_varargs_api + @since(1.3) + def max(self, *cols): + """Computes the max value for each numeric columns for each group. + + >>> df.groupBy().max('age').collect() + [Row(MAX(age)=5)] + >>> df3.groupBy().max('age', 'height').collect() + [Row(MAX(age)=5, MAX(height)=85)] + """ + + @df_varargs_api + @since(1.3) + def min(self, *cols): + """Computes the min value for each numeric column for each group. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().min('age').collect() + [Row(MIN(age)=2)] + >>> df3.groupBy().min('age', 'height').collect() + [Row(MIN(age)=2, MIN(height)=80)] + """ + + @df_varargs_api + @since(1.3) + def sum(self, *cols): + """Compute the sum for each numeric columns for each group. + + :param cols: list of column names (string). Non-numeric columns are ignored. + + >>> df.groupBy().sum('age').collect() + [Row(SUM(age)=7)] + >>> df3.groupBy().sum('age', 'height').collect() + [Row(SUM(age)=7, SUM(height)=165)] + """ + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.sql import Row, SQLContext + import pyspark.sql.group + globs = pyspark.sql.group.__dict__.copy() + sc = SparkContext('local[4]', 'PythonTest') + globs['sc'] = sc + globs['sqlContext'] = SQLContext(sc) + globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ + .toDF(StructType([StructField('age', IntegerType()), + StructField('name', StringType())])) + globs['df3'] = sc.parallelize([Row(name='Alice', age=2, height=80), + Row(name='Bob', age=5, height=85)]).toDF() + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.group, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + globs['sc'].stop() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/mathfunctions.py b/python/pyspark/sql/mathfunctions.py deleted file mode 100644 index 7dbcab8694293..0000000000000 --- a/python/pyspark/sql/mathfunctions.py +++ /dev/null @@ -1,101 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -""" -A collection of builtin math functions -""" - -from pyspark import SparkContext -from pyspark.sql.dataframe import Column - -__all__ = [] - - -def _create_unary_mathfunction(name, doc=""): - """ Create a unary mathfunction by name""" - def _(col): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.mathfunctions, name)(col._jc if isinstance(col, Column) else col) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ - - -def _create_binary_mathfunction(name, doc=""): - """ Create a binary mathfunction by name""" - def _(col1, col2): - sc = SparkContext._active_spark_context - # users might write ints for simplicity. This would throw an error on the JVM side. - if type(col1) is int: - col1 = col1 * 1.0 - if type(col2) is int: - col2 = col2 * 1.0 - jc = getattr(sc._jvm.mathfunctions, name)(col1._jc if isinstance(col1, Column) else col1, - col2._jc if isinstance(col2, Column) else col2) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ - - -# math functions are found under another object therefore, they need to be handled separately -_mathfunctions = { - 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' + - '0.0 through pi.', - 'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' + - '-pi/2 through pi/2.', - 'atan': 'Computes the tangent inverse of the given value.', - 'cbrt': 'Computes the cube-root of the given value.', - 'ceil': 'Computes the ceiling of the given value.', - 'cos': 'Computes the cosine of the given value.', - 'cosh': 'Computes the hyperbolic cosine of the given value.', - 'exp': 'Computes the exponential of the given value.', - 'expm1': 'Computes the exponential of the given value minus one.', - 'floor': 'Computes the floor of the given value.', - 'log': 'Computes the natural logarithm of the given value.', - 'log10': 'Computes the logarithm of the given value in Base 10.', - 'log1p': 'Computes the natural logarithm of the given value plus one.', - 'rint': 'Returns the double value that is closest in value to the argument and' + - ' is equal to a mathematical integer.', - 'signum': 'Computes the signum of the given value.', - 'sin': 'Computes the sine of the given value.', - 'sinh': 'Computes the hyperbolic sine of the given value.', - 'tan': 'Computes the tangent of the given value.', - 'tanh': 'Computes the hyperbolic tangent of the given value.', - 'toDeg': 'Converts an angle measured in radians to an approximately equivalent angle ' + - 'measured in degrees.', - 'toRad': 'Converts an angle measured in degrees to an approximately equivalent angle ' + - 'measured in radians.' -} - -# math functions that take two arguments as input -_binary_mathfunctions = { - 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' + - 'polar coordinates (r, theta).', - 'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.', - 'pow': 'Returns the value of the first argument raised to the power of the second argument.' 
-} - -for _name, _doc in _mathfunctions.items(): - globals()[_name] = _create_unary_mathfunction(_name, _doc) -for _name, _doc in _binary_mathfunctions.items(): - globals()[_name] = _create_binary_mathfunction(_name, _doc) -del _name, _doc -__all__ += _mathfunctions.keys() -__all__ += _binary_mathfunctions.keys() -__all__.sort() diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py new file mode 100644 index 0000000000000..d17d87419fe3d --- /dev/null +++ b/python/pyspark/sql/readwriter.py @@ -0,0 +1,428 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from py4j.java_gateway import JavaClass + +from pyspark.sql import since +from pyspark.sql.column import _to_seq +from pyspark.sql.types import * + +__all__ = ["DataFrameReader", "DataFrameWriter"] + + +class DataFrameReader(object): + """ + Interface used to load a :class:`DataFrame` from external storage systems + (e.g. file systems, key-value stores, etc). Use :func:`SQLContext.read` + to access this. + + ::Note: Experimental + + .. versionadded:: 1.4 + """ + + def __init__(self, sqlContext): + self._jreader = sqlContext._ssql_ctx.read() + self._sqlContext = sqlContext + + def _df(self, jdf): + from pyspark.sql.dataframe import DataFrame + return DataFrame(jdf, self._sqlContext) + + @since(1.4) + def format(self, source): + """ + Specifies the input data source format. + """ + self._jreader = self._jreader.format(source) + return self + + @since(1.4) + def schema(self, schema): + """ + Specifies the input schema. Some data sources (e.g. JSON) can + infer the input schema automatically from data. By specifying + the schema here, the underlying data source can skip the schema + inference step, and thus speed up data loading. + + :param schema: a StructType object + """ + if not isinstance(schema, StructType): + raise TypeError("schema should be StructType") + jschema = self._sqlContext._ssql_ctx.parseDataType(schema.json()) + self._jreader = self._jreader.schema(jschema) + return self + + @since(1.4) + def options(self, **options): + """ + Adds input options for the underlying data source. + """ + for k in options: + self._jreader = self._jreader.option(k, options[k]) + return self + + @since(1.4) + def load(self, path=None, format=None, schema=None, **options): + """Loads data from a data source and returns it as a :class`DataFrame`. + + :param path: optional string for file-system backed data sources. + :param format: optional string for format of the data source. Default to 'parquet'. + :param schema: optional :class:`StructType` for the input schema. 
+ :param options: all other string options + """ + if format is not None: + self.format(format) + if schema is not None: + self.schema(schema) + self.options(**options) + if path is not None: + return self._df(self._jreader.load(path)) + else: + return self._df(self._jreader.load()) + + @since(1.4) + def json(self, path, schema=None): + """ + Loads a JSON file (one object per line) and returns the result as + a :class`DataFrame`. + + If the ``schema`` parameter is not specified, this function goes + through the input once to determine the input schema. + + :param path: string, path to the JSON dataset. + :param schema: an optional :class:`StructType` for the input schema. + + >>> import tempfile, shutil + >>> jsonFile = tempfile.mkdtemp() + >>> shutil.rmtree(jsonFile) + >>> with open(jsonFile, 'w') as f: + ... f.writelines(jsonStrings) + >>> df1 = sqlContext.read.json(jsonFile) + >>> df1.printSchema() + root + |-- field1: long (nullable = true) + |-- field2: string (nullable = true) + |-- field3: struct (nullable = true) + | |-- field4: long (nullable = true) + + >>> from pyspark.sql.types import * + >>> schema = StructType([ + ... StructField("field2", StringType()), + ... StructField("field3", + ... StructType([StructField("field5", ArrayType(IntegerType()))]))]) + >>> df2 = sqlContext.read.json(jsonFile, schema) + >>> df2.printSchema() + root + |-- field2: string (nullable = true) + |-- field3: struct (nullable = true) + | |-- field5: array (nullable = true) + | | |-- element: integer (containsNull = true) + """ + if schema is not None: + self.schema(schema) + return self._df(self._jreader.json(path)) + + @since(1.4) + def table(self, tableName): + """Returns the specified table as a :class:`DataFrame`. + + >>> sqlContext.registerDataFrameAsTable(df, "table1") + >>> df2 = sqlContext.read.table("table1") + >>> sorted(df.collect()) == sorted(df2.collect()) + True + """ + return self._df(self._jreader.table(tableName)) + + @since(1.4) + def parquet(self, *path): + """Loads a Parquet file, returning the result as a :class:`DataFrame`. + + >>> import tempfile, shutil + >>> parquetFile = tempfile.mkdtemp() + >>> shutil.rmtree(parquetFile) + >>> df.saveAsParquetFile(parquetFile) + >>> df2 = sqlContext.read.parquet(parquetFile) + >>> sorted(df.collect()) == sorted(df2.collect()) + True + """ + return self._df(self._jreader.parquet(_to_seq(self._sqlContext._sc, path))) + + @since(1.4) + def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None, + predicates=None, properties={}): + """ + Construct a :class:`DataFrame` representing the database table accessible + via JDBC URL `url` named `table` and connection `properties`. + + The `column` parameter could be used to partition the table, then it will + be retrieved in parallel based on the parameters passed to this function. + + The `predicates` parameter gives a list expressions suitable for inclusion + in WHERE clauses; each one defines one partition of the :class:`DataFrame`. + + ::Note: Don't create too many partitions in parallel on a large cluster; + otherwise Spark might crash your external database systems. 
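
The reader offers per-format shortcuts (json, parquet, table, jdbc) as well as the generic format/schema/options/load chain shown above. A small round-trip sketch, assuming `sc` and `sqlContext`:

    import tempfile

    from pyspark.sql.types import StructType, StructField, StringType, LongType

    # Write a tiny JSON dataset as text, then load it back.
    path = tempfile.mkdtemp() + '/people.json'
    sc.parallelize(['{"name": "Alice", "age": 2}',
                    '{"name": "Bob", "age": 5}']).saveAsTextFile(path)

    df1 = sqlContext.read.json(path)                 # shortcut, schema inferred

    schema = StructType([StructField('name', StringType()),
                         StructField('age', LongType())])
    df2 = sqlContext.read.format('json').schema(schema).load(path)
    df2.printSchema()
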
+ + :param url: a JDBC URL + :param table: name of table + :param column: the column used to partition + :param lowerBound: the lower bound of partition column + :param upperBound: the upper bound of the partition column + :param numPartitions: the number of partitions + :param predicates: a list of expressions + :param properties: JDBC database connection arguments, a list of arbitrary string + tag/value. Normally at least a "user" and "password" property + should be included. + :return: a DataFrame + """ + jprop = JavaClass("java.util.Properties", self._sqlContext._sc._gateway._gateway_client)() + for k in properties: + jprop.setProperty(k, properties[k]) + if column is not None: + if numPartitions is None: + numPartitions = self._sqlContext._sc.defaultParallelism + return self._df(self._jreader.jdbc(url, table, column, int(lowerBound), int(upperBound), + int(numPartitions), jprop)) + if predicates is not None: + arr = self._sqlContext._sc._jvm.PythonUtils.toArray(predicates) + return self._df(self._jreader.jdbc(url, table, arr, jprop)) + return self._df(self._jreader.jdbc(url, table, jprop)) + + +class DataFrameWriter(object): + """ + Interface used to write a [[DataFrame]] to external storage systems + (e.g. file systems, key-value stores, etc). Use :func:`DataFrame.write` + to access this. + + ::Note: Experimental + + .. versionadded:: 1.4 + """ + def __init__(self, df): + self._df = df + self._sqlContext = df.sql_ctx + self._jwrite = df._jdf.write() + + @since(1.4) + def mode(self, saveMode): + """ + Specifies the behavior when data or table already exists. Options include: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + """ + self._jwrite = self._jwrite.mode(saveMode) + return self + + @since(1.4) + def format(self, source): + """ + Specifies the underlying output data source. Built-in options include + "parquet", "json", etc. + """ + self._jwrite = self._jwrite.format(source) + return self + + @since(1.4) + def options(self, **options): + """ + Adds output options for the underlying data source. + """ + for k in options: + self._jwrite = self._jwrite.option(k, options[k]) + return self + + @since(1.4) + def partitionBy(self, *cols): + """ + Partitions the output by the given columns on the file system. + If specified, the output is laid out on the file system similar + to Hive's partitioning scheme. + + :param cols: name of columns + """ + if len(cols) == 1 and isinstance(cols[0], (list, tuple)): + cols = cols[0] + self._jwrite = self._jwrite.partitionBy(_to_seq(self._sqlContext._sc, cols)) + return self + + @since(1.4) + def save(self, path=None, format=None, mode="error", **options): + """ + Saves the contents of the :class:`DataFrame` to a data source. + + The data source is specified by the ``format`` and a set of ``options``. + If ``format`` is not specified, the default data source configured by + ``spark.sql.sources.default`` will be used. + + Additionally, mode is used to specify the behavior of the save operation when + data already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. 
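
On the writer side, mode, format, options and partitionBy accumulate state on the underlying java writer before a terminal call such as save or parquet. A sketch, assuming `sqlContext`:

    import shutil
    import tempfile

    from pyspark.sql import Row

    df = sqlContext.createDataFrame([Row(name='Alice', age=2),
                                     Row(name='Bob', age=5)])

    out = tempfile.mkdtemp()
    shutil.rmtree(out)                       # the writer creates the directory

    # Partitioned Parquet output through the builder chain.
    df.write.partitionBy('name').parquet(out, mode='overwrite')

    # The generic save() form is equivalent for built-in sources.
    df.write.save(path=out, format='parquet', mode='overwrite')

    sqlContext.read.parquet(out).show()      # read it back to check round-tripping
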
+ + :param path: the path in a Hadoop supported file system + :param format: the format used to save + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + :param options: all other string options + """ + self.mode(mode).options(**options) + if format is not None: + self.format(format) + if path is None: + self._jwrite.save() + else: + self._jwrite.save(path) + + @since(1.4) + def insertInto(self, tableName, overwrite=False): + """ + Inserts the content of the :class:`DataFrame` to the specified table. + It requires that the schema of the class:`DataFrame` is the same as the + schema of the table. + + Optionally overwriting any existing data. + """ + self._jwrite.mode("overwrite" if overwrite else "append").insertInto(tableName) + + @since(1.4) + def saveAsTable(self, name, format=None, mode="error", **options): + """ + Saves the content of the :class:`DataFrame` as the specified table. + + In the case the table already exists, behavior of this function depends on the + save mode, specified by the `mode` function (default to throwing an exception). + When `mode` is `Overwrite`, the schema of the [[DataFrame]] does not need to be + the same as that of the existing table. + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param name: the table name + :param format: the format used to save + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + :param options: all other string options + """ + self.mode(mode).options(**options) + if format is not None: + self.format(format) + return self._jwrite.saveAsTable(name) + + @since(1.4) + def json(self, path, mode="error"): + """ + Saves the content of the :class:`DataFrame` in JSON format at the + specified path. + + Additionally, mode is used to specify the behavior of the save operation when + data already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param path: the path in any Hadoop supported file system + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + """ + return self._jwrite.mode(mode).json(path) + + @since(1.4) + def parquet(self, path, mode="error"): + """ + Saves the content of the :class:`DataFrame` in Parquet format at the + specified path. + + Additionally, mode is used to specify the behavior of the save operation when + data already exists in the data source. There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param path: the path in any Hadoop supported file system + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + """ + return self._jwrite.mode(mode).parquet(path) + + @since(1.4) + def jdbc(self, url, table, mode="error", properties={}): + """ + Saves the content of the :class:`DataFrame` to a external database table + via JDBC. 
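
The per-format writer methods take the save mode directly; which modes a particular source actually supports is up to that source, so this sketch sticks to error/ignore/overwrite with JSON. Assuming `sqlContext`:

    import shutil
    import tempfile

    from pyspark.sql import Row

    df = sqlContext.createDataFrame([Row(name='Alice', age=2)])

    out = tempfile.mkdtemp()
    shutil.rmtree(out)                       # start from a non-existent path

    df.write.json(out)                       # default 'error' mode succeeds on a fresh path
    df.write.json(out, mode='ignore')        # output exists now, so this is a no-op
    df.write.json(out, mode='overwrite')     # replaces the previous output
    sqlContext.read.json(out).show()
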
+ + In the case the table already exists in the external database, + behavior of this function depends on the save mode, specified by the `mode` + function (default to throwing an exception). There are four modes: + + * `append`: Append contents of this :class:`DataFrame` to existing data. + * `overwrite`: Overwrite existing data. + * `error`: Throw an exception if data already exists. + * `ignore`: Silently ignore this operation if data already exists. + + :param url: a JDBC URL of the form `jdbc:subprotocol:subname` + :param table: Name of the table in the external database. + :param mode: one of `append`, `overwrite`, `error`, `ignore` (default: error) + :param properties: JDBC database connection arguments, a list of + arbitrary string tag/value. Normally at least a + "user" and "password" property should be included. + """ + jprop = JavaClass("java.util.Properties", self._sqlContext._sc._gateway._gateway_client)() + for k in properties: + jprop.setProperty(k, properties[k]) + self._jwrite.mode(mode).jdbc(url, table, jprop) + + +def _test(): + import doctest + from pyspark.context import SparkContext + from pyspark.sql import Row, SQLContext + import pyspark.sql.readwriter + globs = pyspark.sql.readwriter.__dict__.copy() + sc = SparkContext('local[4]', 'PythonTest') + globs['sc'] = sc + globs['sqlContext'] = SQLContext(sc) + globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')]) \ + .toDF(StructType([StructField('age', IntegerType()), + StructField('name', StringType())])) + jsonStrings = [ + '{"field1": 1, "field2": "row1", "field3":{"field4":11}}', + '{"field1" : 2, "field3":{"field4":22, "field5": [10, 11]},' + '"field6":[{"field7": "row2"}]}', + '{"field1" : null, "field2": "row3", ' + '"field3":{"field4":33, "field5": []}}' + ] + globs['jsonStrings'] = jsonStrings + (failure_count, test_count) = doctest.testmod( + pyspark.sql.readwriter, globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF) + globs['sc'].stop() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index d652c302a54ba..76384d31f1bf4 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -44,6 +44,7 @@ from pyspark.sql.types import UserDefinedType, _infer_type from pyspark.tests import ReusedPySparkTestCase from pyspark.sql.functions import UserDefinedFunction +from pyspark.sql.window import Window class ExamplePointUDT(UserDefinedType): @@ -99,6 +100,15 @@ def test_data_type_eq(self): lt2 = pickle.loads(pickle.dumps(LongType())) self.assertEquals(lt, lt2) + # regression test for SPARK-7978 + def test_decimal_type(self): + t1 = DecimalType() + t2 = DecimalType(10, 2) + self.assertTrue(t2 is not t1) + self.assertNotEqual(t1, t2) + t3 = DecimalType(8) + self.assertNotEqual(t2, t3) + class SQLTests(ReusedPySparkTestCase): @@ -117,6 +127,26 @@ def tearDownClass(cls): ReusedPySparkTestCase.tearDownClass() shutil.rmtree(cls.tempdir.name, ignore_errors=True) + def test_range(self): + self.assertEqual(self.sqlCtx.range(1, 1).count(), 0) + self.assertEqual(self.sqlCtx.range(1, 0, -1).count(), 1) + self.assertEqual(self.sqlCtx.range(0, 1 << 40, 1 << 39).count(), 2) + + def test_explode(self): + from pyspark.sql.functions import explode + d = [Row(a=1, intlist=[1, 2, 3], mapfield={"a": "b"})] + rdd = self.sc.parallelize(d) + data = self.sqlCtx.createDataFrame(rdd) + + result = data.select(explode(data.intlist).alias("a")).select("a").collect() + 
self.assertEqual(result[0][0], 1) + self.assertEqual(result[1][0], 2) + self.assertEqual(result[2][0], 3) + + result = data.select(explode(data.mapfield).alias("a", "b")).select("a", "b").collect() + self.assertEqual(result[0][0], "a") + self.assertEqual(result[0][1], "b") + def test_udf_with_callable(self): d = [Row(number=i, squared=i**2) for i in range(10)] rdd = self.sc.parallelize(d) @@ -405,9 +435,18 @@ def test_cov(self): cov = df.stat.cov("a", "b") self.assertTrue(abs(cov - 55.0 / 3) < 1e-6) + def test_crosstab(self): + df = self.sc.parallelize([Row(a=i % 3, b=i % 2) for i in range(1, 7)]).toDF() + ct = df.stat.crosstab("a", "b").collect() + ct = sorted(ct, key=lambda x: x[0]) + for i, row in enumerate(ct): + self.assertEqual(row[0], str(i)) + self.assertTrue(row[1], 1) + self.assertTrue(row[2], 1) + def test_math_functions(self): df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF() - from pyspark.sql import mathfunctions as functions + from pyspark.sql import functions import math def get_values(l): @@ -444,33 +483,41 @@ def test_rand_functions(self): for row in rndn: assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1] + def test_between_function(self): + df = self.sc.parallelize([ + Row(a=1, b=2, c=3), + Row(a=2, b=1, c=3), + Row(a=4, b=1, c=4)]).toDF() + self.assertEqual([Row(a=2, b=1, c=3), Row(a=4, b=1, c=4)], + df.filter(df.a.between(df.b, df.c)).collect()) + def test_save_and_load(self): df = self.df tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) - df.save(tmpPath, "org.apache.spark.sql.json", "error") - actual = self.sqlCtx.load(tmpPath, "org.apache.spark.sql.json") - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.json(tmpPath) + actual = self.sqlCtx.read.json(tmpPath) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) schema = StructType([StructField("value", StringType(), True)]) - actual = self.sqlCtx.load(tmpPath, "org.apache.spark.sql.json", schema) - self.assertTrue(sorted(df.select("value").collect()) == sorted(actual.collect())) + actual = self.sqlCtx.read.json(tmpPath, schema) + self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect())) - df.save(tmpPath, "org.apache.spark.sql.json", "overwrite") - actual = self.sqlCtx.load(tmpPath, "org.apache.spark.sql.json") - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.json(tmpPath, "overwrite") + actual = self.sqlCtx.read.json(tmpPath) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - df.save(source="org.apache.spark.sql.json", mode="overwrite", path=tmpPath, - noUse="this options will not be used in save.") - actual = self.sqlCtx.load(source="org.apache.spark.sql.json", path=tmpPath, - noUse="this options will not be used in load.") - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.save(format="json", mode="overwrite", path=tmpPath, + noUse="this options will not be used in save.") + actual = self.sqlCtx.read.load(format="json", path=tmpPath, + noUse="this options will not be used in load.") + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) defaultDataSourceName = self.sqlCtx.getConf("spark.sql.sources.default", "org.apache.spark.sql.parquet") self.sqlCtx.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") actual = self.sqlCtx.load(path=tmpPath) - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("SET 
spark.sql.sources.default=" + defaultDataSourceName) shutil.rmtree(tmpPath) @@ -502,6 +549,13 @@ def test_access_nested_types(self): self.assertEqual("v", df.select(df.d["k"]).first()[0]) self.assertEqual("v", df.select(df.d.getItem("k")).first()[0]) + def test_field_accessor(self): + df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF() + self.assertEqual(1, df.select(df.l[0]).first()[0]) + self.assertEqual(1, df.select(df.r["a"]).first()[0]) + self.assertEqual("b", df.select(df.r["b"]).first()[0]) + self.assertEqual("v", df.select(df.d["k"]).first()[0]) + def test_infer_long_type(self): longrow = [Row(f1='a', f2=100000000000000)] df = self.sc.parallelize(longrow).toDF() @@ -628,6 +682,67 @@ def test_fillna(self): self.assertEqual(row.age, None) self.assertEqual(row.height, None) + def test_bitwise_operations(self): + from pyspark.sql import functions + row = Row(a=170, b=75) + df = self.sqlCtx.createDataFrame([row]) + result = df.select(df.a.bitwiseAND(df.b)).collect()[0].asDict() + self.assertEqual(170 & 75, result['(a & b)']) + result = df.select(df.a.bitwiseOR(df.b)).collect()[0].asDict() + self.assertEqual(170 | 75, result['(a | b)']) + result = df.select(df.a.bitwiseXOR(df.b)).collect()[0].asDict() + self.assertEqual(170 ^ 75, result['(a ^ b)']) + result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() + self.assertEqual(~75, result['~b']) + + def test_replace(self): + schema = StructType([ + StructField("name", StringType(), True), + StructField("age", IntegerType(), True), + StructField("height", DoubleType(), True)]) + + # replace with int + row = self.sqlCtx.createDataFrame([(u'Alice', 10, 10.0)], schema).replace(10, 20).first() + self.assertEqual(row.age, 20) + self.assertEqual(row.height, 20.0) + + # replace with double + row = self.sqlCtx.createDataFrame( + [(u'Alice', 80, 80.0)], schema).replace(80.0, 82.1).first() + self.assertEqual(row.age, 82) + self.assertEqual(row.height, 82.1) + + # replace with string + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(u'Alice', u'Ann').first() + self.assertEqual(row.name, u"Ann") + self.assertEqual(row.age, 10) + + # replace with subset specified by a string of a column name w/ actual change + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='age').first() + self.assertEqual(row.age, 20) + + # replace with subset specified by a string of a column name w/o actual change + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 80.1)], schema).replace(10, 20, subset='height').first() + self.assertEqual(row.age, 10) + + # replace with subset specified with one column replaced, another column not in subset + # stays unchanged. 
+ row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, 10.0)], schema).replace(10, 20, subset=['name', 'age']).first() + self.assertEqual(row.name, u'Alice') + self.assertEqual(row.age, 20) + self.assertEqual(row.height, 10.0) + + # replace with subset specified but no column will be replaced + row = self.sqlCtx.createDataFrame( + [(u'Alice', 10, None)], schema).replace(10, 20, subset=['name', 'height']).first() + self.assertEqual(row.name, u'Alice') + self.assertEqual(row.age, 10) + self.assertEqual(row.height, None) + class HiveContextSQLTests(ReusedPySparkTestCase): @@ -638,11 +753,9 @@ def setUpClass(cls): try: cls.sc._jvm.org.apache.hadoop.hive.conf.HiveConf() except py4j.protocol.Py4JError: - cls.sqlCtx = None - return + raise unittest.SkipTest("Hive is not available") except TypeError: - cls.sqlCtx = None - return + raise unittest.SkipTest("Hive is not available") os.unlink(cls.tempdir.name) _scala_HiveContext =\ cls.sc._jvm.org.apache.spark.sql.hive.test.TestHiveContext(cls.sc._jsc.sc()) @@ -656,57 +769,68 @@ def tearDownClass(cls): shutil.rmtree(cls.tempdir.name, ignore_errors=True) def test_save_and_load_table(self): - if self.sqlCtx is None: - return # no hive available, skipped - df = self.df tmpPath = tempfile.mkdtemp() shutil.rmtree(tmpPath) - df.saveAsTable("savedJsonTable", "org.apache.spark.sql.json", "append", path=tmpPath) - actual = self.sqlCtx.createExternalTable("externalJsonTable", tmpPath, - "org.apache.spark.sql.json") - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + df.write.saveAsTable("savedJsonTable", "json", "append", path=tmpPath) + actual = self.sqlCtx.createExternalTable("externalJsonTable", tmpPath, "json") + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("DROP TABLE externalJsonTable") - df.saveAsTable("savedJsonTable", "org.apache.spark.sql.json", "overwrite", path=tmpPath) + df.write.saveAsTable("savedJsonTable", "json", "overwrite", path=tmpPath) schema = StructType([StructField("value", StringType(), True)]) - actual = self.sqlCtx.createExternalTable("externalJsonTable", - source="org.apache.spark.sql.json", + actual = self.sqlCtx.createExternalTable("externalJsonTable", source="json", schema=schema, path=tmpPath, noUse="this options will not be used") - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) - self.assertTrue( - sorted(df.select("value").collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) - self.assertTrue(sorted(df.select("value").collect()) == sorted(actual.collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) + self.assertEqual(sorted(df.select("value").collect()), + sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) + self.assertEqual(sorted(df.select("value").collect()), sorted(actual.collect())) self.sqlCtx.sql("DROP TABLE savedJsonTable") self.sqlCtx.sql("DROP TABLE externalJsonTable") defaultDataSourceName = 
self.sqlCtx.getConf("spark.sql.sources.default", "org.apache.spark.sql.parquet") self.sqlCtx.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") - df.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite") + df.write.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite") actual = self.sqlCtx.createExternalTable("externalJsonTable", path=tmpPath) - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) - self.assertTrue( - sorted(df.collect()) == - sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) - self.assertTrue(sorted(df.collect()) == sorted(actual.collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM savedJsonTable").collect())) + self.assertEqual(sorted(df.collect()), + sorted(self.sqlCtx.sql("SELECT * FROM externalJsonTable").collect())) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) self.sqlCtx.sql("DROP TABLE savedJsonTable") self.sqlCtx.sql("DROP TABLE externalJsonTable") self.sqlCtx.sql("SET spark.sql.sources.default=" + defaultDataSourceName) shutil.rmtree(tmpPath) + def test_window_functions(self): + df = self.sqlCtx.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + w = Window.partitionBy("value").orderBy("key") + from pyspark.sql import functions as F + sel = df.select(df.value, df.key, + F.max("key").over(w.rowsBetween(0, 1)), + F.min("key").over(w.rowsBetween(0, 1)), + F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), + F.rowNumber().over(w), + F.rank().over(w), + F.denseRank().over(w), + F.ntile(2).over(w)) + rs = sorted(sel.collect()) + expected = [ + ("1", 1, 1, 1, 1, 1, 1, 1, 1), + ("2", 1, 1, 1, 3, 1, 1, 1, 1), + ("2", 1, 2, 1, 3, 2, 1, 1, 1), + ("2", 2, 2, 2, 3, 3, 3, 2, 2) + ] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + if __name__ == "__main__": unittest.main() diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/types.py similarity index 95% rename from python/pyspark/sql/_types.py rename to python/pyspark/sql/types.py index 95fb91ad43457..b6ec6137c9180 100644 --- a/python/pyspark/sql/_types.py +++ b/python/pyspark/sql/types.py @@ -73,56 +73,84 @@ def json(self): # This singleton pattern does not work with pickle, you will get # another object after pickle and unpickle -class PrimitiveTypeSingleton(type): - """Metaclass for PrimitiveType""" +class DataTypeSingleton(type): + """Metaclass for DataType""" _instances = {} def __call__(cls): if cls not in cls._instances: - cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__() + cls._instances[cls] = super(DataTypeSingleton, cls).__call__() return cls._instances[cls] -class PrimitiveType(DataType): - """Spark SQL PrimitiveType""" +class NullType(DataType): + """Null type. + + The data type representing None, used for the types that cannot be inferred. + """ - __metaclass__ = PrimitiveTypeSingleton + __metaclass__ = DataTypeSingleton -class NullType(PrimitiveType): - """Null type. +class AtomicType(DataType): + """An internal type used to represent everything that is not + null, UDTs, arrays, structs, and maps.""" - The data type representing None, used for the types that cannot be inferred. + +class NumericType(AtomicType): + """Numeric data types. + """ + + +class IntegralType(NumericType): + """Integral data types. + """ + + __metaclass__ = DataTypeSingleton + + +class FractionalType(NumericType): + """Fractional data types. 
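# A hedged, illustrative sketch (assuming a PySpark build that includes this patch) of
# the reworked type hierarchy above: atomic types are singletons via DataTypeSingleton,
# while DecimalType is parameterized by precision/scale and therefore is not.
from pyspark.sql.types import (StringType, IntegerType, DoubleType,
                               IntegralType, FractionalType, DecimalType)

assert StringType() is StringType()           # singleton: every call returns the same instance
assert isinstance(IntegerType(), IntegralType)
assert isinstance(DoubleType(), FractionalType)
assert DecimalType(10, 2) != DecimalType()    # SPARK-7978: precision/scale distinguish instances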
""" -class StringType(PrimitiveType): +class StringType(AtomicType): """String data type. """ + __metaclass__ = DataTypeSingleton -class BinaryType(PrimitiveType): + +class BinaryType(AtomicType): """Binary (byte array) data type. """ + __metaclass__ = DataTypeSingleton + -class BooleanType(PrimitiveType): +class BooleanType(AtomicType): """Boolean data type. """ + __metaclass__ = DataTypeSingleton + -class DateType(PrimitiveType): +class DateType(AtomicType): """Date (datetime.date) data type. """ + __metaclass__ = DataTypeSingleton -class TimestampType(PrimitiveType): + +class TimestampType(AtomicType): """Timestamp (datetime.datetime) data type. """ + __metaclass__ = DataTypeSingleton + -class DecimalType(DataType): +class DecimalType(FractionalType): """Decimal (decimal.Decimal) data type. """ @@ -150,31 +178,35 @@ def __repr__(self): return "DecimalType()" -class DoubleType(PrimitiveType): +class DoubleType(FractionalType): """Double data type, representing double precision floats. """ + __metaclass__ = DataTypeSingleton + -class FloatType(PrimitiveType): +class FloatType(FractionalType): """Float data type, representing single precision floats. """ + __metaclass__ = DataTypeSingleton -class ByteType(PrimitiveType): + +class ByteType(IntegralType): """Byte data type, i.e. a signed integer in a single byte. """ def simpleString(self): return 'tinyint' -class IntegerType(PrimitiveType): +class IntegerType(IntegralType): """Int data type, i.e. a signed 32-bit integer. """ def simpleString(self): return 'int' -class LongType(PrimitiveType): +class LongType(IntegralType): """Long data type, i.e. a signed 64-bit integer. If the values are beyond the range of [-9223372036854775808, 9223372036854775807], @@ -184,7 +216,7 @@ def simpleString(self): return 'bigint' -class ShortType(PrimitiveType): +class ShortType(IntegralType): """Short data type, i.e. a signed 16-bit integer. """ def simpleString(self): @@ -426,11 +458,9 @@ def __eq__(self, other): return type(self) == type(other) -_all_primitive_types = dict((v.typeName(), v) - for v in list(globals().values()) - if (type(v) is type or type(v) is PrimitiveTypeSingleton) - and v.__base__ == PrimitiveType) - +_atomic_types = [StringType, BinaryType, BooleanType, DecimalType, FloatType, DoubleType, + ByteType, ShortType, IntegerType, LongType, DateType, TimestampType] +_all_atomic_types = dict((t.typeName(), t) for t in _atomic_types) _all_complex_types = dict((v.typeName(), v) for v in [ArrayType, MapType, StructType]) @@ -444,7 +474,7 @@ def _parse_datatype_json_string(json_string): ... scala_datatype = sqlContext._ssql_ctx.parseDataType(datatype.json()) ... python_datatype = _parse_datatype_json_string(scala_datatype.json()) ... assert datatype == python_datatype - >>> for cls in _all_primitive_types.values(): + >>> for cls in _all_atomic_types.values(): ... check_datatype(cls()) >>> # Simple ArrayType. 
@@ -494,8 +524,8 @@ def _parse_datatype_json_string(json_string): def _parse_datatype_json_value(json_value): if not isinstance(json_value, dict): - if json_value in _all_primitive_types.keys(): - return _all_primitive_types[json_value]() + if json_value in _all_atomic_types.keys(): + return _all_atomic_types[json_value]() elif json_value == 'decimal': return DecimalType() elif _FIXED_DECIMAL.match(json_value): @@ -652,7 +682,7 @@ def _python_to_sql_converter(dataType): if isinstance(dataType, StructType): names, types = zip(*[(f.name, f.dataType) for f in dataType.fields]) - converters = map(_python_to_sql_converter, types) + converters = [_python_to_sql_converter(t) for t in types] def converter(obj): if isinstance(obj, dict): @@ -930,7 +960,7 @@ def _infer_schema_type(obj, dataType): DecimalType: (decimal.Decimal,), StringType: (str, unicode), BinaryType: (bytearray,), - DateType: (datetime.date,), + DateType: (datetime.date, datetime.datetime), TimestampType: (datetime.datetime,), ArrayType: (list, tuple, array), MapType: (dict,), @@ -1125,7 +1155,7 @@ def Dict(d): return lambda datum: dataType.deserialize(datum) elif not isinstance(dataType, StructType): - # no wrapper for primitive types + # no wrapper for atomic types return lambda x: x class Row(tuple): @@ -1228,12 +1258,14 @@ def __getattr__(self, item): raise AttributeError(item) def __reduce__(self): + """Returns a tuple so Python knows how to pickle Row.""" if hasattr(self, "__fields__"): return (_create_row, (self.__fields__, tuple(self))) else: return tuple.__reduce__(self) def __repr__(self): + """Printable representation of Row used in Python REPL.""" if hasattr(self, "__fields__"): return "Row(%s)" % ", ".join("%s=%r" % (k, v) for k, v in zip(self.__fields__, tuple(self))) diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py new file mode 100644 index 0000000000000..0a0e006bdf83a --- /dev/null +++ b/python/pyspark/sql/window.py @@ -0,0 +1,158 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys + +from pyspark import SparkContext +from pyspark.sql import since +from pyspark.sql.column import _to_seq, _to_java_column + +__all__ = ["Window", "WindowSpec"] + + +def _to_java_cols(cols): + sc = SparkContext._active_spark_context + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] + return _to_seq(sc, cols, _to_java_column) + + +class Window(object): + + """ + Utility functions for defining window in DataFrames. 
+
+    For example:
+
+    >>> # PARTITION BY country ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+    >>> window = Window.partitionBy("country").orderBy("date").rowsBetween(-sys.maxsize, 0)
+
+    >>> # PARTITION BY country ORDER BY date RANGE BETWEEN 3 PRECEDING AND 3 FOLLOWING
+    >>> window = Window.orderBy("date").partitionBy("country").rangeBetween(-3, 3)
+
+    .. note:: Experimental
+
+    .. versionadded:: 1.4
+    """
+    @staticmethod
+    @since(1.4)
+    def partitionBy(*cols):
+        """
+        Creates a :class:`WindowSpec` with the partitioning defined.
+        """
+        sc = SparkContext._active_spark_context
+        jspec = sc._jvm.org.apache.spark.sql.expressions.Window.partitionBy(_to_java_cols(cols))
+        return WindowSpec(jspec)
+
+    @staticmethod
+    @since(1.4)
+    def orderBy(*cols):
+        """
+        Creates a :class:`WindowSpec` with the ordering defined.
+        """
+        sc = SparkContext._active_spark_context
+        jspec = sc._jvm.org.apache.spark.sql.expressions.Window.orderBy(_to_java_cols(cols))
+        return WindowSpec(jspec)
+
+
+class WindowSpec(object):
+    """
+    A window specification that defines the partitioning, ordering,
+    and frame boundaries.
+
+    Use the static methods in :class:`Window` to create a :class:`WindowSpec`.
+
+    .. note:: Experimental
+
+    .. versionadded:: 1.4
+    """
+
+    _JAVA_MAX_LONG = (1 << 63) - 1
+    _JAVA_MIN_LONG = - (1 << 63)
+
+    def __init__(self, jspec):
+        self._jspec = jspec
+
+    @since(1.4)
+    def partitionBy(self, *cols):
+        """
+        Defines the partitioning columns in a :class:`WindowSpec`.
+
+        :param cols: names of columns or expressions
+        """
+        return WindowSpec(self._jspec.partitionBy(_to_java_cols(cols)))
+
+    @since(1.4)
+    def orderBy(self, *cols):
+        """
+        Defines the ordering columns in a :class:`WindowSpec`.
+
+        :param cols: names of columns or expressions
+        """
+        return WindowSpec(self._jspec.orderBy(_to_java_cols(cols)))
+
+    @since(1.4)
+    def rowsBetween(self, start, end):
+        """
+        Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+
+        Both `start` and `end` are relative positions from the current row.
+        For example, "0" means "current row", while "-1" means the row before
+        the current row, and "5" means the fifth row after the current row.
+
+        :param start: boundary start, inclusive.
+                      The frame is unbounded if this is ``-sys.maxsize`` (or lower).
+        :param end: boundary end, inclusive.
+                    The frame is unbounded if this is ``sys.maxsize`` (or higher).
+        """
+        if start <= -sys.maxsize:
+            start = self._JAVA_MIN_LONG
+        if end >= sys.maxsize:
+            end = self._JAVA_MAX_LONG
+        return WindowSpec(self._jspec.rowsBetween(start, end))
+
+    @since(1.4)
+    def rangeBetween(self, start, end):
+        """
+        Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive).
+
+        Both `start` and `end` are relative to the current row's value. For example,
+        "0" means "current row's value", while "-1" means one less than the current
+        row's value, and "5" means five greater than the current row's value.
+
+        :param start: boundary start, inclusive.
+                      The frame is unbounded if this is ``-sys.maxsize`` (or lower).
+        :param end: boundary end, inclusive.
+                    The frame is unbounded if this is ``sys.maxsize`` (or higher).
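# A hedged sketch of how the Window/WindowSpec API above combines with Column.over(),
# mirroring the test_window_functions case earlier in this diff.  `sqlCtx` is assumed
# to be a HiveContext, since window functions require Hive support in this release.
import sys
from pyspark.sql import functions as F
from pyspark.sql.window import Window

df = sqlCtx.createDataFrame([(1, "1"), (2, "2"), (1, "2")], ["key", "value"])
w = Window.partitionBy("value").orderBy("key").rowsBetween(-sys.maxsize, 0)
df.select(df.key, df.value,
          F.rowNumber().over(w).alias("row_number"),
          F.max("key").over(w).alias("running_max")).show()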
+ """ + if start <= -sys.maxsize: + start = self._JAVA_MIN_LONG + if end >= sys.maxsize: + end = self._JAVA_MAX_LONG + return WindowSpec(self._jspec.rangeBetween(start, end)) + + +def _test(): + import doctest + SparkContext('local[4]', 'PythonTest') + (failure_count, test_count) = doctest.testmod() + if failure_count: + exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py index e278b29003f69..10a859a532e28 100644 --- a/python/pyspark/streaming/kafka.py +++ b/python/pyspark/streaming/kafka.py @@ -132,11 +132,12 @@ def createRDD(sc, kafkaParams, offsetRanges, leaders={}, .. note:: Experimental Create a RDD from Kafka using offset ranges for each topic and partition. + :param sc: SparkContext object :param kafkaParams: Additional params for Kafka :param offsetRanges: list of offsetRange to specify topic:partition:[start, end) to consume :param leaders: Kafka brokers for each TopicAndPartition in offsetRanges. May be an empty - map, in which case leaders will be looked up on the driver. + map, in which case leaders will be looked up on the driver. :param keyDecoder: A function used to decode key (default is utf8_decoder) :param valueDecoder: A function used to decode value (default is utf8_decoder) :return: A RDD object diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py index 33ea8c9293d74..46cb18b2e8ef9 100644 --- a/python/pyspark/streaming/tests.py +++ b/python/pyspark/streaming/tests.py @@ -41,8 +41,8 @@ class PySparkStreamingTestCase(unittest.TestCase): - timeout = 4 # seconds - duration = .2 + timeout = 10 # seconds + duration = .5 @classmethod def setUpClass(cls): @@ -379,13 +379,13 @@ def func(dstream): class WindowFunctionTests(PySparkStreamingTestCase): - timeout = 5 + timeout = 15 def test_window(self): input = [range(1), range(2), range(3), range(4), range(5)] def func(dstream): - return dstream.window(.6, .2).count() + return dstream.window(1.5, .5).count() expected = [[1], [3], [6], [9], [12], [9], [5]] self._test_func(input, func, expected) @@ -394,7 +394,7 @@ def test_count_by_window(self): input = [range(1), range(2), range(3), range(4), range(5)] def func(dstream): - return dstream.countByWindow(.6, .2) + return dstream.countByWindow(1.5, .5) expected = [[1], [3], [6], [9], [12], [9], [5]] self._test_func(input, func, expected) @@ -403,7 +403,7 @@ def test_count_by_window_large(self): input = [range(1), range(2), range(3), range(4), range(5), range(6)] def func(dstream): - return dstream.countByWindow(1, .2) + return dstream.countByWindow(2.5, .5) expected = [[1], [3], [6], [10], [15], [20], [18], [15], [11], [6]] self._test_func(input, func, expected) @@ -412,7 +412,7 @@ def test_count_by_value_and_window(self): input = [range(1), range(2), range(3), range(4), range(5), range(6)] def func(dstream): - return dstream.countByValueAndWindow(1, .2) + return dstream.countByValueAndWindow(2.5, .5) expected = [[1], [2], [3], [4], [5], [6], [6], [6], [6], [6]] self._test_func(input, func, expected) @@ -421,7 +421,7 @@ def test_group_by_key_and_window(self): input = [[('a', i)] for i in range(5)] def func(dstream): - return dstream.groupByKeyAndWindow(.6, .2).mapValues(list) + return dstream.groupByKeyAndWindow(1.5, .5).mapValues(list) expected = [[('a', [0])], [('a', [0, 1])], [('a', [0, 1, 2])], [('a', [1, 2, 3])], [('a', [2, 3, 4])], [('a', [3, 4])], [('a', [4])]] diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index ea63a396da5b8..f9fb37f7fc139 100644 --- 
a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -444,6 +444,11 @@ def func(x): class RDDTests(ReusedPySparkTestCase): + def test_range(self): + self.assertEqual(self.sc.range(1, 1).count(), 0) + self.assertEqual(self.sc.range(1, 0, -1).count(), 1) + self.assertEqual(self.sc.range(0, 1 << 40, 1 << 39).count(), 2) + def test_id(self): rdd = self.sc.parallelize(range(10)) id = rdd.id() @@ -644,7 +649,6 @@ def test_count_approx_distinct(self): self.assertTrue(18 < rdd.map(lambda x: (x, -x)).countApproxDistinct() < 22) self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.00000001)) - self.assertRaises(ValueError, lambda: rdd.countApproxDistinct(0.5)) def test_histogram(self): # empty @@ -1544,13 +1548,13 @@ def count(): def test_with_different_versions_of_python(self): rdd = self.sc.parallelize(range(10)) rdd.count() - version = sys.version_info - sys.version_info = (2, 0, 0) + version = self.sc.pythonVer + self.sc.pythonVer = "2.0" try: with QuietTest(self.sc): self.assertRaises(Py4JJavaError, lambda: rdd.count()) finally: - sys.version_info = version + self.sc.pythonVer = version class SparkSubmitTests(unittest.TestCase): @@ -1805,6 +1809,10 @@ def run(): sc.stop() + def test_startTime(self): + with SparkContext() as sc: + self.assertGreater(sc.startTime, 0) + @unittest.skipIf(not _have_scipy, "SciPy not installed") class SciPyTests(PySparkTestCase): diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index fbdaf3a5814cd..93df9002be377 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -57,6 +57,12 @@ def main(infile, outfile): if split_index == -1: # for unit tests exit(-1) + version = utf8_deserializer.loads(infile) + if version != "%d.%d" % sys.version_info[:2]: + raise Exception(("Python in worker has different version %s than that in " + + "driver %s, PySpark cannot run with different minor versions") % + ("%d.%d" % sys.version_info[:2], version)) + # initialize global state shuffle.MemoryBytesSpilled = 0 shuffle.DiskBytesSpilled = 0 @@ -92,11 +98,7 @@ def main(infile, outfile): command = pickleSer._read_with_length(infile) if isinstance(command, Broadcast): command = pickleSer.loads(command.value) - (func, profiler, deserializer, serializer), version = command - if version != sys.version_info[:2]: - raise Exception(("Python in worker has different version %s than that in " + - "driver %s, PySpark cannot run with different minor versions") % - (sys.version_info[:2], version)) + func, profiler, deserializer, serializer = command init_time = time.time() def process(): diff --git a/python/run-tests b/python/run-tests index 0e0eee3564e7c..17dda3eadac0c 100755 --- a/python/run-tests +++ b/python/run-tests @@ -57,49 +57,56 @@ function run_test() { function run_core_tests() { echo "Run core tests ..." - run_test "pyspark/rdd.py" - run_test "pyspark/context.py" - run_test "pyspark/conf.py" - PYSPARK_DOC_TEST=1 run_test "pyspark/broadcast.py" - PYSPARK_DOC_TEST=1 run_test "pyspark/accumulators.py" - run_test "pyspark/serializers.py" - run_test "pyspark/profiler.py" - run_test "pyspark/shuffle.py" - run_test "pyspark/tests.py" + run_test "pyspark.rdd" + run_test "pyspark.context" + run_test "pyspark.conf" + run_test "pyspark.broadcast" + run_test "pyspark.accumulators" + run_test "pyspark.serializers" + run_test "pyspark.profiler" + run_test "pyspark.shuffle" + run_test "pyspark.tests" } function run_sql_tests() { echo "Run sql tests ..." 
- run_test "pyspark/sql/_types.py" - run_test "pyspark/sql/context.py" - run_test "pyspark/sql/dataframe.py" - run_test "pyspark/sql/functions.py" - run_test "pyspark/sql/tests.py" + run_test "pyspark.sql.types" + run_test "pyspark.sql.context" + run_test "pyspark.sql.column" + run_test "pyspark.sql.dataframe" + run_test "pyspark.sql.group" + run_test "pyspark.sql.functions" + run_test "pyspark.sql.readwriter" + run_test "pyspark.sql.window" + run_test "pyspark.sql.tests" } function run_mllib_tests() { echo "Run mllib tests ..." - run_test "pyspark/mllib/classification.py" - run_test "pyspark/mllib/clustering.py" - run_test "pyspark/mllib/evaluation.py" - run_test "pyspark/mllib/feature.py" - run_test "pyspark/mllib/fpm.py" - run_test "pyspark/mllib/linalg.py" - run_test "pyspark/mllib/rand.py" - run_test "pyspark/mllib/recommendation.py" - run_test "pyspark/mllib/regression.py" - run_test "pyspark/mllib/stat/_statistics.py" - run_test "pyspark/mllib/tree.py" - run_test "pyspark/mllib/util.py" - run_test "pyspark/mllib/tests.py" + run_test "pyspark.mllib.classification" + run_test "pyspark.mllib.clustering" + run_test "pyspark.mllib.evaluation" + run_test "pyspark.mllib.feature" + run_test "pyspark.mllib.fpm" + run_test "pyspark.mllib.linalg" + run_test "pyspark.mllib.random" + run_test "pyspark.mllib.recommendation" + run_test "pyspark.mllib.regression" + run_test "pyspark.mllib.stat._statistics" + run_test "pyspark.mllib.tree" + run_test "pyspark.mllib.util" + run_test "pyspark.mllib.tests" } function run_ml_tests() { echo "Run ml tests ..." - run_test "pyspark/ml/feature.py" - run_test "pyspark/ml/classification.py" - run_test "pyspark/ml/tuning.py" - run_test "pyspark/ml/tests.py" + run_test "pyspark.ml.feature" + run_test "pyspark.ml.classification" + run_test "pyspark.ml.recommendation" + run_test "pyspark.ml.regression" + run_test "pyspark.ml.tuning" + run_test "pyspark.ml.tests" + run_test "pyspark.ml.evaluation" } function run_streaming_tests() { @@ -119,8 +126,8 @@ function run_streaming_tests() { done export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell" - run_test "pyspark/streaming/util.py" - run_test "pyspark/streaming/tests.py" + run_test "pyspark.streaming.util" + run_test "pyspark.streaming.tests" } echo "Running PySpark tests. Output is in python/$LOG_FILE." diff --git a/repl/pom.xml b/repl/pom.xml index 03053b4c3b287..58e9e1e49b5d9 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 8dc0e0c965923..2b235525250c2 100644 --- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -206,7 +206,8 @@ class SparkILoop( // e.g. file:/C:/my/path.jar -> C:/my/path.jar SparkILoop.getAddedJars.map { jar => new URI(jar).getPath.stripPrefix("/") } } else { - SparkILoop.getAddedJars + // We need new URI(jar).getPath here for the case that `jar` includes encoded white space (%20). 
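// Illustration (hypothetical jar path) of the decoding that getPath performs below:
//   new java.net.URI("file:/C:/my%20lib.jar").getPath   // returns "/C:/my lib.jar"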
+ SparkILoop.getAddedJars.map { jar => new URI(jar).getPath } } // work around for Scala bug val totalClassPath = addedJars.foldLeft( @@ -1028,7 +1029,7 @@ class SparkILoop( logInfo("Created sql context (with Hive support)..") } catch { - case cnf: java.lang.ClassNotFoundException => + case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError => sqlContext = new SQLContext(sparkContext) logInfo("Created sql context..") } @@ -1109,7 +1110,7 @@ object SparkILoop extends Logging { if (settings.classpath.isDefault) settings.classpath.value = sys.props("java.class.path") - getAddedJars.foreach(settings.classpath.append(_)) + getAddedJars.map(jar => new URI(jar).getPath).foreach(settings.classpath.append(_)) repl process settings } diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala index 2210fbaafeadb..f4f4b626988e9 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala @@ -88,7 +88,7 @@ object Main extends Logging { logInfo("Created sql context (with Hive support)..") } catch { - case cnf: java.lang.ClassNotFoundException => + case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError => sqlContext = new SQLContext(sparkContext) logInfo("Created sql context..") } diff --git a/sbin/start-master.sh b/sbin/start-master.sh index 17fff58f4f768..a7f5d5702fd80 100755 --- a/sbin/start-master.sh +++ b/sbin/start-master.sh @@ -22,6 +22,8 @@ sbin="`dirname "$0"`" sbin="`cd "$sbin"; pwd`" +ORIGINAL_ARGS="$@" + START_TACHYON=false while (( "$#" )); do @@ -53,7 +55,9 @@ if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then SPARK_MASTER_WEBUI_PORT=8080 fi -"$sbin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT +"$sbin"/spark-daemon.sh start org.apache.spark.deploy.master.Master 1 \ + --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \ + $ORIGINAL_ARGS if [ "$START_TACHYON" == "true" ]; then "$sbin"/../tachyon/bin/tachyon bootstrap-conf $SPARK_MASTER_IP diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 7168d5b2a8e26..dd4eb8c1ffbc3 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -50,9 +50,12 @@ */]]> + + + - + @@ -96,8 +99,8 @@ - - + + @@ -114,6 +117,9 @@ + + + @@ -142,4 +148,15 @@ + + + ARROW, EQUALS, ELSE, TRY, CATCH, FINALLY, LARROW, RARROW + + + + + ARROW, EQUALS, COMMA, COLON, IF, ELSE, DO, WHILE, FOR, MATCH, TRY, CATCH, FINALLY, LARROW, RARROW + + + diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5c322d032d474..9f1ac40a99d30 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 0a358ed408aa1..bb546b3086b33 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -145,7 +145,7 @@ public void pointTo( private void assertIndexIsValid(int index) { assert index >= 0 : "index (" + index + ") should >= 0"; - assert index < numFields : "index (" + index + ") should <= " + numFields; + assert index < numFields : 
"index (" + index + ") should < " + numFields; } @Override diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 4190b7ffe1c8f..0d460b634d9b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -55,6 +55,9 @@ object Row { // TODO: Improve the performance of this if used in performance critical part. new GenericRow(rows.flatMap(_.toSeq).toArray) } + + /** Returns an empty row. */ + val empty = apply() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index 2eb3e167baad5..ef7b3ad9432cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -103,7 +103,7 @@ class SqlLexical extends StdLexical { ( identChar ~ (identChar | digit).* ^^ { case first ~ rest => processIdent((first :: rest).mkString) } | rep1(digit) ~ ('.' ~> digit.*).? ^^ { - case i ~ None => NumericLit(i.mkString) + case i ~ None => NumericLit(i.mkString) case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString) } | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala similarity index 62% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala index 977003493d471..3f351b07b37df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/Dialect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala @@ -17,17 +17,19 @@ package org.apache.spark.sql.catalyst -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +private[spark] trait CatalystConf { + def caseSensitiveAnalysis: Boolean +} /** - * Root class of SQL Parser Dialect, and we don't guarantee the binary - * compatibility for the future release, let's keep it as the internal - * interface for advanced user. - * + * A trivial conf that is empty. Used for testing when all + * relations are already filled in and the analyser needs only to resolve attribute references. */ -@DeveloperApi -abstract class Dialect { - // this is the main function that will be implemented by sql parser. - def parse(sqlText: String): LogicalPlan +object EmptyConf extends CatalystConf { + override def caseSensitiveAnalysis: Boolean = { + throw new UnsupportedOperationException + } } + +/** A CatalystConf that can be used for local testing. 
*/ +case class SimpleCatalystConf(caseSensitiveAnalysis: Boolean) extends CatalystConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index a13e2f36a1a1f..1c0ddb5093d17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -23,6 +23,7 @@ import java.util.{Map => JavaMap} import scala.collection.mutable.HashMap import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ /** @@ -232,7 +233,7 @@ object CatalystTypeConverters { case other => other } - /** + /** * Converts Catalyst types used internally in rows to standard Scala types * This method is slow, and for batch conversion you should be using converter * produced by createToScalaConverter. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala similarity index 93% rename from sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index db484c5f50074..9a3f9694e4c48 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -15,18 +15,17 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.catalyst import java.beans.Introspector import java.lang.{Iterable => JIterable} import java.util.{Iterator => JIterator, Map => JMap} -import com.google.common.reflect.TypeToken +import scala.language.existentials +import com.google.common.reflect.TypeToken import org.apache.spark.sql.types._ -import scala.language.existentials - /** * Type-inference utilities for POJOs and Java collections. */ @@ -39,12 +38,21 @@ private [sql] object JavaTypeInference { private val keySetReturnType = classOf[JMap[_, _]].getMethod("keySet").getGenericReturnType private val valuesReturnType = classOf[JMap[_, _]].getMethod("values").getGenericReturnType + /** + * Infers the corresponding SQL data type of a JavaClean class. + * @param beanClass Java type + * @return (SQL data type, nullable) + */ + def inferDataType(beanClass: Class[_]): (DataType, Boolean) = { + inferDataType(TypeToken.of(beanClass)) + } + /** * Infers the corresponding SQL data type of a Java type. * @param typeToken Java type * @return (SQL data type, nullable) */ - private [sql] def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = { + private def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = { // TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific. typeToken.getRawType match { case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala new file mode 100644 index 0000000000000..554fb4eb25eb1 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ParserDialect.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan + +/** + * Root class of SQL Parser Dialect, and we don't guarantee the binary + * compatibility for the future release, let's keep it as the internal + * interface for advanced user. + * + */ +@DeveloperApi +abstract class ParserDialect { + // this is the main function that will be implemented by sql parser. + def parse(sqlText: String): LogicalPlan +} + +/** + * Currently we support the default dialect named "sql", associated with the class + * [[DefaultParserDialect]] + * + * And we can also provide custom SQL Dialect, for example in Spark SQL CLI: + * {{{ + *-- switch to "hiveql" dialect + * spark-sql>SET spark.sql.dialect=hiveql; + * spark-sql>SELECT * FROM src LIMIT 1; + * + *-- switch to "sql" dialect + * spark-sql>SET spark.sql.dialect=sql; + * spark-sql>SELECT * FROM src LIMIT 1; + * + *-- register the new SQL dialect + * spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect; + * spark-sql> SELECT * FROM src LIMIT 1; + * + *-- register the non-exist SQL dialect + * spark-sql> SET spark.sql.dialect=NotExistedClass; + * spark-sql> SELECT * FROM src LIMIT 1; + * + *-- Exception will be thrown and switch to dialect + *-- "sql" (for SQLContext) or + *-- "hiveql" (for HiveContext) + * }}} + */ +private[spark] class DefaultParserDialect extends ParserDialect { + @transient + protected val sqlParser = new SqlParser + + override def parse(sqlText: String): LogicalPlan = { + sqlParser.parse(sqlText) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index c52965507c715..6998cc8d9666d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.types._ */ object ScalaReflection extends ScalaReflection { val universe: scala.reflect.runtime.universe.type = scala.reflect.runtime.universe + val mirror: universe.Mirror = universe.runtimeMirror(Thread.currentThread().getContextClassLoader) } /** @@ -36,6 +37,9 @@ trait ScalaReflection { /** The universe we work in (runtime or macro) */ val universe: scala.reflect.api.Universe + /** The mirror used to access types in the universe */ + val mirror: universe.Mirror + import universe._ // The Predef.Map is scala.collection.immutable.Map. @@ -52,7 +56,19 @@ trait ScalaReflection { /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. 
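// A hedged sketch of what schemaFor (documented above and defined just below, together
// with the localTypeOf helper) resolves at runtime.  `Person` is a hypothetical case
// class used only for illustration.
import org.apache.spark.sql.catalyst.ScalaReflection

case class Person(name: String, age: Int)
// Roughly: StructType(StructField(name,StringType,true), StructField(age,IntegerType,false))
val personSchema = ScalaReflection.schemaFor[Person].dataType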
*/ def schemaFor[T: TypeTag]: Schema = - ScalaReflectionLock.synchronized { schemaFor(typeOf[T]) } + ScalaReflectionLock.synchronized { schemaFor(localTypeOf[T]) } + + /** + * Return the Scala Type for `T` in the current classloader mirror. + * + * Use this method instead of the convenience method `universe.typeOf`, which + * assumes that all types can be found in the classloader that loaded scala-reflect classes. + * That's not necessarily the case when running using Eclipse launchers or even + * Sbt console or test (without `fork := true`). + * + * @see SPARK-5281 + */ + private def localTypeOf[T: TypeTag]: `Type` = typeTag[T].in(mirror).tpe /** Returns a catalyst DataType and its nullability for the given Scala Type using reflection. */ def schemaFor(tpe: `Type`): Schema = ScalaReflectionLock.synchronized { @@ -67,25 +83,25 @@ trait ScalaReflection { val udt = Utils.classForName(className) .getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance() Schema(udt, nullable = true) - case t if t <:< typeOf[Option[_]] => + case t if t <:< localTypeOf[Option[_]] => val TypeRef(_, _, Seq(optType)) = t Schema(schemaFor(optType).dataType, nullable = true) // Need to decide if we actually need a special type here. - case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) - case t if t <:< typeOf[Array[_]] => + case t if t <:< localTypeOf[Array[Byte]] => Schema(BinaryType, nullable = true) + case t if t <:< localTypeOf[Array[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, nullable) = schemaFor(elementType) Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Seq[_]] => + case t if t <:< localTypeOf[Seq[_]] => val TypeRef(_, _, Seq(elementType)) = t val Schema(dataType, nullable) = schemaFor(elementType) Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Map[_, _]] => + case t if t <:< localTypeOf[Map[_, _]] => val TypeRef(_, _, Seq(keyType, valueType)) = t val Schema(valueDataType, valueNullable) = schemaFor(valueType) Schema(MapType(schemaFor(keyType).dataType, valueDataType, valueContainsNull = valueNullable), nullable = true) - case t if t <:< typeOf[Product] => + case t if t <:< localTypeOf[Product] => val formalTypeArgs = t.typeSymbol.asClass.typeParams val TypeRef(_, _, actualTypeArgs) = t val constructorSymbol = t.member(nme.CONSTRUCTOR) @@ -107,19 +123,20 @@ trait ScalaReflection { schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs)) StructField(p.name.toString, dataType, nullable) }), nullable = true) - case t if t <:< typeOf[String] => Schema(StringType, nullable = true) - case t if t <:< typeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) - case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true) - case t if t <:< typeOf[BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[java.math.BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true) - case t if t <:< typeOf[java.lang.Integer] => Schema(IntegerType, nullable = true) - case t if t <:< typeOf[java.lang.Long] => Schema(LongType, nullable = true) - case t if t <:< typeOf[java.lang.Double] => Schema(DoubleType, nullable = true) - case t if t <:< typeOf[java.lang.Float] => Schema(FloatType, nullable = true) - case t if t <:< typeOf[java.lang.Short] => Schema(ShortType, nullable = true) - case t if t <:< typeOf[java.lang.Byte] => 
Schema(ByteType, nullable = true) - case t if t <:< typeOf[java.lang.Boolean] => Schema(BooleanType, nullable = true) + case t if t <:< localTypeOf[String] => Schema(StringType, nullable = true) + case t if t <:< localTypeOf[java.sql.Timestamp] => Schema(TimestampType, nullable = true) + case t if t <:< localTypeOf[java.sql.Date] => Schema(DateType, nullable = true) + case t if t <:< localTypeOf[BigDecimal] => Schema(DecimalType.Unlimited, nullable = true) + case t if t <:< localTypeOf[java.math.BigDecimal] => + Schema(DecimalType.Unlimited, nullable = true) + case t if t <:< localTypeOf[Decimal] => Schema(DecimalType.Unlimited, nullable = true) + case t if t <:< localTypeOf[java.lang.Integer] => Schema(IntegerType, nullable = true) + case t if t <:< localTypeOf[java.lang.Long] => Schema(LongType, nullable = true) + case t if t <:< localTypeOf[java.lang.Double] => Schema(DoubleType, nullable = true) + case t if t <:< localTypeOf[java.lang.Float] => Schema(FloatType, nullable = true) + case t if t <:< localTypeOf[java.lang.Short] => Schema(ShortType, nullable = true) + case t if t <:< localTypeOf[java.lang.Byte] => Schema(ByteType, nullable = true) + case t if t <:< localTypeOf[java.lang.Boolean] => Schema(BooleanType, nullable = true) case t if t <:< definitions.IntTpe => Schema(IntegerType, nullable = false) case t if t <:< definitions.LongTpe => Schema(LongType, nullable = false) case t if t <:< definitions.DoubleTpe => Schema(DoubleType, nullable = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 1d3a2dc0d9bb0..e85312aee7d16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -140,7 +140,7 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { (HAVING ~> expression).? ~ sortType.? ~ (LIMIT ~> expression).? ^^ { - case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l => + case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l => val base = r.getOrElse(OneRowRelation) val withFilter = f.map(Filter(_, base)).getOrElse(base) val withProjection = g @@ -212,7 +212,7 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val ordering: Parser[Seq[SortOrder]] = ( rep1sep(expression ~ direction.? , ",") ^^ { - case exps => exps.map(pair => SortOrder(pair._1, pair._2.getOrElse(Ascending))) + case exps => exps.map(pair => SortOrder(pair._1, pair._2.getOrElse(Ascending))) } ) @@ -242,7 +242,7 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { | termExpression ~ NOT.? ~ (BETWEEN ~> termExpression) ~ (AND ~> termExpression) ^^ { case e ~ not ~ el ~ eu => val betweenExpr: Expression = And(GreaterThanOrEqual(e, el), LessThanOrEqual(e, eu)) - not.fold(betweenExpr)(f=> Not(betweenExpr)) + not.fold(betweenExpr)(f => Not(betweenExpr)) } | termExpression ~ (RLIKE ~> termExpression) ^^ { case e1 ~ e2 => RLike(e1, e2) } | termExpression ~ (REGEXP ~> termExpression) ^^ { case e1 ~ e2 => RLike(e1, e2) } @@ -296,13 +296,13 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { | LOWER ~ "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) } | IF ~ "(" ~> expression ~ ("," ~> expression) ~ ("," ~> expression) <~ ")" ^^ { case c ~ t ~ f => If(c, t, f) } - | CASE ~> expression.? ~ (WHEN ~> expression ~ (THEN ~> expression)).* ~ + | CASE ~> expression.? ~ rep1(WHEN ~> expression ~ (THEN ~> expression)) ~ (ELSE ~> expression).? 
<~ END ^^ { case casePart ~ altPart ~ elsePart => - val altExprs = altPart.flatMap { case whenExpr ~ thenExpr => - Seq(casePart.fold(whenExpr)(EqualTo(_, whenExpr)), thenExpr) - } - CaseWhen(altExprs ++ elsePart.toList) + val branches = altPart.flatMap { case whenExpr ~ thenExpr => + Seq(whenExpr, thenExpr) + } ++ elsePart + casePart.map(CaseKeyWhen(_, branches)).getOrElse(CaseWhen(branches)) } | (SUBSTR | SUBSTRING) ~ "(" ~> expression ~ ("," ~> expression) <~ ")" ^^ { case s ~ p => Substring(s, p, Literal(Integer.MAX_VALUE)) } @@ -365,7 +365,7 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val baseExpression: Parser[Expression] = ( "*" ^^^ UnresolvedStar(None) - | ident <~ "." ~ "*" ^^ { case tableName => UnresolvedStar(Option(tableName)) } + | ident <~ "." ~ "*" ^^ { case tableName => UnresolvedStar(Option(tableName)) } | primary ) @@ -375,9 +375,9 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { protected lazy val primary: PackratParser[Expression] = ( literal | expression ~ ("[" ~> expression <~ "]") ^^ - { case base ~ ordinal => GetItem(base, ordinal) } + { case base ~ ordinal => UnresolvedExtractValue(base, ordinal) } | (expression <~ ".") ~ ident ^^ - { case base ~ fieldName => UnresolvedGetField(base, fieldName) } + { case base ~ fieldName => UnresolvedExtractValue(base, Literal(fieldName)) } | cast | "(" ~> expression <~ ")" | function diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5e42b409dcc59..bc17169f35a46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.util.collection.OpenHashSet +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.{SimpleCatalystConf, CatalystConf} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -29,7 +31,8 @@ import org.apache.spark.sql.types._ * when all relations are already filled in and the analyzer needs only to resolve attribute * references. 
*/ -object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true) +object SimpleAnalyzer + extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, new SimpleCatalystConf(true)) /** * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and @@ -39,11 +42,17 @@ object SimpleAnalyzer extends Analyzer(EmptyCatalog, EmptyFunctionRegistry, true class Analyzer( catalog: Catalog, registry: FunctionRegistry, - caseSensitive: Boolean, + conf: CatalystConf, maxIterations: Int = 100) extends RuleExecutor[LogicalPlan] with HiveTypeCoercion with CheckAnalysis { - val resolver = if (caseSensitive) caseSensitiveResolution else caseInsensitiveResolution + def resolver: Resolver = { + if (conf.caseSensitiveAnalysis) { + caseSensitiveResolution + } else { + caseInsensitiveResolution + } + } val fixedPoint = FixedPoint(maxIterations) @@ -53,14 +62,18 @@ class Analyzer( val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Nil lazy val batches: Seq[Batch] = Seq( + Batch("Substitution", fixedPoint, + CTESubstitution :: + WindowsSubstitution :: + Nil : _*), Batch("Resolution", fixedPoint, ResolveRelations :: ResolveReferences :: ResolveGroupingAnalytics :: ResolveSortReferences :: ResolveGenerate :: - ImplicitGenerate :: ResolveFunctions :: + ExtractWindowExpressions :: GlobalAggregates :: UnresolvedHavingClauseAttributes :: TrimGroupingAliases :: @@ -68,6 +81,55 @@ class Analyzer( extendedResolutionRules : _*) ) + /** + * Substitute child plan with cte definitions + */ + object CTESubstitution extends Rule[LogicalPlan] { + // TODO allow subquery to define CTE + def apply(plan: LogicalPlan): LogicalPlan = plan match { + case With(child, relations) => substituteCTE(child, relations) + case other => other + } + + def substituteCTE(plan: LogicalPlan, cteRelations: Map[String, LogicalPlan]): LogicalPlan = { + plan transform { + // In hive, if there is same table name in database and CTE definition, + // hive will use the table in database, not the CTE one. + // Taking into account the reasonableness and the implementation complexity, + // here use the CTE definition first, check table name only and ignore database name + // see https://github.com/apache/spark/pull/4929#discussion_r27186638 for more info + case u : UnresolvedRelation => + val substituted = cteRelations.get(u.tableIdentifier.last).map { relation => + val withAlias = u.alias.map(Subquery(_, relation)) + withAlias.getOrElse(relation) + } + substituted.getOrElse(u) + } + } + } + + /** + * Substitute child plan with WindowSpecDefinitions. + */ + object WindowsSubstitution extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + // Lookup WindowSpecDefinitions. This rule works with unresolved children. + case WithWindowDefinition(windowDefinitions, child) => + child.transform { + case plan => plan.transformExpressions { + case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => + val errorMessage = + s"Window specification $windowName is not defined in the WINDOW clause." + val windowSpecDefinition = + windowDefinitions + .get(windowName) + .getOrElse(failAnalysis(errorMessage)) + WindowExpression(c, windowSpecDefinition) + } + } + } + } + /** * Removes no-op Alias expressions from the plan. 
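// A hedged sketch of how the new CatalystConf parameter is threaded into the analyzer,
// following the SimpleAnalyzer definition above.  This is Spark-internal API, so the
// snippet only makes sense inside Spark's own packages and test code.
import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyCatalog, EmptyFunctionRegistry}

val caseSensitiveAnalyzer =
  new Analyzer(EmptyCatalog, EmptyFunctionRegistry, new SimpleCatalystConf(true))
val caseInsensitiveAnalyzer =
  new Analyzer(EmptyCatalog, EmptyFunctionRegistry, new SimpleCatalystConf(false))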
*/ @@ -79,25 +141,6 @@ class Analyzer( } object ResolveGroupingAnalytics extends Rule[LogicalPlan] { - /** - * Extract attribute set according to the grouping id - * @param bitmask bitmask to represent the selected of the attribute sequence - * @param exprs the attributes in sequence - * @return the attributes of non selected specified via bitmask (with the bit set to 1) - */ - private def buildNonSelectExprSet(bitmask: Int, exprs: Seq[Expression]) - : OpenHashSet[Expression] = { - val set = new OpenHashSet[Expression](2) - - var bit = exprs.length - 1 - while (bit >= 0) { - if (((bitmask >> bit) & 1) == 0) set.add(exprs(bit)) - bit -= 1 - } - - set - } - /* * GROUP BY a, b, c WITH ROLLUP * is equivalent to @@ -134,10 +177,15 @@ class Analyzer( g.bitmasks.foreach { bitmask => // get the non selected grouping attributes according to the bit mask - val nonSelectedGroupExprSet = buildNonSelectExprSet(bitmask, g.groupByExprs) + val nonSelectedGroupExprs = ArrayBuffer.empty[Expression] + var bit = g.groupByExprs.length - 1 + while (bit >= 0) { + if (((bitmask >> bit) & 1) == 0) nonSelectedGroupExprs += g.groupByExprs(bit) + bit -= 1 + } val substitution = (g.child.output :+ g.gid).map(expr => expr transformDown { - case x: Expression if nonSelectedGroupExprSet.contains(x) => + case x: Expression if nonSelectedGroupExprs.find(_ semanticEquals x).isDefined => // if the input attribute in the Invalid Grouping Expression set of for this group // replace it with constant null Literal.create(null, expr.dataType) @@ -169,36 +217,20 @@ class Analyzer( * Replaces [[UnresolvedRelation]]s with concrete relations from the catalog. */ object ResolveRelations extends Rule[LogicalPlan] { - def getTable(u: UnresolvedRelation, cteRelations: Map[String, LogicalPlan]): LogicalPlan = { + def getTable(u: UnresolvedRelation): LogicalPlan = { try { - // In hive, if there is same table name in database and CTE definition, - // hive will use the table in database, not the CTE one. 
- // Taking into account the reasonableness and the implementation complexity, - // here use the CTE definition first, check table name only and ignore database name - cteRelations.get(u.tableIdentifier.last) - .map(relation => u.alias.map(Subquery(_, relation)).getOrElse(relation)) - .getOrElse(catalog.lookupRelation(u.tableIdentifier, u.alias)) + catalog.lookupRelation(u.tableIdentifier, u.alias) } catch { case _: NoSuchTableException => u.failAnalysis(s"no such table ${u.tableName}") } } - def apply(plan: LogicalPlan): LogicalPlan = { - val (realPlan, cteRelations) = plan match { - // TODO allow subquery to define CTE - // Add cte table to a temp relation map,drop `with` plan and keep its child - case With(child, relations) => (child, relations) - case other => (other, Map.empty[String, LogicalPlan]) - } - - realPlan transform { - case i@InsertIntoTable(u: UnresolvedRelation, _, _, _, _) => - i.copy( - table = EliminateSubQueries(getTable(u, cteRelations))) - case u: UnresolvedRelation => - getTable(u, cteRelations) - } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case i@InsertIntoTable(u: UnresolvedRelation, _, _, _, _) => + i.copy(table = EliminateSubQueries(getTable(u))) + case u: UnresolvedRelation => + getTable(u) } } @@ -274,6 +306,16 @@ class Analyzer( case oldVersion @ Aggregate(_, aggregateExpressions, _) if findAliases(aggregateExpressions).intersect(conflictingAttributes).nonEmpty => (oldVersion, oldVersion.copy(aggregateExpressions = newAliases(aggregateExpressions))) + + case oldVersion: Generate + if oldVersion.generatedSet.intersect(conflictingAttributes).nonEmpty => + val newOutput = oldVersion.generatorOutput.map(_.newInstance()) + (oldVersion, oldVersion.copy(generatorOutput = newOutput)) + + case oldVersion @ Window(_, windowExpressions, _, child) + if AttributeSet(windowExpressions.map(_.toAttribute)).intersect(conflictingAttributes) + .nonEmpty => + (oldVersion, oldVersion.copy(windowExpressions = newAliases(windowExpressions))) }.headOption.getOrElse { // Only handle first case, others will be fixed on the next pass. sys.error( s""" @@ -308,8 +350,8 @@ class Analyzer( withPosition(u) { q.resolveChildren(nameParts, resolver).getOrElse(u) } logDebug(s"Resolving $u to $result") result - case UnresolvedGetField(child, fieldName) if child.resolved => - GetField(child, fieldName, resolver) + case UnresolvedExtractValue(child, fieldExpr) if child.resolved => + ExtractValue(child, fieldExpr, resolver) } } @@ -452,7 +494,7 @@ class Analyzer( def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case filter @ Filter(havingCondition, aggregate @ Aggregate(_, originalAggExprs, _)) if aggregate.resolved && containsAggregate(havingCondition) => { - val evaluatedCondition = Alias(havingCondition, "havingCondition")() + val evaluatedCondition = Alias(havingCondition, "havingCondition")() val aggExprsWithHaving = evaluatedCondition +: originalAggExprs Project(aggregate.output, @@ -468,65 +510,337 @@ class Analyzer( } /** - * When a SELECT clause has only a single expression and that expression is a - * [[catalyst.expressions.Generator Generator]] we convert the - * [[catalyst.plans.logical.Project Project]] to a [[catalyst.plans.logical.Generate Generate]]. + * Rewrites table generating expressions that either need one or more of the following in order + * to be resolved: + * - concrete attribute references for their output. + * - to be relocated from a SELECT clause (i.e. from a [[Project]]) into a [[Generate]]). 
+ * + * Names for the output [[Attribute]]s are extracted from [[Alias]] or [[MultiAlias]] expressions + * that wrap the [[Generator]]. If more than one [[Generator]] is found in a Project, an + * [[AnalysisException]] is throw. */ - object ImplicitGenerate extends Rule[LogicalPlan] { + object ResolveGenerate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Project(Seq(Alias(g: Generator, name)), child) => - Generate(g, join = false, outer = false, - qualifier = None, UnresolvedAttribute(name) :: Nil, child) - case Project(Seq(MultiAlias(g: Generator, names)), child) => - Generate(g, join = false, outer = false, - qualifier = None, names.map(UnresolvedAttribute(_)), child) + case p: Generate if !p.child.resolved || !p.generator.resolved => p + case g: Generate if !g.resolved => + g.copy(generatorOutput = makeGeneratorOutput(g.generator, g.generatorOutput.map(_.name))) + + case p @ Project(projectList, child) => + // Holds the resolved generator, if one exists in the project list. + var resolvedGenerator: Generate = null + + val newProjectList = projectList.flatMap { + case AliasedGenerator(generator, names) if generator.childrenResolved => + if (resolvedGenerator != null) { + failAnalysis( + s"Only one generator allowed per select but ${resolvedGenerator.nodeName} and " + + s"and ${generator.nodeName} found.") + } + + resolvedGenerator = + Generate( + generator, + join = projectList.size > 1, // Only join if there are other expressions in SELECT. + outer = false, + qualifier = None, + generatorOutput = makeGeneratorOutput(generator, names), + child) + + resolvedGenerator.generatorOutput + case other => other :: Nil + } + + if (resolvedGenerator != null) { + Project(newProjectList, resolvedGenerator) + } else { + p + } } - } - /** - * Resolve the Generate, if the output names specified, we will take them, otherwise - * we will try to provide the default names, which follow the same rule with Hive. - */ - object ResolveGenerate extends Rule[LogicalPlan] { - // Construct the output attributes for the generator, - // The output attribute names can be either specified or - // auto generated. + /** Extracts a [[Generator]] expression and any names assigned by aliases to their output. */ + private object AliasedGenerator { + def unapply(e: Expression): Option[(Generator, Seq[String])] = e match { + case Alias(g: Generator, name) + if g.elementTypes.size > 1 && java.util.regex.Pattern.matches("_c[0-9]+", name) => { + // Assume the default name given by parser is "_c[0-9]+", + // TODO in long term, move the naming logic from Parser to Analyzer. + // In projection, Parser gave default name for TGF as does for normal UDF, + // but the TGF probably have multiple output columns/names. + // e.g. SELECT explode(map(key, value)) FROM src; + // Let's simply ignore the default given name for this case. + Some((g, Nil)) + } + case Alias(g: Generator, name) if g.elementTypes.size > 1 => + // If not given the default names, and the TGF with multiple output columns + failAnalysis( + s"""Expect multiple names given for ${g.getClass.getName}, + |but only single name '${name}' specified""".stripMargin) + case Alias(g: Generator, name) => Some((g, name :: Nil)) + case MultiAlias(g: Generator, names) => Some(g, names) + case _ => None + } + } + + /** + * Construct the output attributes for a [[Generator]], given a list of names. If the list of + * names is empty names are assigned by ordinal (i.e., _c0, _c1, ...) to match Hive's defaults. 
+ */ private def makeGeneratorOutput( generator: Generator, - generatorOutput: Seq[Attribute]): Seq[Attribute] = { + names: Seq[String]): Seq[Attribute] = { val elementTypes = generator.elementTypes - if (generatorOutput.length == elementTypes.length) { - generatorOutput.zip(elementTypes).map { - case (a, (t, nullable)) if !a.resolved => - AttributeReference(a.name, t, nullable)() - case (a, _) => a + if (names.length == elementTypes.length) { + names.zip(elementTypes).map { + case (name, (t, nullable)) => + AttributeReference(name, t, nullable)() } - } else if (generatorOutput.length == 0) { + } else if (names.isEmpty) { elementTypes.zipWithIndex.map { // keep the default column names as Hive does _c0, _c1, _cN case ((t, nullable), i) => AttributeReference(s"_c$i", t, nullable)() } } else { - throw new AnalysisException( - s""" - |The number of aliases supplied in the AS clause does not match - |the number of columns output by the UDTF expected - |${elementTypes.size} aliases but got ${generatorOutput.size} - """.stripMargin) + failAnalysis( + "The number of aliases supplied in the AS clause does not match the number of columns " + + s"output by the UDTF expected ${elementTypes.size} aliases but got " + + s"${names.mkString(",")} ") } } + } - def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case p: Generate if !p.child.resolved || !p.generator.resolved => p - case p: Generate if p.resolved == false => - // if the generator output names are not specified, we will use the default ones. - Generate( - p.generator, - join = p.join, - outer = p.outer, - p.qualifier, - makeGeneratorOutput(p.generator, p.generatorOutput), p.child) + /** + * Extracts [[WindowExpression]]s from the projectList of a [[Project]] operator and + * aggregateExpressions of an [[Aggregate]] operator and creates individual [[Window]] + * operators for every distinct [[WindowSpecDefinition]]. + * + * This rule handles three cases: + * - A [[Project]] having [[WindowExpression]]s in its projectList; + * - An [[Aggregate]] having [[WindowExpression]]s in its aggregateExpressions. + * - An [[Filter]]->[[Aggregate]] pattern representing GROUP BY with a HAVING + * clause and the [[Aggregate]] has [[WindowExpression]]s in its aggregateExpressions. + * Note: If there is a GROUP BY clause in the query, aggregations and corresponding + * filters (expressions in the HAVING clause) should be evaluated before any + * [[WindowExpression]]. If a query has SELECT DISTINCT, the DISTINCT part should be + * evaluated after all [[WindowExpression]]s. + * + * For every case, the transformation works as follows: + * 1. For a list of [[Expression]]s (a projectList or an aggregateExpressions), partitions + * it two lists of [[Expression]]s, one for all [[WindowExpression]]s and another for + * all regular expressions. + * 2. For all [[WindowExpression]]s, groups them based on their [[WindowSpecDefinition]]s. + * 3. For every distinct [[WindowSpecDefinition]], creates a [[Window]] operator and inserts + * it into the plan tree. 
+ */ + object ExtractWindowExpressions extends Rule[LogicalPlan] { + private def hasWindowFunction(projectList: Seq[NamedExpression]): Boolean = + projectList.exists(hasWindowFunction) + + private def hasWindowFunction(expr: NamedExpression): Boolean = { + expr.find { + case window: WindowExpression => true + case _ => false + }.isDefined + } + + /** + * From a Seq of [[NamedExpression]]s, extract expressions containing window expressions and + * other regular expressions that do not contain any window expression. For example, for + * `col1, Sum(col2 + col3) OVER (PARTITION BY col4 ORDER BY col5)`, we will extract + * `col1`, `col2 + col3`, `col4`, and `col5` out and replace their appearances in + * the window expression as attribute references. So, the first returned value will be + * `[Sum(_w0) OVER (PARTITION BY _w1 ORDER BY _w2)]` and the second returned value will be + * [col1, col2 + col3 as _w0, col4 as _w1, col5 as _w2]. + * + * @return (seq of expressions containing at lease one window expressions, + * seq of non-window expressions) + */ + private def extract( + expressions: Seq[NamedExpression]): (Seq[NamedExpression], Seq[NamedExpression]) = { + // First, we partition the input expressions to two part. For the first part, + // every expression in it contain at least one WindowExpression. + // Expressions in the second part do not have any WindowExpression. + val (expressionsWithWindowFunctions, regularExpressions) = + expressions.partition(hasWindowFunction) + + // Then, we need to extract those regular expressions used in the WindowExpression. + // For example, when we have col1 - Sum(col2 + col3) OVER (PARTITION BY col4 ORDER BY col5), + // we need to make sure that col1 to col5 are all projected from the child of the Window + // operator. + val extractedExprBuffer = new ArrayBuffer[NamedExpression]() + def extractExpr(expr: Expression): Expression = expr match { + case ne: NamedExpression => + // If a named expression is not in regularExpressions, add it to + // extractedExprBuffer and replace it with an AttributeReference. + val missingExpr = + AttributeSet(Seq(expr)) -- (regularExpressions ++ extractedExprBuffer) + if (missingExpr.nonEmpty) { + extractedExprBuffer += ne + } + ne.toAttribute + case e: Expression if e.foldable => + e // No need to create an attribute reference if it will be evaluated as a Literal. + case e: Expression => + // For other expressions, we extract it and replace it with an AttributeReference (with + // an interal column name, e.g. "_w0"). + val withName = Alias(e, s"_w${extractedExprBuffer.length}")() + extractedExprBuffer += withName + withName.toAttribute + } + + // Now, we extract regular expressions from expressionsWithWindowFunctions + // by using extractExpr. + val newExpressionsWithWindowFunctions = expressionsWithWindowFunctions.map { + _.transform { + // Extracts children expressions of a WindowFunction (input parameters of + // a WindowFunction). + case wf : WindowFunction => + val newChildren = wf.children.map(extractExpr(_)) + wf.withNewChildren(newChildren) + + // Extracts expressions from the partition spec and order spec. + case wsc @ WindowSpecDefinition(partitionSpec, orderSpec, _) => + val newPartitionSpec = partitionSpec.map(extractExpr(_)) + val newOrderSpec = orderSpec.map { so => + val newChild = extractExpr(so.child) + so.copy(child = newChild) + } + wsc.copy(partitionSpec = newPartitionSpec, orderSpec = newOrderSpec) + + // Extracts AggregateExpression. 
For example, for SUM(x) - Sum(y) OVER (...), + // we need to extract SUM(x). + case agg: AggregateExpression => + val withName = Alias(agg, s"_w${extractedExprBuffer.length}")() + extractedExprBuffer += withName + withName.toAttribute + }.asInstanceOf[NamedExpression] + } + + (newExpressionsWithWindowFunctions, regularExpressions ++ extractedExprBuffer) + } // end of extract + + /** + * Adds operators for Window Expressions. Every Window operator handles a single Window Spec. + */ + private def addWindow( + expressionsWithWindowFunctions: Seq[NamedExpression], + child: LogicalPlan): LogicalPlan = { + // First, we need to extract all WindowExpressions from expressionsWithWindowFunctions + // and put those extracted WindowExpressions to extractedWindowExprBuffer. + // This step is needed because it is possible that an expression contains multiple + // WindowExpressions with different Window Specs. + // After extracting WindowExpressions, we need to construct a project list to generate + // expressionsWithWindowFunctions based on extractedWindowExprBuffer. + // For example, for "sum(a) over (...) / sum(b) over (...)", we will first extract + // "sum(a) over (...)" and "sum(b) over (...)" out, and assign "_we0" as the alias to + // "sum(a) over (...)" and "_we1" as the alias to "sum(b) over (...)". + // Then, the projectList will be [_we0/_we1]. + val extractedWindowExprBuffer = new ArrayBuffer[NamedExpression]() + val newExpressionsWithWindowFunctions = expressionsWithWindowFunctions.map { + // We need to use transformDown because we want to trigger + // "case alias @ Alias(window: WindowExpression, _)" first. + _.transformDown { + case alias @ Alias(window: WindowExpression, _) => + // If a WindowExpression has an assigned alias, just use it. + extractedWindowExprBuffer += alias + alias.toAttribute + case window: WindowExpression => + // If there is no alias assigned to the WindowExpressions. We create an + // internal column. + val withName = Alias(window, s"_we${extractedWindowExprBuffer.length}")() + extractedWindowExprBuffer += withName + withName.toAttribute + }.asInstanceOf[NamedExpression] + } + + // Second, we group extractedWindowExprBuffer based on their Window Spec. + val groupedWindowExpressions = extractedWindowExprBuffer.groupBy { expr => + val distinctWindowSpec = expr.collect { + case window: WindowExpression => window.windowSpec + }.distinct + + // We do a final check and see if we only have a single Window Spec defined in an + // expressions. + if (distinctWindowSpec.length == 0 ) { + failAnalysis(s"$expr does not have any WindowExpression.") + } else if (distinctWindowSpec.length > 1) { + // newExpressionsWithWindowFunctions only have expressions with a single + // WindowExpression. If we reach here, we have a bug. + failAnalysis(s"$expr has multiple Window Specifications ($distinctWindowSpec)." + + s"Please file a bug report with this error message, stack trace, and the query.") + } else { + distinctWindowSpec.head + } + }.toSeq + + // Third, for every Window Spec, we add a Window operator and set currentChild as the + // child of it. + var currentChild = child + var i = 0 + while (i < groupedWindowExpressions.size) { + val (windowSpec, windowExpressions) = groupedWindowExpressions(i) + // Set currentChild to the newly created Window operator. + currentChild = Window(currentChild.output, windowExpressions, windowSpec, currentChild) + + // Move to next Window Spec. 
+ i += 1 + } + + // Finally, we create a Project to output currentChild's output + // newExpressionsWithWindowFunctions. + Project(currentChild.output ++ newExpressionsWithWindowFunctions, currentChild) + } // end of addWindow + + // We have to use transformDown at here to make sure the rule of + // "Aggregate with Having clause" will be triggered. + def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + // Aggregate with Having clause. This rule works with an unresolved Aggregate because + // a resolved Aggregate will not have Window Functions. + case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child)) + if child.resolved && + hasWindowFunction(aggregateExprs) && + a.expressions.forall(_.resolved) => + val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) + // Create an Aggregate operator to evaluate aggregation functions. + val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) + // Add a Filter operator for conditions in the Having clause. + val withFilter = Filter(condition, withAggregate) + val withWindow = addWindow(windowExpressions, withFilter) + + // Finally, generate output columns according to the original projectList. + val finalProjectList = aggregateExprs.map (_.toAttribute) + Project(finalProjectList, withWindow) + + case p: LogicalPlan if !p.childrenResolved => p + + // Aggregate without Having clause. + case a @ Aggregate(groupingExprs, aggregateExprs, child) + if hasWindowFunction(aggregateExprs) && + a.expressions.forall(_.resolved) => + val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) + // Create an Aggregate operator to evaluate aggregation functions. + val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) + // Add Window operators. + val withWindow = addWindow(windowExpressions, withAggregate) + + // Finally, generate output columns according to the original projectList. + val finalProjectList = aggregateExprs.map (_.toAttribute) + Project(finalProjectList, withWindow) + + // We only extract Window Expressions after all expressions of the Project + // have been resolved. + case p @ Project(projectList, child) + if hasWindowFunction(projectList) && !p.expressions.exists(!_.resolved) => + val (windowExpressions, regularExpressions) = extract(projectList) + // We add a project to get all needed expressions for window expressions from the child + // of the original Project operator. + val withProject = Project(regularExpressions, child) + // Add Window operators. + val withWindow = addWindow(windowExpressions, withProject) + + // Finally, generate output columns according to the original projectList. 
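// For example (a sketch of the resulting plan shape, not verbatim analyzer output):
// SELECT x, sum(y) OVER (PARTITION BY z) FROM t is rewritten into a Project that restores
// the original output order, over a Window operator that evaluates sum(y) OVER (PARTITION
// BY z), over a Project [x, y, z] that supplies the needed attributes from the child.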
+ val finalProjectList = projectList.map (_.toAttribute) + Project(finalProjectList, withWindow) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala index 18c24b651921a..3e240fd55e250 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.analysis import scala.collection.mutable +import org.apache.spark.sql.catalyst.CatalystConf +import org.apache.spark.sql.catalyst.EmptyConf import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery} /** @@ -34,7 +36,7 @@ class NoSuchDatabaseException extends Exception */ trait Catalog { - def caseSensitive: Boolean + val conf: CatalystConf def tableExists(tableIdentifier: Seq[String]): Boolean @@ -57,10 +59,10 @@ trait Catalog { def unregisterAllTables(): Unit protected def processTableIdentifier(tableIdentifier: Seq[String]): Seq[String] = { - if (!caseSensitive) { - tableIdentifier.map(_.toLowerCase) - } else { + if (conf.caseSensitiveAnalysis) { tableIdentifier + } else { + tableIdentifier.map(_.toLowerCase) } } @@ -78,7 +80,7 @@ trait Catalog { } } -class SimpleCatalog(val caseSensitive: Boolean) extends Catalog { +class SimpleCatalog(val conf: CatalystConf) extends Catalog { val tables = new mutable.HashMap[String, LogicalPlan]() override def registerTable( @@ -138,7 +140,7 @@ class SimpleCatalog(val caseSensitive: Boolean) extends Catalog { trait OverrideCatalog extends Catalog { // TODO: This doesn't work when the database changes... - val overrides = new mutable.HashMap[(Option[String],String), LogicalPlan]() + val overrides = new mutable.HashMap[(Option[String], String), LogicalPlan]() abstract override def tableExists(tableIdentifier: Seq[String]): Boolean = { val tableIdent = processTableIdentifier(tableIdentifier) @@ -164,10 +166,10 @@ trait OverrideCatalog extends Catalog { } abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { - val dbName = if (!caseSensitive) { - if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None - } else { + val dbName = if (conf.caseSensitiveAnalysis) { databaseName + } else { + if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None } val temporaryTables = overrides.filter { @@ -207,7 +209,7 @@ trait OverrideCatalog extends Catalog { */ object EmptyCatalog extends Catalog { - override val caseSensitive: Boolean = true + override val conf: CatalystConf = EmptyConf override def tableExists(tableIdentifier: Seq[String]): Boolean = { throw new UnsupportedOperationException diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 2381689e17525..193dc6b6546b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -69,7 +69,17 @@ trait CheckAnalysis { case b: BinaryExpression if !b.resolved => failAnalysis( s"invalid expression ${b.prettyString} " + - s"between ${b.left.simpleString} and ${b.right.simpleString}") + s"between ${b.left.dataType.simpleString} and ${b.right.dataType.simpleString}") + + case WindowExpression(UnresolvedWindowFunction(name, _), 
_) => + failAnalysis( + s"Could not resolve window function '$name'. " + + "Note that, using window functions currently requires a HiveContext") + + case w @ WindowExpression(windowFunction, windowSpec) if windowSpec.validate.nonEmpty => + // The window spec is not valid. + val reason = windowSpec.validate.get + failAnalysis(s"Window specification $windowSpec is not valid because $reason") } operator match { @@ -81,12 +91,12 @@ trait CheckAnalysis { case Aggregate(groupingExprs, aggregateExprs, child) => def checkValidAggregateExpression(expr: Expression): Unit = expr match { case _: AggregateExpression => // OK - case e: Attribute if !groupingExprs.contains(e) => + case e: Attribute if groupingExprs.find(_ semanticEquals e).isEmpty => failAnalysis( s"expression '${e.prettyString}' is neither present in the group by, " + s"nor is it an aggregate function. " + "Add to group by or wrap in first() if you don't care which value you get.") - case e if groupingExprs.contains(e) => // OK + case e if groupingExprs.find(_ semanticEquals e).isDefined => // OK case e if e.references.isEmpty => // OK case e => e.children.foreach(checkValidAggregateExpression) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 16ca5bcd57a72..0849faa9bfa7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.expressions.Expression import scala.collection.mutable @@ -28,12 +29,12 @@ trait FunctionRegistry { def lookupFunction(name: String, children: Seq[Expression]): Expression - def caseSensitive: Boolean + def conf: CatalystConf } trait OverrideFunctionRegistry extends FunctionRegistry { - val functionBuilders = StringKeyHashMap[FunctionBuilder](caseSensitive) + val functionBuilders = StringKeyHashMap[FunctionBuilder](conf.caseSensitiveAnalysis) override def registerFunction(name: String, builder: FunctionBuilder): Unit = { functionBuilders.put(name, builder) @@ -44,8 +45,9 @@ trait OverrideFunctionRegistry extends FunctionRegistry { } } -class SimpleFunctionRegistry(val caseSensitive: Boolean) extends FunctionRegistry { - val functionBuilders = StringKeyHashMap[FunctionBuilder](caseSensitive) +class SimpleFunctionRegistry(val conf: CatalystConf) extends FunctionRegistry { + + val functionBuilders = StringKeyHashMap[FunctionBuilder](conf.caseSensitiveAnalysis) override def registerFunction(name: String, builder: FunctionBuilder): Unit = { functionBuilders.put(name, builder) @@ -69,7 +71,7 @@ object EmptyFunctionRegistry extends FunctionRegistry { throw new UnsupportedOperationException } - override def caseSensitive: Boolean = throw new UnsupportedOperationException + override def conf: CatalystConf = throw new UnsupportedOperationException } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 73c9a1c7afdad..fa7968eb5664e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -26,7 
+26,14 @@ object HiveTypeCoercion { // See https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types. // The conversion for integral and floating point types have a linear widening hierarchy: private val numericPrecedence = - Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited) + IndexedSeq( + ByteType, + ShortType, + IntegerType, + LongType, + FloatType, + DoubleType, + DecimalType.Unlimited) /** * Find the tightest common type of two types that might be used in a binary expression. @@ -34,25 +41,21 @@ object HiveTypeCoercion { * with primitive types, because in that case the precision and scale of the result depends on * the operation. Those rules are implemented in [[HiveTypeCoercion.DecimalPrecision]]. */ - def findTightestCommonType(t1: DataType, t2: DataType): Option[DataType] = { - val valueTypes = Seq(t1, t2).filter(t => t != NullType) - if (valueTypes.distinct.size > 1) { - // Promote numeric types to the highest of the two and all numeric types to unlimited decimal - if (numericPrecedence.contains(t1) && numericPrecedence.contains(t2)) { - Some(numericPrecedence.filter(t => t == t1 || t == t2).last) - } else if (t1.isInstanceOf[DecimalType] && t2.isInstanceOf[DecimalType]) { - // Fixed-precision decimals can up-cast into unlimited - if (t1 == DecimalType.Unlimited || t2 == DecimalType.Unlimited) { - Some(DecimalType.Unlimited) - } else { - None - } - } else { - None - } - } else { - Some(if (valueTypes.size == 0) NullType else valueTypes.head) - } + val findTightestCommonType: (DataType, DataType) => Option[DataType] = { + case (t1, t2) if t1 == t2 => Some(t1) + case (NullType, t1) => Some(t1) + case (t1, NullType) => Some(t1) + + // Promote numeric types to the highest of the two and all numeric types to unlimited decimal + case (t1, t2) if Seq(t1, t2).forall(numericPrecedence.contains) => + val index = numericPrecedence.lastIndexWhere(t => t == t1 || t == t2) + Some(numericPrecedence(index)) + + // Fixed-precision decimals can up-cast into unlimited + case (DecimalType.Unlimited, _: DecimalType) => Some(DecimalType.Unlimited) + case (_: DecimalType, DecimalType.Unlimited) => Some(DecimalType.Unlimited) + + case _ => None } } @@ -69,6 +72,7 @@ trait HiveTypeCoercion { val typeCoercionRules = PropagateTypes :: ConvertNaNs :: + InConversion :: WidenTypes :: PromoteStrings :: DecimalPrecision :: @@ -239,37 +243,43 @@ trait HiveTypeCoercion { a.makeCopy(Array(a.left, Cast(a.right, DoubleType))) // we should cast all timestamp/date/string compare into string compare - case p: BinaryPredicate if p.left.dataType == StringType - && p.right.dataType == DateType => - p.makeCopy(Array(p.left, Cast(p.right, StringType))) - case p: BinaryPredicate if p.left.dataType == DateType - && p.right.dataType == StringType => - p.makeCopy(Array(Cast(p.left, StringType), p.right)) - case p: BinaryPredicate if p.left.dataType == StringType - && p.right.dataType == TimestampType => + case p: BinaryComparison if p.left.dataType == StringType && + p.right.dataType == DateType => p.makeCopy(Array(p.left, Cast(p.right, StringType))) - case p: BinaryPredicate if p.left.dataType == TimestampType - && p.right.dataType == StringType => + case p: BinaryComparison if p.left.dataType == DateType && + p.right.dataType == StringType => p.makeCopy(Array(Cast(p.left, StringType), p.right)) - case p: BinaryPredicate if p.left.dataType == TimestampType - && p.right.dataType == DateType => + case p: BinaryComparison if p.left.dataType == StringType && + p.right.dataType 
== TimestampType => + p.makeCopy(Array(Cast(p.left, TimestampType), p.right)) + case p: BinaryComparison if p.left.dataType == TimestampType && + p.right.dataType == StringType => + p.makeCopy(Array(p.left, Cast(p.right, TimestampType))) + case p: BinaryComparison if p.left.dataType == TimestampType && + p.right.dataType == DateType => p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType))) - case p: BinaryPredicate if p.left.dataType == DateType - && p.right.dataType == TimestampType => + case p: BinaryComparison if p.left.dataType == DateType && + p.right.dataType == TimestampType => p.makeCopy(Array(Cast(p.left, StringType), Cast(p.right, StringType))) - case p: BinaryPredicate if p.left.dataType == StringType && p.right.dataType != StringType => + case p: BinaryComparison if p.left.dataType == StringType && + p.right.dataType != StringType => p.makeCopy(Array(Cast(p.left, DoubleType), p.right)) - case p: BinaryPredicate if p.left.dataType != StringType && p.right.dataType == StringType => + case p: BinaryComparison if p.left.dataType != StringType && + p.right.dataType == StringType => p.makeCopy(Array(p.left, Cast(p.right, DoubleType))) - case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == StringType) => - i.makeCopy(Array(Cast(a, StringType), b)) - case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == StringType) => + case i @ In(a, b) if a.dataType == DateType && + b.forall(_.dataType == StringType) => i.makeCopy(Array(Cast(a, StringType), b)) - case i @ In(a, b) if a.dataType == DateType && b.forall(_.dataType == TimestampType) => + case i @ In(a, b) if a.dataType == TimestampType && + b.forall(_.dataType == StringType) => + i.makeCopy(Array(a, b.map(Cast(_, TimestampType)))) + case i @ In(a, b) if a.dataType == DateType && + b.forall(_.dataType == TimestampType) => i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) - case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == DateType) => + case i @ In(a, b) if a.dataType == TimestampType && + b.forall(_.dataType == DateType) => i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) case Sum(e) if e.dataType == StringType => @@ -281,6 +291,16 @@ trait HiveTypeCoercion { } } + /** + * Convert all expressions in in() list to the left operator type + */ + object InConversion extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => + i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) + } + } + // scalastyle:off /** * Calculates and propagates precision for fixed-precision decimals. 
Hive has a number of @@ -420,19 +440,19 @@ trait HiveTypeCoercion { ) case LessThan(e1 @ DecimalType.Expression(p1, s1), - e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => + e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => LessThan(Cast(e1, DecimalType.Unlimited), Cast(e2, DecimalType.Unlimited)) case LessThanOrEqual(e1 @ DecimalType.Expression(p1, s1), - e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => + e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => LessThanOrEqual(Cast(e1, DecimalType.Unlimited), Cast(e2, DecimalType.Unlimited)) case GreaterThan(e1 @ DecimalType.Expression(p1, s1), - e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => + e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => GreaterThan(Cast(e1, DecimalType.Unlimited), Cast(e2, DecimalType.Unlimited)) case GreaterThanOrEqual(e1 @ DecimalType.Expression(p1, s1), - e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => + e2 @ DecimalType.Expression(p2, s2)) if p1 != p2 || s1 != s2 => GreaterThanOrEqual(Cast(e1, DecimalType.Unlimited), Cast(e2, DecimalType.Unlimited)) // Promote integers inside a binary expression with fixed-precision decimals to decimals, @@ -481,8 +501,8 @@ trait HiveTypeCoercion { // No need to change the EqualNullSafe operators, too case e: EqualNullSafe => e // Otherwise turn them to Byte types so that there exists and ordering. - case p: BinaryComparison - if p.left.dataType == BooleanType && p.right.dataType == BooleanType => + case p: BinaryComparison if p.left.dataType == BooleanType && + p.right.dataType == BooleanType => p.makeCopy(Array(Cast(p.left, ByteType), Cast(p.right, ByteType))) } } @@ -538,8 +558,7 @@ trait HiveTypeCoercion { case a @ CreateArray(children) if !a.resolved => val commonType = a.childTypes.reduce( - (a,b) => - findTightestCommonType(a,b).getOrElse(StringType)) + (a, b) => findTightestCommonType(a, b).getOrElse(StringType)) CreateArray( children.map(c => if (c.dataType == commonType) c else Cast(c, commonType))) @@ -564,10 +583,6 @@ trait HiveTypeCoercion { case Sum(e @ TimestampType()) => Sum(Cast(e, DoubleType)) case Average(e @ TimestampType()) => Average(Cast(e, DoubleType)) - // Compatible with Hive - case Substring(e, start, len) if e.dataType != StringType => - Substring(Cast(e, StringType), start, len) - // Coalesce should return the first non-null value, which could be any column // from the list. So we need to make sure the return type is deterministic and // compatible with every child column. 
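The rewritten findTightestCommonType above can be exercised on its own; the following is a minimal, self-contained Scala sketch of the same widening logic, using stand-in case objects rather than Catalyst's real DataType classes (the stub names are assumptions for illustration only).

object TightestCommonTypeSketch {
  sealed trait DataType
  case object NullType extends DataType
  case object IntegerType extends DataType
  case object LongType extends DataType
  case object DoubleType extends DataType

  // Widening order mirrors the IndexedSeq introduced in the hunk above.
  private val numericPrecedence: IndexedSeq[DataType] =
    IndexedSeq(IntegerType, LongType, DoubleType)

  val findTightestCommonType: (DataType, DataType) => Option[DataType] = {
    case (t1, t2) if t1 == t2 => Some(t1)
    case (NullType, t) => Some(t)
    case (t, NullType) => Some(t)
    // Promote two numeric types to whichever sits later in the precedence order.
    case (t1, t2) if Seq(t1, t2).forall(numericPrecedence.contains) =>
      Some(numericPrecedence(numericPrecedence.lastIndexWhere(t => t == t1 || t == t2)))
    case _ => None
  }

  def main(args: Array[String]): Unit = {
    println(findTightestCommonType(IntegerType, DoubleType)) // Some(DoubleType)
    println(findTightestCommonType(NullType, LongType))      // Some(LongType)
    println(findTightestCommonType(DoubleType, DoubleType))  // Some(DoubleType)
  }
}

Pairs that fall outside the numeric ladder return None, which is why string/date/timestamp comparisons are still handled by the explicit casts in PromoteStrings above.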
@@ -615,31 +630,24 @@ trait HiveTypeCoercion { import HiveTypeCoercion._ def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case cw @ CaseWhen(branches) if !cw.resolved && !branches.exists(!_.resolved) => - val valueTypes = branches.sliding(2, 2).map { - case Seq(_, value) => value.dataType - case Seq(elseVal) => elseVal.dataType - }.toSeq - - logDebug(s"Input values for null casting ${valueTypes.mkString(",")}") - - if (valueTypes.distinct.size > 1) { - val commonType = valueTypes.reduce { (v1, v2) => - findTightestCommonType(v1, v2) - .getOrElse(sys.error( - s"Types in CASE WHEN must be the same or coercible to a common type: $v1 != $v2")) - } - val transformedBranches = branches.sliding(2, 2).map { - case Seq(cond, value) if value.dataType != commonType => - Seq(cond, Cast(value, commonType)) - case Seq(elseVal) if elseVal.dataType != commonType => - Seq(Cast(elseVal, commonType)) - case s => s - }.reduce(_ ++ _) - CaseWhen(transformedBranches) - } else { - // Types match up. Hopefully some other rule fixes whatever is wrong with resolution. - cw + case cw: CaseWhenLike if !cw.resolved && cw.childrenResolved && !cw.valueTypesEqual => + logDebug(s"Input values for null casting ${cw.valueTypes.mkString(",")}") + val commonType = cw.valueTypes.reduce { (v1, v2) => + findTightestCommonType(v1, v2).getOrElse(sys.error( + s"Types in CASE WHEN must be the same or coercible to a common type: $v1 != $v2")) + } + val transformedBranches = cw.branches.sliding(2, 2).map { + case Seq(when, value) if value.dataType != commonType => + Seq(when, Cast(value, commonType)) + case Seq(elseVal) if elseVal.dataType != commonType => + Seq(Cast(elseVal, commonType)) + case s => s + }.reduce(_ ++ _) + cw match { + case _: CaseWhen => + CaseWhen(transformedBranches) + case CaseKeyWhen(key, _) => + CaseKeyWhen(key, transformedBranches) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 3f567e3e8b2a6..2999c2ef3efe1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -95,7 +95,7 @@ case class UnresolvedFunction(name: String, children: Seq[Expression]) extends E * Represents all of the input attributes to a given relational operator, for example in * "SELECT * FROM ...". A [[Star]] gets automatically expanded during analysis. */ -trait Star extends Attribute with trees.LeafNode[Expression] { +trait Star extends NamedExpression with trees.LeafNode[Expression] { self: Product => override def name: String = throw new UnresolvedException(this, "name") @@ -103,13 +103,9 @@ trait Star extends Attribute with trees.LeafNode[Expression] { override def dataType: DataType = throw new UnresolvedException(this, "dataType") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers") + override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute") override lazy val resolved = false - override def newInstance(): Star = this - override def withNullability(newNullability: Boolean): Star = this - override def withQualifiers(newQualifiers: Seq[String]): Star = this - override def withName(newName: String): Star = this - // Star gets expanded at runtime so we never evaluate a Star. 
override def eval(input: Row = null): EvaluatedType = throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") @@ -154,7 +150,7 @@ case class UnresolvedStar(table: Option[String]) extends Star { * @param names the names to be associated with each output of computing [[child]]. */ case class MultiAlias(child: Expression, names: Seq[String]) - extends Attribute with trees.UnaryNode[Expression] { + extends NamedExpression with trees.UnaryNode[Expression] { override def name: String = throw new UnresolvedException(this, "name") @@ -166,15 +162,9 @@ case class MultiAlias(child: Expression, names: Seq[String]) override def qualifiers: Seq[String] = throw new UnresolvedException(this, "qualifiers") - override lazy val resolved = false - - override def newInstance(): MultiAlias = this - - override def withNullability(newNullability: Boolean): MultiAlias = this - - override def withQualifiers(newQualifiers: Seq[String]): MultiAlias = this + override def toAttribute: Attribute = throw new UnresolvedException(this, "toAttribute") - override def withName(newName: String): MultiAlias = this + override lazy val resolved = false override def eval(input: Row = null): EvaluatedType = throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") @@ -194,7 +184,17 @@ case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star { override def toString: String = expressions.mkString("ResolvedStar(", ", ", ")") } -case class UnresolvedGetField(child: Expression, fieldName: String) extends UnaryExpression { +/** + * Extracts a value or values from an Expression + * + * @param child The expression to extract value from, + * can be Map, Array, Struct or array of Structs. + * @param extraction The expression to describe the extraction, + * can be key of Map, index of Array, field name of Struct. + */ +case class UnresolvedExtractValue(child: Expression, extraction: Expression) + extends UnaryExpression { + override def dataType: DataType = throw new UnresolvedException(this, "dataType") override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") @@ -203,5 +203,5 @@ case class UnresolvedGetField(child: Expression, fieldName: String) extends Unar override def eval(input: Row = null): EvaluatedType = throw new TreeNodeException(this, s"No function to evaluate expression. 
type: ${this.nodeName}") - override def toString: String = s"$child.$fieldName" + override def toString: String = s"$child[$extraction]" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index fa6cc7a1a36cf..51821757967d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -20,9 +20,8 @@ package org.apache.spark.sql.catalyst import java.sql.{Date, Timestamp} import scala.language.implicitConversions -import scala.reflect.runtime.universe.{TypeTag, typeTag} -import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedGetField, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedExtractValue, UnresolvedAttribute} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} @@ -61,7 +60,7 @@ package object dsl { trait ImplicitOperators { def expr: Expression - def unary_- : Expression= UnaryMinus(expr) + def unary_- : Expression = UnaryMinus(expr) def unary_! : Predicate = Not(expr) def unary_~ : Expression = BitwiseNot(expr) @@ -100,8 +99,9 @@ package object dsl { def isNull: Predicate = IsNull(expr) def isNotNull: Predicate = IsNotNull(expr) - def getItem(ordinal: Expression): Expression = GetItem(expr, ordinal) - def getField(fieldName: String): UnresolvedGetField = UnresolvedGetField(expr, fieldName) + def getItem(ordinal: Expression): UnresolvedExtractValue = UnresolvedExtractValue(expr, ordinal) + def getField(fieldName: String): UnresolvedExtractValue = + UnresolvedExtractValue(expr, Literal(fieldName)) def cast(to: DataType): Expression = Cast(expr, to) @@ -140,7 +140,7 @@ package object dsl { // Note that if we make ExpressionConversions an object rather than a trait, we can // then make this a value class to avoid the small penalty of runtime instantiation. 
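// For example, $"a" expands to analysis.UnresolvedAttribute("a"), giving test code an
// interpolator-style way to build unresolved column references.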
def $(args: Any*): analysis.UnresolvedAttribute = { - analysis.UnresolvedAttribute(sc.s(args :_*)) + analysis.UnresolvedAttribute(sc.s(args : _*)) } } @@ -233,129 +233,59 @@ package object dsl { implicit class DslAttribute(a: AttributeReference) { def notNull: AttributeReference = a.withNullability(false) def nullable: AttributeReference = a.withNullability(true) - - // Protobuf terminology - def required: AttributeReference = a.withNullability(false) - def at(ordinal: Int): BoundReference = BoundReference(ordinal, a.dataType, a.nullable) } } - object expressions extends ExpressionConversions // scalastyle:ignore - abstract class LogicalPlanFunctions { - def logicalPlan: LogicalPlan - - def select(exprs: NamedExpression*): LogicalPlan = Project(exprs, logicalPlan) + object plans { // scalastyle:ignore + implicit class DslLogicalPlan(val logicalPlan: LogicalPlan) { + def select(exprs: NamedExpression*): LogicalPlan = Project(exprs, logicalPlan) - def where(condition: Expression): LogicalPlan = Filter(condition, logicalPlan) + def where(condition: Expression): LogicalPlan = Filter(condition, logicalPlan) - def limit(limitExpr: Expression): LogicalPlan = Limit(limitExpr, logicalPlan) + def limit(limitExpr: Expression): LogicalPlan = Limit(limitExpr, logicalPlan) - def join( + def join( otherPlan: LogicalPlan, joinType: JoinType = Inner, condition: Option[Expression] = None): LogicalPlan = - Join(logicalPlan, otherPlan, joinType, condition) + Join(logicalPlan, otherPlan, joinType, condition) - def orderBy(sortExprs: SortOrder*): LogicalPlan = Sort(sortExprs, true, logicalPlan) + def orderBy(sortExprs: SortOrder*): LogicalPlan = Sort(sortExprs, true, logicalPlan) - def sortBy(sortExprs: SortOrder*): LogicalPlan = Sort(sortExprs, false, logicalPlan) + def sortBy(sortExprs: SortOrder*): LogicalPlan = Sort(sortExprs, false, logicalPlan) - def groupBy(groupingExprs: Expression*)(aggregateExprs: Expression*): LogicalPlan = { - val aliasedExprs = aggregateExprs.map { - case ne: NamedExpression => ne - case e => Alias(e, e.toString)() + def groupBy(groupingExprs: Expression*)(aggregateExprs: Expression*): LogicalPlan = { + val aliasedExprs = aggregateExprs.map { + case ne: NamedExpression => ne + case e => Alias(e, e.toString)() + } + Aggregate(groupingExprs, aliasedExprs, logicalPlan) } - Aggregate(groupingExprs, aliasedExprs, logicalPlan) - } - def subquery(alias: Symbol): LogicalPlan = Subquery(alias.name, logicalPlan) + def subquery(alias: Symbol): LogicalPlan = Subquery(alias.name, logicalPlan) + + def except(otherPlan: LogicalPlan): LogicalPlan = Except(logicalPlan, otherPlan) - def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) + def intersect(otherPlan: LogicalPlan): LogicalPlan = Intersect(logicalPlan, otherPlan) - def sfilter[T1](arg1: Symbol)(udf: (T1) => Boolean): LogicalPlan = - Filter(ScalaUdf(udf, BooleanType, Seq(UnresolvedAttribute(arg1.name))), logicalPlan) + def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, otherPlan) - // TODO specify the output column names - def generate( + // TODO specify the output column names + def generate( generator: Generator, join: Boolean = false, outer: Boolean = false, alias: Option[String] = None): LogicalPlan = - Generate(generator, join = join, outer = outer, alias, Nil, logicalPlan) + Generate(generator, join = join, outer = outer, alias, Nil, logicalPlan) - def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan = - InsertIntoTable( - 
analysis.UnresolvedRelation(Seq(tableName)), Map.empty, logicalPlan, overwrite, false) + def insertInto(tableName: String, overwrite: Boolean = false): LogicalPlan = + InsertIntoTable( + analysis.UnresolvedRelation(Seq(tableName)), Map.empty, logicalPlan, overwrite, false) - def analyze: LogicalPlan = EliminateSubQueries(analysis.SimpleAnalyzer.execute(logicalPlan)) - } - - object plans { // scalastyle:ignore - implicit class DslLogicalPlan(val logicalPlan: LogicalPlan) extends LogicalPlanFunctions { - def writeToFile(path: String): LogicalPlan = WriteToFile(path, logicalPlan) + def analyze: LogicalPlan = EliminateSubQueries(analysis.SimpleAnalyzer.execute(logicalPlan)) } } - - case class ScalaUdfBuilder[T: TypeTag](f: AnyRef) { - def call(args: Expression*): ScalaUdf = { - ScalaUdf(f, ScalaReflection.schemaFor(typeTag[T]).dataType, args) - } - } - - // scalastyle:off - /** functionToUdfBuilder 1-22 were generated by this script - - (1 to 22).map { x => - val argTypes = Seq.fill(x)("_").mkString(", ") - s"implicit def functionToUdfBuilder[T: TypeTag](func: Function$x[$argTypes, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func)" - } - */ - - implicit def functionToUdfBuilder[T: TypeTag](func: Function1[_, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function2[_, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function3[_, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function4[_, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function5[_, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function6[_, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function7[_, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function8[_, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function9[_, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function10[_, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function11[_, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function12[_, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function13[_, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = 
ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - - implicit def functionToUdfBuilder[T: TypeTag](func: Function22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, T]): ScalaUdfBuilder[T] = ScalaUdfBuilder(func) - // scalastyle:on } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala index 0fd4f9b374ee0..d2a90a50c89f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/errors/package.scala @@ -49,11 +49,4 @@ package object errors { case e: Exception => throw new TreeNodeException(tree, msg, e) } } - - /** - * Executes `f` which is expected to throw a - * [[catalyst.errors.TreeNodeException TreeNodeException]]. The first tree encountered in - * the stack of exceptions of type `TreeType` is returned. - */ - def getTree[TreeType <: TreeNode[_]](f: => Unit): TreeType = ??? // TODO: Implement } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index adf941ab2a45f..a8785595c2f9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -21,6 +21,7 @@ import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} import org.apache.spark.Logging +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ /** Cast the child expression to the target data type. 
*/ @@ -34,48 +35,48 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w private[this] def forceNullable(from: DataType, to: DataType) = (from, to) match { case (StringType, _: NumericType) => true - case (StringType, TimestampType) => true - case (DoubleType, TimestampType) => true - case (FloatType, TimestampType) => true - case (StringType, DateType) => true - case (_: NumericType, DateType) => true - case (BooleanType, DateType) => true - case (DateType, _: NumericType) => true - case (DateType, BooleanType) => true + case (StringType, TimestampType) => true + case (DoubleType, TimestampType) => true + case (FloatType, TimestampType) => true + case (StringType, DateType) => true + case (_: NumericType, DateType) => true + case (BooleanType, DateType) => true + case (DateType, _: NumericType) => true + case (DateType, BooleanType) => true case (DoubleType, _: DecimalType) => true - case (FloatType, _: DecimalType) => true + case (FloatType, _: DecimalType) => true case (_, DecimalType.Fixed(_, _)) => true // TODO: not all upcasts here can really give null - case _ => false + case _ => false } private[this] def resolvableNullability(from: Boolean, to: Boolean) = !from || to private[this] def resolve(from: DataType, to: DataType): Boolean = { (from, to) match { - case (from, to) if from == to => true + case (from, to) if from == to => true - case (NullType, _) => true + case (NullType, _) => true - case (_, StringType) => true + case (_, StringType) => true - case (StringType, BinaryType) => true + case (StringType, BinaryType) => true - case (StringType, BooleanType) => true - case (DateType, BooleanType) => true - case (TimestampType, BooleanType) => true - case (_: NumericType, BooleanType) => true + case (StringType, BooleanType) => true + case (DateType, BooleanType) => true + case (TimestampType, BooleanType) => true + case (_: NumericType, BooleanType) => true - case (StringType, TimestampType) => true - case (BooleanType, TimestampType) => true - case (DateType, TimestampType) => true - case (_: NumericType, TimestampType) => true + case (StringType, TimestampType) => true + case (BooleanType, TimestampType) => true + case (DateType, TimestampType) => true + case (_: NumericType, TimestampType) => true - case (_, DateType) => true + case (_, DateType) => true - case (StringType, _: NumericType) => true - case (BooleanType, _: NumericType) => true - case (DateType, _: NumericType) => true - case (TimestampType, _: NumericType) => true + case (StringType, _: NumericType) => true + case (BooleanType, _: NumericType) => true + case (DateType, _: NumericType) => true + case (TimestampType, _: NumericType) => true case (_: NumericType, _: NumericType) => true case (ArrayType(from, fn), ArrayType(to, tn)) => @@ -411,21 +412,21 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w private[this] def cast(from: DataType, to: DataType): Any => Any = to match { case dt if dt == child.dataType => identity[Any] - case StringType => castToString(from) - case BinaryType => castToBinary(from) - case DateType => castToDate(from) - case decimal: DecimalType => castToDecimal(from, decimal) - case TimestampType => castToTimestamp(from) - case BooleanType => castToBoolean(from) - case ByteType => castToByte(from) - case ShortType => castToShort(from) - case IntegerType => castToInt(from) - case FloatType => castToFloat(from) - case LongType => castToLong(from) - case DoubleType => castToDouble(from) - case array: ArrayType => 
castArray(from.asInstanceOf[ArrayType], array) - case map: MapType => castMap(from.asInstanceOf[MapType], map) - case struct: StructType => castStruct(from.asInstanceOf[StructType], struct) + case StringType => castToString(from) + case BinaryType => castToBinary(from) + case DateType => castToDate(from) + case decimal: DecimalType => castToDecimal(from, decimal) + case TimestampType => castToTimestamp(from) + case BooleanType => castToBoolean(from) + case ByteType => castToByte(from) + case ShortType => castToShort(from) + case IntegerType => castToInt(from) + case FloatType => castToFloat(from) + case LongType => castToLong(from) + case DoubleType => castToDouble(from) + case array: ArrayType => castArray(from.asInstanceOf[ArrayType], array) + case map: MapType => castMap(from.asInstanceOf[MapType], map) + case struct: StructType => castStruct(from.asInstanceOf[StructType], struct) } private[this] lazy val cast: Any => Any = cast(child.dataType, dataType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 1d71c1b4b0c7c..482090191fbb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.trees import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.types._ @@ -41,7 +40,15 @@ abstract class Expression extends TreeNode[Expression] { * - A [[Cast]] or [[UnaryMinus]] is foldable if its child is foldable */ def foldable: Boolean = false + + /** + * Returns true when the current expression always return the same result for fixed input values. + */ + // TODO: Need to define explicit input values vs implicit input values. + def deterministic: Boolean = true + def nullable: Boolean + def references: AttributeSet = AttributeSet(children.flatMap(_.references.iterator)) /** Returns the result of evaluating this expression on a given input Row */ @@ -65,7 +72,7 @@ abstract class Expression extends TreeNode[Expression] { * Returns true if all the children of this expression have been resolved to a specific schema * and false if any still contains any unresolved placeholders. */ - def childrenResolved: Boolean = !children.exists(!_.resolved) + def childrenResolved: Boolean = children.forall(_.resolved) /** * Returns a string representation of this expression that does not have developer centric @@ -77,6 +84,19 @@ abstract class Expression extends TreeNode[Expression] { case u: UnresolvedAttribute => PrettyAttribute(u.name) }.toString } + + /** + * Returns true when two expressions will always compute the same result, even if they differ + * cosmetically (i.e. capitalization of names in attributes may be different). 
+ */ + def semanticEquals(other: Expression): Boolean = this.getClass == other.getClass && { + val elements1 = this.productIterator.toSeq + val elements2 = other.asInstanceOf[Product].productIterator.toSeq + elements1.length == elements2.length && elements1.zip(elements2).forall { + case (e1: Expression, e2: Expression) => e1 semanticEquals e2 + case (i1, i2) => i1 == i2 + } + } } abstract class BinaryExpression extends Expression with trees.BinaryNode[Expression] { @@ -86,6 +106,8 @@ abstract class BinaryExpression extends Expression with trees.BinaryNode[Express override def foldable: Boolean = left.foldable && right.foldable + override def nullable: Boolean = left.nullable || right.nullable + override def toString: String = s"($left $symbol $right)" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala new file mode 100644 index 0000000000000..737bff863a0e2 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExtractValue.scala @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import scala.collection.Map + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.types._ + +object ExtractValue { + /** + * Returns the resolved `ExtractValue`. It will return one kind of concrete `ExtractValue`, + * depend on the type of `child` and `extraction`. 
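For illustration, a standalone sketch of the semanticEquals idea introduced above: two trees compare equal when they have the same shape and their non-expression fields match, while attributes are compared by expression id rather than by their (possibly differently capitalized) names. The mini Expr hierarchy below is hypothetical and only mirrors the shape of the Catalyst code:

sealed trait Expr extends Product {
  // Same class, same arity; children compared semantically, other fields with ==.
  def semanticEquals(other: Expr): Boolean = this.getClass == other.getClass && {
    val a = this.productIterator.toSeq
    val b = other.productIterator.toSeq
    a.length == b.length && a.zip(b).forall {
      case (e1: Expr, e2: Expr) => e1 semanticEquals e2
      case (x, y)               => x == y
    }
  }
}

// An attribute is identified by its id; the name is cosmetic.
case class Attr(name: String, id: Long) extends Expr {
  override def semanticEquals(other: Expr): Boolean = other match {
    case Attr(_, otherId) => id == otherId
    case _                => false
  }
}
case class AddExpr(left: Expr, right: Expr) extends Expr

object SemanticEqualsSketch extends App {
  val a = AddExpr(Attr("col", 1L), Attr("col", 2L))
  val b = AddExpr(Attr("COL", 1L), Attr("Col", 2L))
  println(a == b)              // false: case-sensitive structural equality
  println(a semanticEquals b)  // true: only the expression ids matter
}
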
+ * + * `child` | `extraction` | concrete `ExtractValue` + * ---------------------------------------------------------------- + * Struct | Literal String | GetStructField + * Array[Struct] | Literal String | GetArrayStructFields + * Array | Integral type | GetArrayItem + * Map | Any type | GetMapValue + */ + def apply( + child: Expression, + extraction: Expression, + resolver: Resolver): ExtractValue = { + + (child.dataType, extraction) match { + case (StructType(fields), Literal(fieldName, StringType)) => + val ordinal = findField(fields, fieldName.toString, resolver) + GetStructField(child, fields(ordinal), ordinal) + case (ArrayType(StructType(fields), containsNull), Literal(fieldName, StringType)) => + val ordinal = findField(fields, fieldName.toString, resolver) + GetArrayStructFields(child, fields(ordinal), ordinal, containsNull) + case (_: ArrayType, _) if extraction.dataType.isInstanceOf[IntegralType] => + GetArrayItem(child, extraction) + case (_: MapType, _) => + GetMapValue(child, extraction) + case (otherType, _) => + val errorMsg = otherType match { + case StructType(_) | ArrayType(StructType(_), _) => + s"Field name should be String Literal, but it's $extraction" + case _: ArrayType => + s"Array index should be integral type, but it's ${extraction.dataType}" + case other => + s"Can't extract value from $child" + } + throw new AnalysisException(errorMsg) + } + } + + def unapply(g: ExtractValue): Option[(Expression, Expression)] = { + g match { + case o: ExtractValueWithOrdinal => Some((o.child, o.ordinal)) + case _ => Some((g.child, null)) + } + } + + /** + * Find the ordinal of StructField, report error if no desired field or over one + * desired fields are found. + */ + private def findField(fields: Array[StructField], fieldName: String, resolver: Resolver): Int = { + val checkField = (f: StructField) => resolver(f.name, fieldName) + val ordinal = fields.indexWhere(checkField) + if (ordinal == -1) { + throw new AnalysisException( + s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}") + } else if (fields.indexWhere(checkField, ordinal + 1) != -1) { + throw new AnalysisException( + s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}") + } else { + ordinal + } + } +} + +trait ExtractValue extends UnaryExpression { + self: Product => + + type EvaluatedType = Any +} + +/** + * Returns the value of fields in the Struct `child`. + */ +case class GetStructField(child: Expression, field: StructField, ordinal: Int) + extends ExtractValue { + + override def dataType: DataType = field.dataType + override def nullable: Boolean = child.nullable || field.nullable + override def foldable: Boolean = child.foldable + override def toString: String = s"$child.${field.name}" + + override def eval(input: Row): Any = { + val baseValue = child.eval(input).asInstanceOf[Row] + if (baseValue == null) null else baseValue(ordinal) + } +} + +/** + * Returns the array of value of fields in the Array of Struct `child`. 
+ */ +case class GetArrayStructFields( + child: Expression, + field: StructField, + ordinal: Int, + containsNull: Boolean) extends ExtractValue { + + override def dataType: DataType = ArrayType(field.dataType, containsNull) + override def nullable: Boolean = child.nullable + override def foldable: Boolean = child.foldable + override def toString: String = s"$child.${field.name}" + + override def eval(input: Row): Any = { + val baseValue = child.eval(input).asInstanceOf[Seq[Row]] + if (baseValue == null) null else { + baseValue.map { row => + if (row == null) null else row(ordinal) + } + } + } +} + +abstract class ExtractValueWithOrdinal extends ExtractValue { + self: Product => + + def ordinal: Expression + + /** `Null` is returned for invalid ordinals. */ + override def nullable: Boolean = true + override def foldable: Boolean = child.foldable && ordinal.foldable + override def toString: String = s"$child[$ordinal]" + override def children: Seq[Expression] = child :: ordinal :: Nil + + override def eval(input: Row): Any = { + val value = child.eval(input) + if (value == null) { + null + } else { + val o = ordinal.eval(input) + if (o == null) { + null + } else { + evalNotNull(value, o) + } + } + } + + protected def evalNotNull(value: Any, ordinal: Any): Any +} + +/** + * Returns the field at `ordinal` in the Array `child` + */ +case class GetArrayItem(child: Expression, ordinal: Expression) + extends ExtractValueWithOrdinal { + + override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType + + override lazy val resolved = childrenResolved && + child.dataType.isInstanceOf[ArrayType] && ordinal.dataType.isInstanceOf[IntegralType] + + protected def evalNotNull(value: Any, ordinal: Any) = { + // TODO: consider using Array[_] for ArrayType child to avoid + // boxing of primitives + val baseValue = value.asInstanceOf[Seq[_]] + val index = ordinal.asInstanceOf[Int] + if (index >= baseValue.size || index < 0) { + null + } else { + baseValue(index) + } + } +} + +/** + * Returns the value of key `ordinal` in Map `child` + */ +case class GetMapValue(child: Expression, ordinal: Expression) + extends ExtractValueWithOrdinal { + + override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType + + override lazy val resolved = childrenResolved && child.dataType.isInstanceOf[MapType] + + protected def evalNotNull(value: Any, ordinal: Any) = { + val baseValue = value.asInstanceOf[Map[Any, _]] + baseValue.get(ordinal).orNull + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala index c2866cd955409..8cae548279eb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Projection.scala @@ -548,3 +548,97 @@ class JoinedRow5 extends Row { } } } + +/** + * JIT HACK: Replace with macros + */ +class JoinedRow6 extends Row { + private[this] var row1: Row = _ + private[this] var row2: Row = _ + + def this(left: Row, right: Row) = { + this() + row1 = left + row2 = right + } + + /** Updates this JoinedRow to used point at two new base rows. Returns itself. */ + def apply(r1: Row, r2: Row): Row = { + row1 = r1 + row2 = r2 + this + } + + /** Updates this JoinedRow by updating its left base row. Returns itself. 
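The ordinal-based extractors above (GetArrayItem, GetMapValue) return null rather than throwing for out-of-range indices or absent keys. A standalone sketch of that contract, using plain Scala collections in place of Catalyst rows; the names are illustrative only:

object ExtractValueSemanticsSketch extends App {
  // Mirrors GetArrayItem.evalNotNull: null for a negative or out-of-range index.
  def arrayItem(values: Seq[Any], ordinal: Int): Any =
    if (ordinal >= values.size || ordinal < 0) null else values(ordinal)

  // Mirrors GetMapValue.evalNotNull: absent keys become null via orNull.
  def mapValue(values: Map[Any, Any], key: Any): Any =
    values.get(key).orNull

  println(arrayItem(Seq("a", "b", "c"), 1))  // b
  println(arrayItem(Seq("a", "b", "c"), 7))  // null

  val m: Map[Any, Any] = Map("x" -> 1)
  println(mapValue(m, "x"))                  // 1
  println(mapValue(m, "y"))                  // null
}
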
*/ + def withLeft(newLeft: Row): Row = { + row1 = newLeft + this + } + + /** Updates this JoinedRow by updating its right base row. Returns itself. */ + def withRight(newRight: Row): Row = { + row2 = newRight + this + } + + override def toSeq: Seq[Any] = row1.toSeq ++ row2.toSeq + + override def length: Int = row1.length + row2.length + + override def apply(i: Int): Any = + if (i < row1.length) row1(i) else row2(i - row1.length) + + override def isNullAt(i: Int): Boolean = + if (i < row1.length) row1.isNullAt(i) else row2.isNullAt(i - row1.length) + + override def getInt(i: Int): Int = + if (i < row1.length) row1.getInt(i) else row2.getInt(i - row1.length) + + override def getLong(i: Int): Long = + if (i < row1.length) row1.getLong(i) else row2.getLong(i - row1.length) + + override def getDouble(i: Int): Double = + if (i < row1.length) row1.getDouble(i) else row2.getDouble(i - row1.length) + + override def getBoolean(i: Int): Boolean = + if (i < row1.length) row1.getBoolean(i) else row2.getBoolean(i - row1.length) + + override def getShort(i: Int): Short = + if (i < row1.length) row1.getShort(i) else row2.getShort(i - row1.length) + + override def getByte(i: Int): Byte = + if (i < row1.length) row1.getByte(i) else row2.getByte(i - row1.length) + + override def getFloat(i: Int): Float = + if (i < row1.length) row1.getFloat(i) else row2.getFloat(i - row1.length) + + override def getString(i: Int): String = + if (i < row1.length) row1.getString(i) else row2.getString(i - row1.length) + + override def getAs[T](i: Int): T = + if (i < row1.length) row1.getAs[T](i) else row2.getAs[T](i - row1.length) + + override def copy(): Row = { + val totalSize = row1.length + row2.length + val copiedValues = new Array[Any](totalSize) + var i = 0 + while(i < totalSize) { + copiedValues(i) = apply(i) + i += 1 + } + new GenericRow(copiedValues) + } + + override def toString: String = { + // Make sure toString never throws NullPointerException. 
+ if ((row1 eq null) && (row2 eq null)) { + "[ empty row ]" + } else if (row1 eq null) { + row2.mkString("[", ",", "]") + } else if (row2 eq null) { + row1.mkString("[", ",", "]") + } else { + mkString("[", ",", "]") + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index 9a77ca624ebe2..fe2873e0be34d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -55,9 +55,9 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi }.foreach(println) */ - - val f = children.size match { - case 0 => + + private[this] val f = children.size match { + case 0 => val func = function.asInstanceOf[() => Any] (input: Row) => { func() @@ -956,7 +956,7 @@ case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expressi } // scalastyle:on - - override def eval(input: Row): Any = CatalystTypeConverters.convertToCatalyst(f(input), dataType) + private[this] val converter = CatalystTypeConverters.createToCatalystConverter(dataType) + override def eval(input: Row): Any = converter(f(input)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 83074eb1e6310..0dd8a4e9b0759 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -29,7 +29,7 @@ case object Descending extends SortDirection * An expression that can be used to sort a tuple. This class extends expression primarily so that * transformations over expression will descend into its child. 
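The JoinedRow6 class added above presents two rows as a single row by index arithmetic instead of copying their fields. A minimal standalone sketch of that pattern over plain sequences; the names are illustrative and not part of the patch:

// Present two rows as one without copying; copy() materializes only on demand.
final class JoinedSeq(private var left: Seq[Any], private var right: Seq[Any]) {
  // Reuse the same wrapper across input rows, mirroring withLeft/withRight.
  def withLeft(newLeft: Seq[Any]): JoinedSeq = { left = newLeft; this }
  def length: Int = left.length + right.length
  // Mirrors JoinedRow6.apply: fields of the left row come first.
  def apply(i: Int): Any =
    if (i < left.length) left(i) else right(i - left.length)
  def copy(): IndexedSeq[Any] = (0 until length).map(i => apply(i))
}

object JoinedSeqSketch extends App {
  val joined = new JoinedSeq(Seq(1, "a"), Seq(2.0, true))
  println(joined(0))      // 1
  println(joined(3))      // true
  println(joined.copy())  // Vector(1, a, 2.0, true)
}
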
*/ -case class SortOrder(child: Expression, direction: SortDirection) extends Expression +case class SortOrder(child: Expression, direction: SortDirection) extends Expression with trees.UnaryNode[Expression] { override def dataType: DataType = child.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index f3830c6d3bcf2..27a5a2c2e8a43 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -113,7 +113,7 @@ case class MinFunction(expr: Expression, base: AggregateExpression) extends Aggr override def update(input: Row): Unit = { if (currentMin.value == null) { currentMin.value = expr.eval(input) - } else if(cmp.eval(input) == true) { + } else if (cmp.eval(input) == true) { currentMin.value = expr.eval(input) } } @@ -144,7 +144,7 @@ case class MaxFunction(expr: Expression, base: AggregateExpression) extends Aggr override def update(input: Row): Unit = { if (currentMax.value == null) { currentMax.value = expr.eval(input) - } else if(cmp.eval(input) == true) { + } else if (cmp.eval(input) == true) { currentMax.value = expr.eval(input) } } @@ -396,13 +396,13 @@ case class Sum(child: Expression) extends PartialAggregate with trees.UnaryNode[ * Combining PartitionLevel InputData * <-- null * Zero <-- Zero <-- null - * + * * <-- null <-- no data - * null <-- null <-- no data + * null <-- null <-- no data */ case class CombineSum(child: Expression) extends AggregateExpression { def this() = this(null) - + override def children: Seq[Expression] = child :: Nil override def nullable: Boolean = true override def dataType: DataType = child.dataType @@ -618,7 +618,7 @@ case class SumFunction(expr: Expression, base: AggregateExpression) extends Aggr private val sum = MutableLiteral(null, calcType) - private val addFunction = + private val addFunction = Coalesce(Seq(Add(Coalesce(Seq(sum, zero)), Cast(expr, calcType)), sum, zero)) override def update(input: Row): Unit = { @@ -636,7 +636,7 @@ case class SumFunction(expr: Expression, base: AggregateExpression) extends Aggr case class CombineSumFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { - + def this() = this(null, null) // Required for serialization. 
private val calcType = @@ -651,12 +651,12 @@ case class CombineSumFunction(expr: Expression, base: AggregateExpression) private val sum = MutableLiteral(null, calcType) - private val addFunction = + private val addFunction = Coalesce(Seq(Add(Coalesce(Seq(sum, zero)), Cast(expr, calcType)), sum, zero)) - + override def update(input: Row): Unit = { val result = expr.eval(input) - // partial sum result can be null only when no input rows present + // partial sum result can be null only when no input rows present if(result != null) { sum.update(addFunction, input) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 140ccd8d3796f..1c70e8a41de46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -74,14 +74,12 @@ abstract class BinaryArithmetic extends BinaryExpression { type EvaluatedType = Any - def nullable: Boolean = left.nullable || right.nullable - override lazy val resolved = left.resolved && right.resolved && left.dataType == right.dataType && !DecimalType.isFixed(left.dataType) - def dataType: DataType = { + override def dataType: DataType = { if (!resolved) { throw new UnresolvedException(this, s"datatype. Can not resolve due to differing types ${left.dataType}, ${right.dataType}") @@ -186,7 +184,7 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic case it: IntegralType => it.integral.asInstanceOf[Integral[Any]].quot case other => sys.error(s"Type $other does not support numeric operations") } - + override def eval(input: Row): Any = { val evalE2 = right.eval(input) if (evalE2 == null || evalE2 == 0) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d17af0e7ff87e..36964af68dd8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -250,7 +250,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin case Cast(child @ DateType(), StringType) => child.castOrNull(c => q"""org.apache.spark.sql.types.UTF8String( - org.apache.spark.sql.types.DateUtils.toString($c))""", + org.apache.spark.sql.catalyst.util.DateUtils.toString($c))""", StringType) case Cast(child @ NumericType(), IntegerType) => @@ -373,7 +373,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin // Uh, bad function name... 
child.castOrNull(c => q"!$c", BooleanType) - case Add(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 + $eval2" } + case Add(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 + $eval2" } case Subtract(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 - $eval2" } case Multiply(e1, e2) => (e1, e2) evaluate { case (eval1, eval2) => q"$eval1 * $eval2" } case Divide(e1, e2) => @@ -665,7 +665,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin protected def defaultPrimitive(dt: DataType) = dt match { case BooleanType => ru.Literal(Constant(false)) case FloatType => ru.Literal(Constant(-1.0.toFloat)) - case StringType => q"""org.apache.spark.sql.types.UTF8String("")""" + case StringType => q"""org.apache.spark.sql.types.UTF8String("")""" case ShortType => ru.Literal(Constant(-1.toShort)) case LongType => ru.Literal(Constant(-1L)) case ByteType => ru.Literal(Constant(-1.toByte)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala index 584f938445c8c..31c63a79ebc8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala @@ -161,7 +161,7 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] { } } - val hashValues = expressions.zipWithIndex.map { case (e,i) => + val hashValues = expressions.zipWithIndex.map { case (e, i) => val elementName = newTermName(s"c$i") val nonNull = e.dataType match { case BooleanType => q"if ($elementName) 0 else 1" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala index fc1f69655963d..98c1c405ad684 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypes.scala @@ -17,148 +17,17 @@ package org.apache.spark.sql.catalyst.expressions -import scala.collection.Map - -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.types._ -/** - * Returns the item at `ordinal` in the Array `child` or the Key `ordinal` in Map `child`. - */ -case class GetItem(child: Expression, ordinal: Expression) extends Expression { - type EvaluatedType = Any - - val children: Seq[Expression] = child :: ordinal :: Nil - /** `Null` is returned for invalid ordinals. 
*/ - override def nullable: Boolean = true - override def foldable: Boolean = child.foldable && ordinal.foldable - - override def dataType: DataType = child.dataType match { - case ArrayType(dt, _) => dt - case MapType(_, vt, _) => vt - } - override lazy val resolved = - childrenResolved && - (child.dataType.isInstanceOf[ArrayType] || child.dataType.isInstanceOf[MapType]) - - override def toString: String = s"$child[$ordinal]" - - override def eval(input: Row): Any = { - val value = child.eval(input) - if (value == null) { - null - } else { - val key = ordinal.eval(input) - if (key == null) { - null - } else { - if (child.dataType.isInstanceOf[ArrayType]) { - // TODO: consider using Array[_] for ArrayType child to avoid - // boxing of primitives - val baseValue = value.asInstanceOf[Seq[_]] - val o = key.asInstanceOf[Int] - if (o >= baseValue.size || o < 0) { - null - } else { - baseValue(o) - } - } else { - val baseValue = value.asInstanceOf[Map[Any, _]] - baseValue.get(key).orNull - } - } - } - } -} - - -trait GetField extends UnaryExpression { - self: Product => - - type EvaluatedType = Any - override def foldable: Boolean = child.foldable - override def toString: String = s"$child.${field.name}" - - def field: StructField -} - -object GetField { - /** - * Returns the resolved `GetField`, and report error if no desired field or over one - * desired fields are found. - */ - def apply( - expr: Expression, - fieldName: String, - resolver: Resolver): GetField = { - def findField(fields: Array[StructField]): Int = { - val checkField = (f: StructField) => resolver(f.name, fieldName) - val ordinal = fields.indexWhere(checkField) - if (ordinal == -1) { - throw new AnalysisException( - s"No such struct field $fieldName in ${fields.map(_.name).mkString(", ")}") - } else if (fields.indexWhere(checkField, ordinal + 1) != -1) { - throw new AnalysisException( - s"Ambiguous reference to fields ${fields.filter(checkField).mkString(", ")}") - } else { - ordinal - } - } - expr.dataType match { - case StructType(fields) => - val ordinal = findField(fields) - StructGetField(expr, fields(ordinal), ordinal) - case ArrayType(StructType(fields), containsNull) => - val ordinal = findField(fields) - ArrayGetField(expr, fields(ordinal), ordinal, containsNull) - case otherType => - throw new AnalysisException(s"GetField is not valid on fields of type $otherType") - } - } -} - -/** - * Returns the value of fields in the Struct `child`. - */ -case class StructGetField(child: Expression, field: StructField, ordinal: Int) extends GetField { - - override def dataType: DataType = field.dataType - override def nullable: Boolean = child.nullable || field.nullable - - override def eval(input: Row): Any = { - val baseValue = child.eval(input).asInstanceOf[Row] - if (baseValue == null) null else baseValue(ordinal) - } -} - -/** - * Returns the array of value of fields in the Array of Struct `child`. - */ -case class ArrayGetField(child: Expression, field: StructField, ordinal: Int, containsNull: Boolean) - extends GetField { - - override def dataType: DataType = ArrayType(field.dataType, containsNull) - override def nullable: Boolean = child.nullable - - override def eval(input: Row): Any = { - val baseValue = child.eval(input).asInstanceOf[Seq[Row]] - if (baseValue == null) null else { - baseValue.map { row => - if (row == null) null else row(ordinal) - } - } - } -} /** * Returns an Array containing the evaluation of all children expressions. 
*/ case class CreateArray(children: Seq[Expression]) extends Expression { override type EvaluatedType = Any - + override def foldable: Boolean = children.forall(_.foldable) - + lazy val childTypes = children.map(_.dataType).distinct override lazy val resolved = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 9a6cb048af5ad..ec46fc02e593d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -56,6 +56,12 @@ abstract class Generator extends Expression { /** Should be implemented by child classes to perform specific Generators. */ override def eval(input: Row): TraversableOnce[Row] + + /** + * Notifies that there are no more rows to process, clean up code, and additional + * rows can be made here. + */ + def terminate(): TraversableOnce[Row] = Nil } /** @@ -99,8 +105,8 @@ case class Explode(child: Expression) val inputArray = child.eval(input).asInstanceOf[Seq[Any]] if (inputArray == null) Nil else inputArray.map(v => new GenericRow(Array(v))) case MapType(_, _, _) => - val inputMap = child.eval(input).asInstanceOf[Map[Any,Any]] - if (inputMap == null) Nil else inputMap.map { case (k,v) => new GenericRow(Array(k,v)) } + val inputMap = child.eval(input).asInstanceOf[Map[Any, Any]] + if (inputMap == null) Nil else inputMap.map { case (k, v) => new GenericRow(Array(k, v)) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 18cba4cc46707..5f8c7354aede1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ object Literal { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala index fcc06d3aa1036..16fdeade3c4f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/binary.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions.mathfuncs -import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, BinaryExpression, Expression, Row} import org.apache.spark.sql.types._ @@ -27,7 +26,7 @@ import org.apache.spark.sql.types._ * @param f The math function. 
* @param name The short name of the function */ -abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) +abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) extends BinaryExpression with Serializable with ExpectsInputTypes { self: Product => type EvaluatedType = Any override def symbol: String = null @@ -41,13 +40,7 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) left.dataType == right.dataType && !DecimalType.isFixed(left.dataType) - override def dataType: DataType = { - if (!resolved) { - throw new UnresolvedException(this, - s"datatype. Can not resolve due to differing types ${left.dataType}, ${right.dataType}") - } - left.dataType - } + override def dataType: DataType = DoubleType override def eval(input: Row): Any = { val evalE1 = left.eval(input) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala index dc68469e060cb..9bd88cb3def46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathfuncs/unary.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.types._ * input format, therefore these functions extend `ExpectsInputTypes`. * @param name The short name of the function */ -abstract class MathematicalExpression(f: Double => Double, name: String) +abstract class UnaryMathExpression(f: Double => Double, name: String) extends UnaryExpression with Serializable with ExpectsInputTypes { self: Product => type EvaluatedType = Any @@ -47,46 +47,44 @@ abstract class MathematicalExpression(f: Double => Double, name: String) } } -case class Acos(child: Expression) extends MathematicalExpression(math.acos, "ACOS") +case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS") -case class Asin(child: Expression) extends MathematicalExpression(math.asin, "ASIN") +case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN") -case class Atan(child: Expression) extends MathematicalExpression(math.atan, "ATAN") +case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN") -case class Cbrt(child: Expression) extends MathematicalExpression(math.cbrt, "CBRT") +case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT") -case class Ceil(child: Expression) extends MathematicalExpression(math.ceil, "CEIL") +case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") -case class Cos(child: Expression) extends MathematicalExpression(math.cos, "COS") +case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") -case class Cosh(child: Expression) extends MathematicalExpression(math.cosh, "COSH") +case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH") -case class Exp(child: Expression) extends MathematicalExpression(math.exp, "EXP") +case class Exp(child: Expression) extends UnaryMathExpression(math.exp, "EXP") -case class Expm1(child: Expression) extends MathematicalExpression(math.expm1, "EXPM1") +case class Expm1(child: Expression) extends UnaryMathExpression(math.expm1, "EXPM1") -case class Floor(child: Expression) extends MathematicalExpression(math.floor, "FLOOR") +case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") -case class Log(child: Expression) extends 
MathematicalExpression(math.log, "LOG") +case class Log(child: Expression) extends UnaryMathExpression(math.log, "LOG") -case class Log10(child: Expression) extends MathematicalExpression(math.log10, "LOG10") +case class Log10(child: Expression) extends UnaryMathExpression(math.log10, "LOG10") -case class Log1p(child: Expression) extends MathematicalExpression(math.log1p, "LOG1P") +case class Log1p(child: Expression) extends UnaryMathExpression(math.log1p, "LOG1P") -case class Rint(child: Expression) extends MathematicalExpression(math.rint, "ROUND") +case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND") -case class Signum(child: Expression) extends MathematicalExpression(math.signum, "SIGNUM") +case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "SIGNUM") -case class Sin(child: Expression) extends MathematicalExpression(math.sin, "SIN") +case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") -case class Sinh(child: Expression) extends MathematicalExpression(math.sinh, "SINH") +case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH") -case class Tan(child: Expression) extends MathematicalExpression(math.tan, "TAN") +case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN") -case class Tanh(child: Expression) extends MathematicalExpression(math.tanh, "TANH") +case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH") -case class ToDegrees(child: Expression) - extends MathematicalExpression(math.toDegrees, "DEGREES") +case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegrees, "DEGREES") -case class ToRadians(child: Expression) - extends MathematicalExpression(math.toRadians, "RADIANS") +case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadians, "RADIANS") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index afcb2ce8b9cb4..50be26d0b08b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -86,7 +86,7 @@ abstract class Attribute extends NamedExpression { def withQualifiers(newQualifiers: Seq[String]): Attribute def withName(newName: String): Attribute - def toAttribute: Attribute = this + override def toAttribute: Attribute = this def newInstance(): Attribute } @@ -171,11 +171,21 @@ case class AttributeReference( val exprId: ExprId = NamedExpression.newExprId, val qualifiers: Seq[String] = Nil) extends Attribute with trees.LeafNode[Expression] { + /** + * Returns true iff the expression id is the same for both attributes. 
+ */ + def sameRef(other: AttributeReference): Boolean = this.exprId == other.exprId + override def equals(other: Any): Boolean = other match { case ar: AttributeReference => name == ar.name && exprId == ar.exprId && dataType == ar.dataType case _ => false } + override def semanticEquals(other: Expression): Boolean = other match { + case ar: AttributeReference => sameRef(ar) + case _ => false + } + override def hashCode: Int = { // See http://stackoverflow.com/questions/113511/hash-code-implementation var h = 17 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 9cb00cb2732ff..1d72a9eb834b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -70,16 +70,14 @@ trait PredicateHelper { expr.references.subsetOf(plan.outputSet) } -abstract class BinaryPredicate extends BinaryExpression with Predicate { - self: Product => - override def nullable: Boolean = left.nullable || right.nullable -} -case class Not(child: Expression) extends UnaryExpression with Predicate { +case class Not(child: Expression) extends UnaryExpression with Predicate with ExpectsInputTypes { override def foldable: Boolean = child.foldable override def nullable: Boolean = child.nullable override def toString: String = s"NOT $child" + override def expectedChildTypes: Seq[DataType] = Seq(BooleanType) + override def eval(input: Row): Any = { child.eval(input) match { case null => null @@ -112,6 +110,7 @@ case class InSet(value: Expression, hset: Set[Any]) override def children: Seq[Expression] = value :: Nil + override def foldable: Boolean = value.foldable override def nullable: Boolean = true // TODO: Figure out correct nullability semantics of IN. 
override def toString: String = s"$value INSET ${hset.mkString("(", ",", ")")}" @@ -120,7 +119,11 @@ case class InSet(value: Expression, hset: Set[Any]) } } -case class And(left: Expression, right: Expression) extends BinaryPredicate { +case class And(left: Expression, right: Expression) + extends BinaryExpression with Predicate with ExpectsInputTypes { + + override def expectedChildTypes: Seq[DataType] = Seq(BooleanType, BooleanType) + override def symbol: String = "&&" override def eval(input: Row): Any = { @@ -142,7 +145,11 @@ case class And(left: Expression, right: Expression) extends BinaryPredicate { } } -case class Or(left: Expression, right: Expression) extends BinaryPredicate { +case class Or(left: Expression, right: Expression) + extends BinaryExpression with Predicate with ExpectsInputTypes { + + override def expectedChildTypes: Seq[DataType] = Seq(BooleanType, BooleanType) + override def symbol: String = "||" override def eval(input: Row): Any = { @@ -164,7 +171,7 @@ case class Or(left: Expression, right: Expression) extends BinaryPredicate { } } -abstract class BinaryComparison extends BinaryPredicate { +abstract class BinaryComparison extends BinaryExpression with Predicate { self: Product => } @@ -347,79 +354,134 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi override def toString: String = s"if ($predicate) $trueValue else $falseValue" } +trait CaseWhenLike extends Expression { + self: Product => + + type EvaluatedType = Any + + // Note that `branches` are considered in consecutive pairs (cond, val), and the optional last + // element is the value for the default catch-all case (if provided). + // Hence, `branches` consists of at least two elements, and can have an odd or even length. + def branches: Seq[Expression] + + @transient lazy val whenList = + branches.sliding(2, 2).collect { case Seq(whenExpr, _) => whenExpr }.toSeq + @transient lazy val thenList = + branches.sliding(2, 2).collect { case Seq(_, thenExpr) => thenExpr }.toSeq + val elseValue = if (branches.length % 2 == 0) None else Option(branches.last) + + // both then and else val should be considered. + def valueTypes: Seq[DataType] = (thenList ++ elseValue).map(_.dataType) + def valueTypesEqual: Boolean = valueTypes.distinct.size <= 1 + + override def dataType: DataType = { + if (!resolved) { + throw new UnresolvedException(this, "cannot resolve due to differing types in some branches") + } + valueTypes.head + } + + override def nullable: Boolean = { + // If no value is nullable and no elseValue is provided, the whole statement defaults to null. + thenList.exists(_.nullable) || (elseValue.map(_.nullable).getOrElse(true)) + } +} + // scalastyle:off /** * Case statements of the form "CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END". * Refer to this link for the corresponding semantics: * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-ConditionalFunctions - * - * The other form of case statements "CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END" gets - * translated to this form at parsing time. Namely, such a statement gets translated to - * "CASE WHEN a=b THEN c [WHEN a=d THEN e]* [ELSE f] END". - * - * Note that `branches` are considered in consecutive pairs (cond, val), and the optional last - * element is the value for the default catch-all case (if provided). Hence, `branches` consists of - * at least two elements, and can have an odd or even length. 
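A standalone sketch of the branch layout shared by the CaseWhenLike expressions above: branches holds consecutive (condition, value) pairs, optionally followed by a single ELSE value, and evaluation falls back to null when nothing matches. The conditions and values here are pre-evaluated for brevity, whereas the real eval short-circuits branch by branch; all names are illustrative:

object CaseWhenSketch extends App {
  // branches = cond1, value1, cond2, value2, ..., [elseValue]
  def caseWhen(branches: Seq[Any]): Any = {
    val pairs = branches.sliding(2, 2).collect { case Seq(cond, value) => (cond, value) }.toSeq
    val elseValue = if (branches.length % 2 == 0) None else Some(branches.last)
    pairs.collectFirst { case (cond, value) if cond == true => value }
      .orElse(elseValue)
      .orNull // no branch matched and no ELSE: null, matching Hive's semantics
  }

  val x = 0
  // CASE WHEN x < 0 THEN 'neg' WHEN x = 0 THEN 'zero' ELSE 'pos' END
  println(caseWhen(Seq(x < 0, "neg", x == 0, "zero", "pos"))) // zero
  // With no ELSE value and no matching condition, the result is null.
  println(caseWhen(Seq(x < 0, "neg")))                        // null
}
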
*/ // scalastyle:on -case class CaseWhen(branches: Seq[Expression]) extends Expression { - type EvaluatedType = Any +case class CaseWhen(branches: Seq[Expression]) extends CaseWhenLike { + + // Use private[this] Array to speed up evaluation. + @transient private[this] lazy val branchesArr = branches.toArray override def children: Seq[Expression] = branches - override def dataType: DataType = { - if (!resolved) { - throw new UnresolvedException(this, "cannot resolve due to differing types in some branches") + override lazy val resolved: Boolean = + childrenResolved && + whenList.forall(_.dataType == BooleanType) && + valueTypesEqual + + /** Written in imperative fashion for performance considerations. */ + override def eval(input: Row): Any = { + val len = branchesArr.length + var i = 0 + // If all branches fail and an elseVal is not provided, the whole statement + // defaults to null, according to Hive's semantics. + while (i < len - 1) { + if (branchesArr(i).eval(input) == true) { + return branchesArr(i + 1).eval(input) + } + i += 2 + } + var res: Any = null + if (i == len - 1) { + res = branchesArr(i).eval(input) } - branches(1).dataType + return res } + override def toString: String = { + "CASE" + branches.sliding(2, 2).map { + case Seq(cond, value) => s" WHEN $cond THEN $value" + case Seq(elseValue) => s" ELSE $elseValue" + }.mkString + } +} + +// scalastyle:off +/** + * Case statements of the form "CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END". + * Refer to this link for the corresponding semantics: + * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-ConditionalFunctions + */ +// scalastyle:on +case class CaseKeyWhen(key: Expression, branches: Seq[Expression]) extends CaseWhenLike { + + // Use private[this] Array to speed up evaluation. @transient private[this] lazy val branchesArr = branches.toArray - @transient private[this] lazy val predicates = - branches.sliding(2, 2).collect { case Seq(cond, _) => cond }.toSeq - @transient private[this] lazy val values = - branches.sliding(2, 2).collect { case Seq(_, value) => value }.toSeq - @transient private[this] lazy val elseValue = - if (branches.length % 2 == 0) None else Option(branches.last) - override def nullable: Boolean = { - // If no value is nullable and no elseValue is provided, the whole statement defaults to null. - values.exists(_.nullable) || (elseValue.map(_.nullable).getOrElse(true)) - } + override def children: Seq[Expression] = key +: branches - override lazy val resolved: Boolean = { - if (!childrenResolved) { - false - } else { - val allCondBooleans = predicates.forall(_.dataType == BooleanType) - // both then and else val should be considered. - val dataTypesEqual = (values ++ elseValue).map(_.dataType).distinct.size <= 1 - allCondBooleans && dataTypesEqual - } - } + override lazy val resolved: Boolean = + childrenResolved && valueTypesEqual /** Written in imperative fashion for performance considerations. */ override def eval(input: Row): Any = { + val evaluatedKey = key.eval(input) val len = branchesArr.length var i = 0 // If all branches fail and an elseVal is not provided, the whole statement // defaults to null, according to Hive's semantics. 
- var res: Any = null while (i < len - 1) { - if (branchesArr(i).eval(input) == true) { - res = branchesArr(i + 1).eval(input) - return res + if (equalNullSafe(evaluatedKey, branchesArr(i).eval(input))) { + return branchesArr(i + 1).eval(input) } i += 2 } + var res: Any = null if (i == len - 1) { res = branchesArr(i).eval(input) } - res + return res + } + + private def equalNullSafe(l: Any, r: Any) = { + if (l == null && r == null) { + true + } else if (l == null || r == null) { + false + } else { + l == r + } } override def toString: String = { - "CASE" + branches.sliding(2, 2).map { + s"CASE $key" + branches.sliding(2, 2).map { case Seq(cond, value) => s" WHEN $cond THEN $value" case Seq(elseValue) => s" ELSE $elseValue" }.mkString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala index 66d7c8b07cce8..5677037dccdc3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/random.scala @@ -24,7 +24,7 @@ import org.apache.spark.util.random.XORShiftRandom /** * A Random distribution generating expression. - * TODO: This can be made generic to generate any type of random distribution, or any type of + * TODO: This can be made generic to generate any type of random distribution, or any type of * StructType. * * Since this expression is stateful, it cannot be a case object. @@ -40,6 +40,8 @@ abstract class RDG(seed: Long) extends LeafExpression with Serializable { override type EvaluatedType = Double + override def deterministic: Boolean = false + override def nullable: Boolean = false override def dataType: DataType = DoubleType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index d597bf7ce756a..f76e0cab1b864 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -22,7 +22,7 @@ import java.util.regex.Pattern import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.types._ -trait StringRegexExpression { +trait StringRegexExpression extends ExpectsInputTypes { self: BinaryExpression => type EvaluatedType = Any @@ -32,6 +32,7 @@ trait StringRegexExpression { override def nullable: Boolean = left.nullable || right.nullable override def dataType: DataType = BooleanType + override def expectedChildTypes: Seq[DataType] = Seq(StringType, StringType) // try cache the pattern for Literal private lazy val cache: Pattern = right match { @@ -39,14 +40,14 @@ trait StringRegexExpression { case _ => null } - protected def compile(str: String): Pattern = if(str == null) { + protected def compile(str: String): Pattern = if (str == null) { null } else { // Let it raise exception if couldn't compile the regex string Pattern.compile(escape(str)) } - protected def pattern(str: String) = if(cache == null) compile(str) else cache + protected def pattern(str: String) = if (cache == null) compile(str) else cache override def eval(input: Row): Any = { val l = left.eval(input) @@ -57,11 +58,11 @@ trait StringRegexExpression { if(r == null) { null } else { - val regex = pattern(r.asInstanceOf[UTF8String].toString) + val regex = 
pattern(r.asInstanceOf[UTF8String].toString()) if(regex == null) { null } else { - matches(regex, l.asInstanceOf[UTF8String].toString) + matches(regex, l.asInstanceOf[UTF8String].toString()) } } } @@ -110,7 +111,7 @@ case class RLike(left: Expression, right: Expression) override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0) } -trait CaseConversionExpression { +trait CaseConversionExpression extends ExpectsInputTypes { self: UnaryExpression => type EvaluatedType = Any @@ -118,8 +119,9 @@ trait CaseConversionExpression { def convert(v: UTF8String): UTF8String override def foldable: Boolean = child.foldable - def nullable: Boolean = child.nullable - def dataType: DataType = StringType + override def nullable: Boolean = child.nullable + override def dataType: DataType = StringType + override def expectedChildTypes: Seq[DataType] = Seq(StringType) override def eval(input: Row): Any = { val evaluated = child.eval(input) @@ -135,8 +137,8 @@ trait CaseConversionExpression { * A function that converts the characters of a string to uppercase. */ case class Upper(child: Expression) extends UnaryExpression with CaseConversionExpression { - - override def convert(v: UTF8String): UTF8String = v.toUpperCase + + override def convert(v: UTF8String): UTF8String = v.toUpperCase() override def toString: String = s"Upper($child)" } @@ -145,21 +147,23 @@ case class Upper(child: Expression) extends UnaryExpression with CaseConversionE * A function that converts the characters of a string to lowercase. */ case class Lower(child: Expression) extends UnaryExpression with CaseConversionExpression { - - override def convert(v: UTF8String): UTF8String = v.toLowerCase + + override def convert(v: UTF8String): UTF8String = v.toLowerCase() override def toString: String = s"Lower($child)" } /** A base trait for functions that compare two strings, returning a boolean. */ -trait StringComparison { - self: BinaryPredicate => +trait StringComparison extends ExpectsInputTypes { + self: BinaryExpression => + + def compare(l: UTF8String, r: UTF8String): Boolean override type EvaluatedType = Any override def nullable: Boolean = left.nullable || right.nullable - def compare(l: UTF8String, r: UTF8String): Boolean + override def expectedChildTypes: Seq[DataType] = Seq(StringType, StringType) override def eval(input: Row): Any = { val leftEval = left.eval(input) @@ -181,7 +185,7 @@ trait StringComparison { * A function that returns true if the string `left` contains the string `right`. */ case class Contains(left: Expression, right: Expression) - extends BinaryPredicate with StringComparison { + extends BinaryExpression with Predicate with StringComparison { override def compare(l: UTF8String, r: UTF8String): Boolean = l.contains(r) } @@ -189,7 +193,7 @@ case class Contains(left: Expression, right: Expression) * A function that returns true if the string `left` starts with the string `right`. */ case class StartsWith(left: Expression, right: Expression) - extends BinaryPredicate with StringComparison { + extends BinaryExpression with Predicate with StringComparison { override def compare(l: UTF8String, r: UTF8String): Boolean = l.startsWith(r) } @@ -197,7 +201,7 @@ case class StartsWith(left: Expression, right: Expression) * A function that returns true if the string `left` ends with the string `right`. 
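The string predicates in this hunk (Contains, StartsWith, EndsWith) share the StringComparison contract: if either operand evaluates to null the result is null, otherwise the concrete comparison decides. A standalone sketch with plain strings; the helper name is illustrative:

object StringComparisonSketch extends App {
  // Mirrors StringComparison.eval: null-in, null-out; otherwise delegate to `compare`.
  def nullSafeCompare(left: String, right: String)(compare: (String, String) => Boolean): Any =
    if (left == null || right == null) null else compare(left, right)

  println(nullSafeCompare("Spark SQL", "SQL")(_ endsWith _))     // true
  println(nullSafeCompare("Spark SQL", "Spark")(_ startsWith _)) // true
  println(nullSafeCompare(null, "SQL")(_ contains _))            // null
}
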
*/ case class EndsWith(left: Expression, right: Expression) - extends BinaryPredicate with StringComparison { + extends BinaryExpression with Predicate with StringComparison { override def compare(l: UTF8String, r: UTF8String): Boolean = l.endsWith(r) } @@ -205,8 +209,9 @@ case class EndsWith(left: Expression, right: Expression) * A function that takes a substring of its first argument starting at a given position. * Defined for String and Binary types. */ -case class Substring(str: Expression, pos: Expression, len: Expression) extends Expression { - +case class Substring(str: Expression, pos: Expression, len: Expression) + extends Expression with ExpectsInputTypes { + type EvaluatedType = Any override def foldable: Boolean = str.foldable && pos.foldable && len.foldable @@ -219,12 +224,14 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends if (str.dataType == BinaryType) str.dataType else StringType } + override def expectedChildTypes: Seq[DataType] = Seq(StringType, IntegerType, IntegerType) + override def children: Seq[Expression] = str :: pos :: len :: Nil @inline def slicePos(startPos: Int, sliceLen: Int, length: () => Int): (Int, Int) = { // Hive and SQL use one-based indexing for SUBSTR arguments but also accept zero and - // negative indices for start positions. If a start index i is greater than 0, it + // negative indices for start positions. If a start index i is greater than 0, it // refers to element i-1 in the sequence. If a start index i is less than 0, it refers // to the -ith element before the end of the sequence. If a start index i is 0, it // refers to the first element. @@ -258,7 +265,7 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends val (st, end) = slicePos(start, length, () => ba.length) ba.slice(st, end) case s: UTF8String => - val (st, end) = slicePos(start, length, () => s.length) + val (st, end) = slicePos(start, length, () => s.length()) s.slice(st, end) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala new file mode 100644 index 0000000000000..d6ecd2ee7e851 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -0,0 +1,338 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
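The Substring expression in the hunk above keeps the SUBSTR start-position rules: arguments are one-based, a start of 0 behaves like 1, and a negative start counts back from the end of the value. A simplified standalone sketch of those rules for strings only (the real slicePos also handles binary input and returns a (start, end) pair):

object SubstrIndexingSketch extends App {
  def substr(s: String, pos: Int, len: Int): String = {
    val start = pos match {
      case p if p > 0 => p - 1                      // one-based -> zero-based
      case 0          => 0                          // 0 behaves like 1
      case p          => math.max(s.length + p, 0)  // negative: count from the end
    }
    if (len <= 0 || start >= s.length) ""
    else s.substring(start, math.min(start + len, s.length))
  }

  println(substr("Spark SQL", 1, 5))   // Spark
  println(substr("Spark SQL", 0, 5))   // Spark
  println(substr("Spark SQL", -3, 3))  // SQL
}
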
+ */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.analysis.UnresolvedException +import org.apache.spark.sql.catalyst.errors.TreeNodeException +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types.{NumericType, DataType} + +/** + * The trait of the Window Specification (specified in the OVER clause or WINDOW clause) for + * Window Functions. + */ +sealed trait WindowSpec + +/** + * The specification for a window function. + * @param partitionSpec It defines the way that input rows are partitioned. + * @param orderSpec It defines the ordering of rows in a partition. + * @param frameSpecification It defines the window frame in a partition. + */ +case class WindowSpecDefinition( + partitionSpec: Seq[Expression], + orderSpec: Seq[SortOrder], + frameSpecification: WindowFrame) extends Expression with WindowSpec { + + def validate: Option[String] = frameSpecification match { + case UnspecifiedFrame => + Some("Found a UnspecifiedFrame. It should be converted to a SpecifiedWindowFrame " + + "during analysis. Please file a bug report.") + case frame: SpecifiedWindowFrame => frame.validate.orElse { + def checkValueBasedBoundaryForRangeFrame(): Option[String] = { + if (orderSpec.length > 1) { + // It is not allowed to have a value-based PRECEDING and FOLLOWING + // as the boundary of a Range Window Frame. + Some("This Range Window Frame only accepts at most one ORDER BY expression.") + } else if (orderSpec.nonEmpty && !orderSpec.head.dataType.isInstanceOf[NumericType]) { + Some("The data type of the expression in the ORDER BY clause should be a numeric type.") + } else { + None + } + } + + (frame.frameType, frame.frameStart, frame.frameEnd) match { + case (RangeFrame, vp: ValuePreceding, _) => checkValueBasedBoundaryForRangeFrame() + case (RangeFrame, vf: ValueFollowing, _) => checkValueBasedBoundaryForRangeFrame() + case (RangeFrame, _, vp: ValuePreceding) => checkValueBasedBoundaryForRangeFrame() + case (RangeFrame, _, vf: ValueFollowing) => checkValueBasedBoundaryForRangeFrame() + case (_, _, _) => None + } + } + } + + override def children: Seq[Expression] = partitionSpec ++ orderSpec + + override lazy val resolved: Boolean = + childrenResolved && frameSpecification.isInstanceOf[SpecifiedWindowFrame] + + + override def toString: String = simpleString + + override def eval(input: Row): EvaluatedType = throw new UnsupportedOperationException + override def nullable: Boolean = true + override def foldable: Boolean = false + override def dataType: DataType = throw new UnsupportedOperationException +} + +/** + * A Window specification reference that refers to the [[WindowSpecDefinition]] defined + * under the name `name`. + */ +case class WindowSpecReference(name: String) extends WindowSpec + +/** + * The trait used to represent the type of a Window Frame. + */ +sealed trait FrameType + +/** + * RowFrame treats rows in a partition individually. When a [[ValuePreceding]] + * or a [[ValueFollowing]] is used as its [[FrameBoundary]], the value is considered + * as a physical offset. + * For example, `ROW BETWEEN 1 PRECEDING AND 1 FOLLOWING` represents a 3-row frame, + * from the row precedes the current row to the row follows the current row. + */ +case object RowFrame extends FrameType + +/** + * RangeFrame treats rows in a partition as groups of peers. + * All rows having the same `ORDER BY` ordering are considered as peers. 
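The ROWS/RANGE distinction documented above can be made concrete with a small standalone computation of which ordered values fall inside BETWEEN 1 PRECEDING AND 1 FOLLOWING under each frame type; this is illustrative code, not the Catalyst implementation:

object FrameTypeSketch extends App {
  val ordered = Seq(1, 2, 2, 5, 6) // already sorted by the ORDER BY expression

  // ROWS: physical offsets around the current row index.
  def rowsFrame(i: Int): Seq[Int] =
    ordered.slice(math.max(i - 1, 0), math.min(i + 2, ordered.length))

  // RANGE: every peer whose ORDER BY value lies in [v - 1, v + 1].
  def rangeFrame(i: Int): Seq[Int] = {
    val v = ordered(i)
    ordered.filter(x => x >= v - 1 && x <= v + 1)
  }

  println(rowsFrame(2))  // List(2, 2, 5): one row before, the current row, one row after
  println(rangeFrame(2)) // List(1, 2, 2): all values within [1, 3]
}
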
+ * When a [[ValuePreceding]] or a [[ValueFollowing]] is used as its [[FrameBoundary]], + * the value is considered as a logical offset. + * For example, assuming the value of the current row's `ORDER BY` expression `expr` is `v`, + * `RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING` represents a frame containing rows whose values + * `expr` are in the range of [v-1, v+1]. + * + * If `ORDER BY` clause is not defined, all rows in the partition is considered as peers + * of the current row. + */ +case object RangeFrame extends FrameType + +/** + * The trait used to represent the type of a Window Frame Boundary. + */ +sealed trait FrameBoundary { + def notFollows(other: FrameBoundary): Boolean +} + +/** UNBOUNDED PRECEDING boundary. */ +case object UnboundedPreceding extends FrameBoundary { + def notFollows(other: FrameBoundary): Boolean = other match { + case UnboundedPreceding => true + case vp: ValuePreceding => true + case CurrentRow => true + case vf: ValueFollowing => true + case UnboundedFollowing => true + } + + override def toString: String = "UNBOUNDED PRECEDING" +} + +/** PRECEDING boundary. */ +case class ValuePreceding(value: Int) extends FrameBoundary { + def notFollows(other: FrameBoundary): Boolean = other match { + case UnboundedPreceding => false + case ValuePreceding(anotherValue) => value >= anotherValue + case CurrentRow => true + case vf: ValueFollowing => true + case UnboundedFollowing => true + } + + override def toString: String = s"$value PRECEDING" +} + +/** CURRENT ROW boundary. */ +case object CurrentRow extends FrameBoundary { + def notFollows(other: FrameBoundary): Boolean = other match { + case UnboundedPreceding => false + case vp: ValuePreceding => false + case CurrentRow => true + case vf: ValueFollowing => true + case UnboundedFollowing => true + } + + override def toString: String = "CURRENT ROW" +} + +/** FOLLOWING boundary. */ +case class ValueFollowing(value: Int) extends FrameBoundary { + def notFollows(other: FrameBoundary): Boolean = other match { + case UnboundedPreceding => false + case vp: ValuePreceding => false + case CurrentRow => false + case ValueFollowing(anotherValue) => value <= anotherValue + case UnboundedFollowing => true + } + + override def toString: String = s"$value FOLLOWING" +} + +/** UNBOUNDED FOLLOWING boundary. */ +case object UnboundedFollowing extends FrameBoundary { + def notFollows(other: FrameBoundary): Boolean = other match { + case UnboundedPreceding => false + case vp: ValuePreceding => false + case CurrentRow => false + case vf: ValueFollowing => false + case UnboundedFollowing => true + } + + override def toString: String = "UNBOUNDED FOLLOWING" +} + +/** + * The trait used to represent the a Window Frame. + */ +sealed trait WindowFrame + +/** Used as a place holder when a frame specification is not defined. */ +case object UnspecifiedFrame extends WindowFrame + +/** A specified Window Frame. */ +case class SpecifiedWindowFrame( + frameType: FrameType, + frameStart: FrameBoundary, + frameEnd: FrameBoundary) extends WindowFrame { + + /** If this WindowFrame is valid or not. */ + def validate: Option[String] = (frameType, frameStart, frameEnd) match { + case (_, UnboundedFollowing, _) => + Some(s"$UnboundedFollowing is not allowed as the start of a Window Frame.") + case (_, _, UnboundedPreceding) => + Some(s"$UnboundedPreceding is not allowed as the end of a Window Frame.") + // case (RowFrame, start, end) => ??? RowFrame specific rule + // case (RangeFrame, start, end) => ??? 
RangeFrame specific rule + case (_, start, end) => + if (start.notFollows(end)) { + None + } else { + val reason = + s"The end of this Window Frame $end is smaller than the start of " + + s"this Window Frame $start." + Some(reason) + } + } + + override def toString: String = frameType match { + case RowFrame => s"ROWS BETWEEN $frameStart AND $frameEnd" + case RangeFrame => s"RANGE BETWEEN $frameStart AND $frameEnd" + } +} + +object SpecifiedWindowFrame { + /** + * + * @param hasOrderSpecification If the window spec has order by expressions. + * @param acceptWindowFrame If the window function accepts user-specified frame. + * @return + */ + def defaultWindowFrame( + hasOrderSpecification: Boolean, + acceptWindowFrame: Boolean): SpecifiedWindowFrame = { + if (hasOrderSpecification && acceptWindowFrame) { + // If order spec is defined and the window function supports user specified window frames, + // the default frame is RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW. + SpecifiedWindowFrame(RangeFrame, UnboundedPreceding, CurrentRow) + } else { + // Otherwise, the default frame is + // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING. + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing) + } + } +} + +/** + * Every window function needs to maintain a output buffer for its output. + * It should expect that for a n-row window frame, it will be called n times + * to retrieve value corresponding with these n rows. + */ +trait WindowFunction extends Expression { + self: Product => + + def init(): Unit + + def reset(): Unit + + def prepareInputParameters(input: Row): AnyRef + + def update(input: AnyRef): Unit + + def batchUpdate(inputs: Array[AnyRef]): Unit + + def evaluate(): Unit + + def get(index: Int): Any + + def newInstance(): WindowFunction +} + +case class UnresolvedWindowFunction( + name: String, + children: Seq[Expression]) + extends Expression with WindowFunction { + + override def dataType: DataType = throw new UnresolvedException(this, "dataType") + override def foldable: Boolean = throw new UnresolvedException(this, "foldable") + override def nullable: Boolean = throw new UnresolvedException(this, "nullable") + override lazy val resolved = false + + override def init(): Unit = + throw new UnresolvedException(this, "init") + override def reset(): Unit = + throw new UnresolvedException(this, "reset") + override def prepareInputParameters(input: Row): AnyRef = + throw new UnresolvedException(this, "prepareInputParameters") + override def update(input: AnyRef): Unit = + throw new UnresolvedException(this, "update") + override def batchUpdate(inputs: Array[AnyRef]): Unit = + throw new UnresolvedException(this, "batchUpdate") + override def evaluate(): Unit = + throw new UnresolvedException(this, "evaluate") + override def get(index: Int): Any = + throw new UnresolvedException(this, "get") + // Unresolved functions are transient at compile time and don't get evaluated during execution. + override def eval(input: Row = null): EvaluatedType = + throw new TreeNodeException(this, s"No function to evaluate expression. 
type: ${this.nodeName}") + + override def toString: String = s"'$name(${children.mkString(",")})" + + override def newInstance(): WindowFunction = + throw new UnresolvedException(this, "newInstance") +} + +case class UnresolvedWindowExpression( + child: UnresolvedWindowFunction, + windowSpec: WindowSpecReference) extends UnaryExpression { + override def dataType: DataType = throw new UnresolvedException(this, "dataType") + override def foldable: Boolean = throw new UnresolvedException(this, "foldable") + override def nullable: Boolean = throw new UnresolvedException(this, "nullable") + override lazy val resolved = false + + // Unresolved functions are transient at compile time and don't get evaluated during execution. + override def eval(input: Row = null): EvaluatedType = + throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") +} + +case class WindowExpression( + windowFunction: WindowFunction, + windowSpec: WindowSpecDefinition) extends Expression { + override type EvaluatedType = Any + + override def children: Seq[Expression] = + windowFunction :: windowSpec :: Nil + + override def eval(input: Row): EvaluatedType = + throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") + + override def dataType: DataType = windowFunction.dataType + override def foldable: Boolean = windowFunction.foldable + override def nullable: Boolean = windowFunction.nullable + + override def toString: String = s"$windowFunction $windowSpec" +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 709f7d672d931..b25fb48f55e2b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -43,16 +43,17 @@ object DefaultOptimizer extends Optimizer { PushPredicateThroughJoin, PushPredicateThroughGenerate, ColumnPruning, + ProjectCollapsing, CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, + OptimizeIn, ConstantFolding, LikeSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, - SimplifyCaseConversionExpressions, - OptimizeIn) :: + SimplifyCaseConversionExpressions) :: Batch("Decimal Optimizations", FixedPoint(100), DecimalAggregates) :: Batch("LocalRelation", FixedPoint(100), @@ -114,7 +115,7 @@ object UnionPushdown extends Rule[LogicalPlan] { * - Aggregate * - Project <- Join * - LeftSemiJoin - * - Collapse adjacent projections, performing alias substitution. + * - Performing alias substitution. */ object ColumnPruning extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -152,8 +153,43 @@ object ColumnPruning extends Rule[LogicalPlan] { Join(left, prunedChild(right, allReferences), LeftSemi, condition) - // Combine adjacent Projects. 
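
The windowExpressions.scala file introduced above encodes frame validity through FrameBoundary.notFollows and SpecifiedWindowFrame.validate: a frame is well formed only when its start boundary does not come after its end boundary. A minimal standalone sketch of that ordering check follows; the rank encoding and all names below are illustrative, not Spark's classes.

object FrameBoundarySketch {
  sealed trait Boundary { def rank: Long }
  case object UnboundedPreceding extends Boundary { val rank = Long.MinValue }
  case class ValuePreceding(n: Int) extends Boundary { def rank = -n.toLong }
  case object CurrentRow extends Boundary { val rank = 0L }
  case class ValueFollowing(n: Int) extends Boundary { def rank = n.toLong }
  case object UnboundedFollowing extends Boundary { val rank = Long.MaxValue }

  // Loosely mirrors SpecifiedWindowFrame.validate: None means the frame is valid.
  def validate(start: Boundary, end: Boundary): Option[String] =
    if (start == UnboundedFollowing) Some("UNBOUNDED FOLLOWING cannot start a frame")
    else if (end == UnboundedPreceding) Some("UNBOUNDED PRECEDING cannot end a frame")
    else if (start.rank <= end.rank) None
    else Some(s"frame end $end is smaller than frame start $start")

  def main(args: Array[String]): Unit = {
    println(validate(ValuePreceding(1), ValueFollowing(1)))  // None: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
    println(validate(ValueFollowing(2), ValueFollowing(1)))  // Some(...): end precedes start
  }
}

When no frame is given, SpecifiedWindowFrame.defaultWindowFrame above applies the same machinery: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW if an ORDER BY is present, otherwise ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING.
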
- case Project(projectList1, Project(projectList2, child)) => + case Project(projectList, Limit(exp, child)) => + Limit(exp, Project(projectList, child)) + + // push down project if possible when the child is sort + case p @ Project(projectList, s @ Sort(_, _, grandChild)) + if s.references.subsetOf(p.outputSet) => + s.copy(child = Project(projectList, grandChild)) + + // Eliminate no-op Projects + case Project(projectList, child) if child.output == projectList => child + } + + /** Applies a projection only when the child is producing unnecessary attributes */ + private def prunedChild(c: LogicalPlan, allReferences: AttributeSet) = + if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) { + Project(allReferences.filter(c.outputSet.contains).toSeq, c) + } else { + c + } +} + +/** + * Combines two adjacent [[Project]] operators into one, merging the + * expressions into one single expression. + */ +object ProjectCollapsing extends Rule[LogicalPlan] { + + /** Returns true if any expression in projectList is non-deterministic. */ + private def hasNondeterministic(projectList: Seq[NamedExpression]): Boolean = { + projectList.exists(expr => expr.find(!_.deterministic).isDefined) + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + // We only collapse these two Projects if the child Project's expressions are all + // deterministic. + case Project(projectList1, Project(projectList2, child)) + if !hasNondeterministic(projectList2) => // Create a map of Aliases to their values from the child projection. // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)). val aliasMap = AttributeMap(projectList2.collect { @@ -169,21 +205,7 @@ object ColumnPruning extends Rule[LogicalPlan] { }).asInstanceOf[Seq[NamedExpression]] Project(substitutedProjection, child) - - case Project(projectList, Limit(exp, child)) => - Limit(exp, Project(projectList, child)) - - // Eliminate no-op Projects - case Project(projectList, child) if child.output == projectList => child } - - /** Applies a projection only when the child is producing unnecessary attributes */ - private def prunedChild(c: LogicalPlan, allReferences: AttributeSet) = - if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) { - Project(allReferences.filter(c.outputSet.contains).toSeq, c) - } else { - c - } } /** @@ -227,10 +249,8 @@ object NullPropagation extends Rule[LogicalPlan] { case e @ Count(Literal(null, _)) => Cast(Literal(0L), e.dataType) case e @ IsNull(c) if !c.nullable => Literal.create(false, BooleanType) case e @ IsNotNull(c) if !c.nullable => Literal.create(true, BooleanType) - case e @ GetItem(Literal(null, _), _) => Literal.create(null, e.dataType) - case e @ GetItem(_, Literal(null, _)) => Literal.create(null, e.dataType) - case e @ StructGetField(Literal(null, _), _, _) => Literal.create(null, e.dataType) - case e @ ArrayGetField(Literal(null, _), _, _, _) => Literal.create(null, e.dataType) + case e @ ExtractValue(Literal(null, _), _) => Literal.create(null, e.dataType) + case e @ ExtractValue(_, Literal(null, _)) => Literal.create(null, e.dataType) case e @ EqualNullSafe(Literal(null, _), r) => IsNull(r) case e @ EqualNullSafe(l, Literal(null, _)) => IsNull(l) case e @ Count(expr) if !expr.nullable => Count(Literal(1)) @@ -310,8 +330,8 @@ object OptimizeIn extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { case In(v, list) if 
!list.exists(!_.isInstanceOf[Literal]) => - val hSet = list.map(e => e.eval(null)) - InSet(v, HashSet() ++ hSet) + val hSet = list.map(e => e.eval(null)) + InSet(v, HashSet() ++ hSet) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala index 51b5699affed5..73a21884a4710 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala @@ -51,9 +51,9 @@ abstract class QueryPlanner[PhysicalPlan <: TreeNode[PhysicalPlan]] { * filled in automatically by the QueryPlanner using the other execution strategies that are * available. */ - protected def planLater(plan: LogicalPlan) = apply(plan).next() + protected def planLater(plan: LogicalPlan) = this.plan(plan).next() - def apply(plan: LogicalPlan): Iterator[PhysicalPlan] = { + def plan(plan: LogicalPlan): Iterator[PhysicalPlan] = { // Obviously a lot to do here still... val iter = strategies.view.flatMap(_(plan)).toIterator assert(iter.hasNext, s"No plan for $plan") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 4574934d910db..1dd75a8846303 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -159,9 +159,10 @@ object PartialAggregation { // Should trim aliases around `GetField`s. These aliases are introduced while // resolving struct field accesses, because `GetField` is not a `NamedExpression`. // (Should we just turn `GetField` into a `NamedExpression`?) 
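
The OptimizeIn rule shown above (now run ahead of ConstantFolding in the batch) rewrites IN over an all-literal list into a hash-set membership test. A standalone sketch of the same rewrite over a toy expression type, assuming nothing from Catalyst:

import scala.collection.immutable.HashSet

object OptimizeInSketch {
  sealed trait Expr
  case class Attr(name: String) extends Expr
  case class Lit(value: Any) extends Expr
  case class In(value: Expr, list: Seq[Expr]) extends Expr
  case class InSet(value: Expr, hset: Set[Any]) extends Expr

  // Rewrite only when every element of the IN list is a literal.
  def optimize(e: Expr): Expr = e match {
    case In(v, list) if list.forall(_.isInstanceOf[Lit]) =>
      InSet(v, HashSet() ++ list.collect { case Lit(x) => x })
    case other => other
  }

  def main(args: Array[String]): Unit = {
    println(optimize(In(Attr("a"), Seq(Lit(1), Lit(2), Lit(3)))))  // becomes InSet
    println(optimize(In(Attr("a"), Seq(Lit(1), Attr("b")))))       // left unchanged
  }
}

The payoff is per-row cost: a set lookup answers membership in roughly constant time, whereas the original In expression evaluates the candidates in the list one by one.
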
+ val trimmed = e.transform { case Alias(g: ExtractValue, _) => g } namedGroupingExpressions - .get(e.transform { case Alias(g: GetField, _) => g }) - .map(_.toAttribute) + .find { case (k, v) => k semanticEquals trimmed } + .map(_._2.toAttribute) .getOrElse(e) }).asInstanceOf[Seq[NamedExpression]] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 7967189cacb24..eff5c61644944 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -84,7 +84,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy val newArgs = productIterator.map { case e: Expression => transformExpressionDown(e) case Some(e: Expression) => Some(transformExpressionDown(e)) - case m: Map[_,_] => m + case m: Map[_, _] => m case d: DataType => d // Avoid unpacking Structs case seq: Traversable[_] => seq.map { case e: Expression => transformExpressionDown(e) @@ -117,7 +117,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy val newArgs = productIterator.map { case e: Expression => transformExpressionUp(e) case Some(e: Expression) => Some(transformExpressionUp(e)) - case m: Map[_,_] => m + case m: Map[_, _] => m case d: DataType => d // Avoid unpacking Structs case seq: Traversable[_] => seq.map { case e: Expression => transformExpressionUp(e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index 5dc0539caec24..77dec7ca6e2b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -24,6 +24,16 @@ object JoinType { case "leftouter" | "left" => LeftOuter case "rightouter" | "right" => RightOuter case "leftsemi" => LeftSemi + case _ => + val supported = Seq( + "inner", + "outer", "full", "fullouter", + "leftouter", "left", + "rightouter", "right", + "leftsemi") + + throw new IllegalArgumentException(s"Unsupported join type '$typ'. " + + "Supported join types include: " + supported.mkString("'", "', '", "'") + ".") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index ae4620a4e5abf..dba69659afc80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -105,7 +105,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { } /** - * Optionally resolves the given string to a [[NamedExpression]] using the input from all child + * Optionally resolves the given strings to a [[NamedExpression]] using the input from all child * nodes of this LogicalPlan. The attribute is expressed as * as string in the following form: `[scope].AttributeName.[nested].[fields]...`. 
*/ @@ -116,7 +116,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { resolve(nameParts, children.flatMap(_.output), resolver, throwErrors) /** - * Optionally resolves the given string to a [[NamedExpression]] based on the output of this + * Optionally resolves the given strings to a [[NamedExpression]] based on the output of this * LogicalPlan. The attribute is expressed as string in the following form: * `[scope].AttributeName.[nested].[fields]...`. */ @@ -126,6 +126,57 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { throwErrors: Boolean = false): Option[NamedExpression] = resolve(nameParts, output, resolver, throwErrors) + /** + * Given an attribute name, split it to name parts by dot, but + * don't split the name parts quoted by backticks, for example, + * `ab.cd`.`efg` should be split into two parts "ab.cd" and "efg". + */ + def resolveQuoted( + name: String, + resolver: Resolver): Option[NamedExpression] = { + resolve(parseAttributeName(name), resolver, true) + } + + /** + * Internal method, used to split attribute name by dot with backticks rule. + * Backticks must appear in pairs, and the quoted string must be a complete name part, + * which means `ab..c`e.f is not allowed. + * Escape character is not supported now, so we can't use backtick inside name part. + */ + private def parseAttributeName(name: String): Seq[String] = { + val e = new AnalysisException(s"syntax error in attribute name: $name") + val nameParts = scala.collection.mutable.ArrayBuffer.empty[String] + val tmp = scala.collection.mutable.ArrayBuffer.empty[Char] + var inBacktick = false + var i = 0 + while (i < name.length) { + val char = name(i) + if (inBacktick) { + if (char == '`') { + inBacktick = false + if (i + 1 < name.length && name(i + 1) != '.') throw e + } else { + tmp += char + } + } else { + if (char == '`') { + if (tmp.nonEmpty) throw e + inBacktick = true + } else if (char == '.') { + if (tmp.isEmpty) throw e + nameParts += tmp.mkString + tmp.clear() + } else { + tmp += char + } + } + i += 1 + } + if (tmp.isEmpty || inBacktick) throw e + nameParts += tmp.mkString + nameParts.toSeq + } + /** * Resolve the given `name` string against the given attribute, returning either 0 or 1 match. * @@ -209,7 +260,8 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { // For example, consider "a.b.c", where "a" is resolved to an existing attribute. // Then this will add GetField("c", GetField("b", a)), and alias // the final expression as "c". 
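
resolveQuoted and parseAttributeName above split an attribute name on dots while keeping backtick-quoted parts intact, so `ab.cd`.`efg` resolves as the two parts ab.cd and efg. A simplified standalone re-implementation follows; it intentionally drops some of the stricter checks in the real method (for example, that a closing backtick must be followed by a dot).

object AttributeNameSketch {
  def parse(name: String): Seq[String] = {
    val parts = scala.collection.mutable.ArrayBuffer.empty[String]
    val tmp = new StringBuilder
    var inBacktick = false
    name.foreach {
      case '`' => inBacktick = !inBacktick          // toggle quoting; no escapes supported
      case '.' if !inBacktick =>                    // an unquoted dot ends a name part
        require(tmp.nonEmpty, s"syntax error in attribute name: $name")
        parts += tmp.result()
        tmp.clear()
      case c => tmp += c                            // everything else belongs to the current part
    }
    require(tmp.nonEmpty && !inBacktick, s"syntax error in attribute name: $name")
    parts += tmp.result()
    parts.toSeq
  }

  def main(args: Array[String]): Unit = {
    println(parse("a.b.c"))          // List(a, b, c)
    println(parse("`ab.cd`.`efg`"))  // List(ab.cd, efg)
  }
}
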
- val fieldExprs = nestedFields.foldLeft(a: Expression)(GetField(_, _, resolver)) + val fieldExprs = nestedFields.foldLeft(a: Expression)((expr, fieldName) => + ExtractValue(expr, Literal(fieldName), resolver)) val aliasName = nestedFields.last Some(Alias(fieldExprs, aliasName)()) } catch { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index 21208c8a5c281..33a9e55a47dee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -25,13 +25,14 @@ case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) extend override def output: Seq[Attribute] = projectList.map(_.toAttribute) override lazy val resolved: Boolean = { - val containsAggregatesOrGenerators = projectList.exists ( _.collect { + val hasSpecialExpressions = projectList.exists ( _.collect { case agg: AggregateExpression => agg case generator: Generator => generator + case window: WindowExpression => window }.nonEmpty ) - !expressions.exists(!_.resolved) && childrenResolved && !containsAggregatesOrGenerators + !expressions.exists(!_.resolved) && childrenResolved && !hasSpecialExpressions } } @@ -58,6 +59,9 @@ case class Generate( child: LogicalPlan) extends UnaryNode { + /** The set of all attributes produced by this node. */ + def generatedSet: AttributeSet = AttributeSet(generatorOutput) + override lazy val resolved: Boolean = { generator.resolved && childrenResolved && @@ -89,7 +93,7 @@ case class Union(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { override lazy val resolved: Boolean = childrenResolved && - left.output.zip(right.output).forall { case (l,r) => l.dataType == r.dataType } + left.output.zip(right.output).forall { case (l, r) => l.dataType == r.dataType } override def statistics: Statistics = { val sizeInBytes = left.statistics.sizeInBytes + right.statistics.sizeInBytes @@ -148,16 +152,6 @@ case class InsertIntoTable( } } -case class CreateTableAsSelect[T]( - databaseName: Option[String], - tableName: String, - child: LogicalPlan, - allowExisting: Boolean, - desc: Option[T] = None) extends UnaryNode { - override def output: Seq[Attribute] = Seq.empty[Attribute] - override lazy val resolved: Boolean = databaseName != None && childrenResolved -} - /** * A container for holding named common table expressions (CTEs) and a query plan. * This operator will be removed during analysis and the relations will be substituted into child. @@ -170,6 +164,12 @@ case class With(child: LogicalPlan, cteRelations: Map[String, Subquery]) extends override def output: Seq[Attribute] = child.output } +case class WithWindowDefinition( + windowDefinitions: Map[String, WindowSpecDefinition], + child: LogicalPlan) extends UnaryNode { + override def output: Seq[Attribute] = child.output +} + case class WriteToFile( path: String, child: LogicalPlan) extends UnaryNode { @@ -177,10 +177,10 @@ case class WriteToFile( } /** - * @param order The ordering expressions - * @param global True means global sorting apply for entire data set, + * @param order The ordering expressions + * @param global True means global sorting apply for entire data set, * False means sorting only apply within the partition. 
- * @param child Child logical plan + * @param child Child logical plan */ case class Sort( order: Seq[SortOrder], @@ -195,9 +195,28 @@ case class Aggregate( child: LogicalPlan) extends UnaryNode { + override lazy val resolved: Boolean = { + val hasWindowExpressions = aggregateExpressions.exists ( _.collect { + case window: WindowExpression => window + }.nonEmpty + ) + + !expressions.exists(!_.resolved) && childrenResolved && !hasWindowExpressions + } + override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) } +case class Window( + projectList: Seq[Attribute], + windowExpressions: Seq[NamedExpression], + windowSpec: WindowSpecDefinition, + child: LogicalPlan) extends UnaryNode { + + override def output: Seq[Attribute] = + (projectList ++ windowExpressions).map(_.toAttribute) +} + /** * Apply the all of the GroupExpressions to every input row, hence we will get * multiple output rows for a input row. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 45905f8ef98c5..246f4d7e34d3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -21,9 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute /** * A logical node that represents a non-query command to be executed by the system. For example, - * commands can be used by parsers to represent DDL operations. + * commands can be used by parsers to represent DDL operations. Commands, unlike queries, are + * eagerly executed. */ -abstract class Command extends LeafNode { - self: Product => - def output: Seq[Attribute] = Seq.empty -} +trait Command diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 97502ed3afe72..36d005d0e1684 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -71,6 +71,15 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { this.eq(other) || this == other } + /** + * Find the first [[TreeNode]] that satisfies the condition specified by `f`. + * The condition is recursively applied to this node and all of its children (pre-order). + */ + def find(f: BaseType => Boolean): Option[BaseType] = f(this) match { + case true => Some(this) + case false => children.foldLeft(None: Option[BaseType]) { (l, r) => l.orElse(r.find(f)) } + } + /** * Runs the given function on this node and then recursively on [[children]]. * @param f the function to be applied to each node in the tree. @@ -121,6 +130,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { ret } + /** + * Finds and returns the first [[TreeNode]] of the tree for which the given partial function + * is defined (pre-order), and applies the partial function to it. + */ + def collectFirst[B](pf: PartialFunction[BaseType, B]): Option[B] = { + val lifted = pf.lift + lifted(this).orElse { + children.foldLeft(None: Option[B]) { (l, r) => l.orElse(r.collectFirst(pf)) } + } + } + /** * Returns a copy of this node where `f` has been applied to all the nodes children. 
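
TreeNode.find and collectFirst added above both perform a pre-order search: test the node itself first, then recurse into its children left to right, short-circuiting on the first hit. A standalone sketch over a toy tree (not Catalyst's TreeNode):

object TreeFindSketch {
  case class Node(value: Int, children: Seq[Node] = Nil) {
    // Pre-order: this node first, then children left to right.
    def find(f: Node => Boolean): Option[Node] =
      if (f(this)) Some(this)
      else children.foldLeft(None: Option[Node])((acc, c) => acc.orElse(c.find(f)))
  }

  def main(args: Array[String]): Unit = {
    val tree = Node(1, Seq(Node(2, Seq(Node(4))), Node(3, Seq(Node(4)))))
    println(tree.find(_.value == 4))  // Some(Node(4,List())) - the one under node 2 wins
    println(tree.find(_.value == 9))  // None
  }
}
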
*/ @@ -151,6 +171,20 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { val remainingNewChildren = newChildren.toBuffer val remainingOldChildren = children.toBuffer val newArgs = productIterator.map { + // Handle Seq[TreeNode] in TreeNode parameters. + case s: Seq[_] => s.map { + case arg: TreeNode[_] if children contains arg => + val newChild = remainingNewChildren.remove(0) + val oldChild = remainingOldChildren.remove(0) + if (newChild fastEquals oldChild) { + oldChild + } else { + changed = true + newChild + } + case nonChild: AnyRef => nonChild + case null => null + } case arg: TreeNode[_] if children contains arg => val newChild = remainingNewChildren.remove(0) val oldChild = remainingOldChildren.remove(0) @@ -220,7 +254,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { } else { Some(arg) } - case m: Map[_,_] => m + case m: Map[_, _] => m case d: DataType => d // Avoid unpacking Structs case args: Traversable[_] => args.map { case arg: TreeNode[_] if children contains arg => @@ -277,7 +311,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { } else { Some(arg) } - case m: Map[_,_] => m + case m: Map[_, _] => m case d: DataType => d // Avoid unpacking Structs case args: Traversable[_] => args.map { case arg: TreeNode[_] if children contains arg => @@ -351,6 +385,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] { def argString: String = productIterator.flatMap { case tn: TreeNode[_] if children contains tn => Nil case tn: TreeNode[_] if tn.toString contains "\n" => s"(${tn.simpleString})" :: Nil + case seq: Seq[BaseType] if seq.toSet.subsetOf(children.toSet) => Nil case seq: Seq[_] => seq.mkString("[", ",", "]") :: Nil case set: Set[_] => set.mkString("{", ",", "}") :: Nil case other => other :: Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala similarity index 98% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala index d36a49159b87f..3f92be4a55d7d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateUtils.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.spark.sql.types +package org.apache.spark.sql.catalyst.util import java.sql.Date import java.text.SimpleDateFormat diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index 9d613a940ee86..07054166a5e88 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -83,7 +83,7 @@ package object util { } def resourceToString( - resource:String, + resource: String, encoding: String = "UTF-8", classLoader: ClassLoader = Utils.getSparkClassLoader): String = { new String(resourceToBytes(resource, classLoader), encoding) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 0992a7c311ee2..1ba3a2686639f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -47,10 +47,7 @@ abstract class DataType { * ... * }}} */ - private[sql] def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType == this => true - case _ => false - } + private[sql] def unapply(e: Expression): Boolean = e.dataType == this /** * The default size of a value of this data type, used internally for size estimation. @@ -137,10 +134,7 @@ private[sql] object IntegralType { * ... * }}} */ - def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType.isInstanceOf[IntegralType] => true - case _ => false - } + def unapply(e: Expression): Boolean = e.dataType.isInstanceOf[IntegralType] } @@ -157,10 +151,7 @@ private[sql] object FractionalType { * ... * }}} */ - def unapply(a: Expression): Boolean = a match { - case e: Expression if e.dataType.isInstanceOf[FractionalType] => true - case _ => false - } + def unapply(e: Expression): Boolean = e.dataType.isInstanceOf[FractionalType] } @@ -174,6 +165,9 @@ object DataType { def fromJson(json: String): DataType = parseDataType(parse(json)) + /** + * @deprecated As of 1.2.0, replaced by `DataType.fromJson()` + */ @deprecated("Use DataType.fromJson instead", "1.2.0") def fromCaseClassString(string: String): DataType = CaseClassStringParser(string) @@ -280,7 +274,7 @@ object DataType { protected lazy val structField: Parser[StructField] = ("StructField(" ~> "[a-zA-Z0-9_]*".r) ~ ("," ~> dataType) ~ ("," ~> boolVal <~ ")") ^^ { - case name ~ tpe ~ nullable => + case name ~ tpe ~ nullable => StructField(name, tpe, nullable = nullable) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 994c5202c15dc..eb3c58c37f308 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -313,7 +313,7 @@ object Decimal { // See scala.math's Numeric.scala for examples for Scala's built-in types. 
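
The Decimal changes above only narrow the visibility of its Numeric/Fractional/Integral "evidence parameters", but the pattern itself is worth spelling out: once an implicit Numeric[T] instance is in scope, generic numeric code such as Seq.sum works on T. A standalone sketch with a made-up Money type (Money, MoneyIsNumeric and NumericEvidenceDemo are illustrative names, not Spark's Decimal):

case class Money(cents: Long)

object MoneyIsNumeric extends Numeric[Money] {
  def plus(x: Money, y: Money): Money = Money(x.cents + y.cents)
  def minus(x: Money, y: Money): Money = Money(x.cents - y.cents)
  def times(x: Money, y: Money): Money = Money(x.cents * y.cents)
  def negate(x: Money): Money = Money(-x.cents)
  def fromInt(x: Int): Money = Money(x.toLong)
  def toInt(x: Money): Int = x.cents.toInt
  def toLong(x: Money): Long = x.cents
  def toFloat(x: Money): Float = x.cents.toFloat
  def toDouble(x: Money): Double = x.cents.toDouble
  def compare(x: Money, y: Money): Int = java.lang.Long.compare(x.cents, y.cents)
  // Required on Scala 2.13; a harmless extra method on older versions.
  def parseString(str: String): Option[Money] =
    scala.util.Try(str.trim.toLong).toOption.map(Money(_))
}

object NumericEvidenceDemo {
  def main(args: Array[String]): Unit = {
    implicit val ev: Numeric[Money] = MoneyIsNumeric
    println(Seq(Money(199), Money(250), Money(1)).sum)  // Money(450)
  }
}
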
/** Common methods for Decimal evidence parameters */ - trait DecimalIsConflicted extends Numeric[Decimal] { + private[sql] trait DecimalIsConflicted extends Numeric[Decimal] { override def plus(x: Decimal, y: Decimal): Decimal = x + y override def times(x: Decimal, y: Decimal): Decimal = x * y override def minus(x: Decimal, y: Decimal): Decimal = x - y @@ -327,12 +327,12 @@ object Decimal { } /** A [[scala.math.Fractional]] evidence parameter for Decimals. */ - object DecimalIsFractional extends DecimalIsConflicted with Fractional[Decimal] { + private[sql] object DecimalIsFractional extends DecimalIsConflicted with Fractional[Decimal] { override def div(x: Decimal, y: Decimal): Decimal = x / y } /** A [[scala.math.Integral]] evidence parameter for Decimals. */ - object DecimalAsIfIntegral extends DecimalIsConflicted with Integral[Decimal] { + private[sql] object DecimalAsIfIntegral extends DecimalIsConflicted with Integral[Decimal] { override def quot(x: Decimal, y: Decimal): Decimal = x / y override def rem(x: Decimal, y: Decimal): Decimal = x % y } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala index 0f8cecd28f7df..407dc27326c2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala @@ -82,12 +82,12 @@ case class DecimalType(precisionInfo: Option[PrecisionInfo]) extends FractionalT object DecimalType { val Unlimited: DecimalType = DecimalType(None) - object Fixed { + private[sql] object Fixed { def unapply(t: DecimalType): Option[(Int, Int)] = t.precisionInfo.map(p => (p.precision, p.scale)) } - object Expression { + private[sql] object Expression { def unapply(e: Expression): Option[(Int, Int)] = e.dataType match { case t: DecimalType => t.precisionInfo.map(p => (p.precision, p.scale)) case _ => None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/SQLUserDefinedType.java b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/SQLUserDefinedType.java index a64d2bb7cde37..df64a878b6b36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/SQLUserDefinedType.java +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/SQLUserDefinedType.java @@ -24,11 +24,11 @@ /** * ::DeveloperApi:: * A user-defined type which can be automatically recognized by a SQLContext and registered. - * + *

* WARNING: This annotation will only work if both Java and Scala reflection return the same class * names (after erasure) for the UDT. This will NOT be the case when, e.g., the UDT class * is enclosed in an object (a singleton). - * + *

* WARNING: UDTs are currently only supported from Scala. */ // TODO: Should I used @Documented ? diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index d80ffca18ec9a..7e00a27dfe724 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -134,6 +134,10 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru throw new IllegalArgumentException(s"""Field "$name" does not exist.""")) } + private[sql] def getFieldIndex(name: String): Option[Int] = { + nameToIndex.get(name) + } + protected[sql] def toAttributes: Seq[AttributeReference] = map(f => AttributeReference(f.name, f.dataType, f.nullable, f.metadata)()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala index fc02ba6c9c43e..f5d8fcced362b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UTF8String.scala @@ -19,15 +19,18 @@ package org.apache.spark.sql.types import java.util.Arrays +import org.apache.spark.annotation.DeveloperApi + /** - * A UTF-8 String, as internal representation of StringType in SparkSQL + * :: DeveloperApi :: + * A UTF-8 String, as internal representation of StringType in SparkSQL * - * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison, - * search, see http://en.wikipedia.org/wiki/UTF-8 for details. + * A String encoded in UTF-8 as an Array[Byte], which can be used for comparison, + * search, see http://en.wikipedia.org/wiki/UTF-8 for details. * - * Note: This is not designed for general use cases, should not be used outside SQL. + * Note: This is not designed for general use cases, should not be used outside SQL. 
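
The UTF8String class annotated below stores a string as its UTF-8 bytes and compares those bytes directly, converting to java.lang.String only at the edges. A plain-JDK sketch of the representation (this is not Spark's UTF8String):

import java.nio.charset.StandardCharsets.UTF_8
import java.util.Arrays

object Utf8BytesSketch {
  def main(args: Array[String]): Unit = {
    val bytes = "café".getBytes(UTF_8)
    println(bytes.length)                                  // 5: 'é' takes two UTF-8 bytes
    println(new String(bytes, UTF_8))                      // café - round-trips losslessly
    println(Arrays.equals(bytes, "café".getBytes(UTF_8)))  // true: byte-wise equality
  }
}
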
*/ - +@DeveloperApi final class UTF8String extends Ordered[UTF8String] with Serializable { private[this] var bytes: Array[Byte] = _ @@ -180,6 +183,10 @@ final class UTF8String extends Ordered[UTF8String] with Serializable { } } +/** + * :: DeveloperApi :: + */ +@DeveloperApi object UTF8String { // number of tailing bytes in a UTF8 sequence for a code point // see http://en.wikipedia.org/wiki/UTF-8, 192-256 of Byte 1 @@ -196,7 +203,7 @@ object UTF8String { def apply(s: String): UTF8String = { if (s != null) { new UTF8String().set(s) - } else{ + } else { null } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index bbc0b661a0c0c..7ff51db76b6bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -253,7 +253,7 @@ class ScalaReflectionSuite extends FunSuite { } assert(ArrayType(IntegerType) === typeOfObject3(Seq(1, 2, 3))) - assert(ArrayType(ArrayType(IntegerType)) === typeOfObject3(Seq(Seq(1,2,3)))) + assert(ArrayType(ArrayType(IntegerType)) === typeOfObject3(Seq(Seq(1, 2, 3)))) } test("convert PrimitiveData to catalyst") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala index a652c70560990..9eed15952d82b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SqlParserSuite.scala @@ -17,14 +17,18 @@ package org.apache.spark.sql.catalyst +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.plans.logical.Command import org.scalatest.FunSuite -private[sql] case class TestCommand(cmd: String) extends Command +private[sql] case class TestCommand(cmd: String) extends LogicalPlan with Command { + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty +} private[sql] class SuperLongKeywordTestParser extends AbstractSparkSQLParser { - protected val EXECUTE = Keyword("THISISASUPERLONGKEYWORDTEST") + protected val EXECUTE = Keyword("THISISASUPERLONGKEYWORDTEST") override protected lazy val start: Parser[LogicalPlan] = set @@ -35,7 +39,7 @@ private[sql] class SuperLongKeywordTestParser extends AbstractSparkSQLParser { } private[sql] class CaseInsensitiveTestParser extends AbstractSparkSQLParser { - protected val EXECUTE = Keyword("EXECUTE") + protected val EXECUTE = Keyword("EXECUTE") override protected lazy val start: Parser[LogicalPlan] = set diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 971e1ff5ec2b8..fcff24ca31486 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -23,24 +23,26 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types._ - +import org.apache.spark.sql.catalyst.SimpleCatalystConf import 
org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ class AnalysisSuite extends FunSuite with BeforeAndAfter { - val caseSensitiveCatalog = new SimpleCatalog(true) - val caseInsensitiveCatalog = new SimpleCatalog(false) + val caseSensitiveConf = new SimpleCatalystConf(true) + val caseInsensitiveConf = new SimpleCatalystConf(false) + + val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf) + val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf) val caseSensitiveAnalyzer = - new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitive = true) { + new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) { override val extendedResolutionRules = EliminateSubQueries :: Nil } val caseInsensitiveAnalyzer = - new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseSensitive = false) { + new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) { override val extendedResolutionRules = EliminateSubQueries :: Nil } - def caseSensitiveAnalyze(plan: LogicalPlan): Unit = caseSensitiveAnalyzer.checkAnalysis(caseSensitiveAnalyzer.execute(plan)) @@ -70,6 +72,9 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter { StructField("cField", StringType) :: Nil ))()) + val listRelation = LocalRelation( + AttributeReference("list", ArrayType(IntegerType))()) + before { caseSensitiveCatalog.registerTable(Seq("TaBlE"), testRelation) caseInsensitiveCatalog.registerTable(Seq("TaBlE"), testRelation) @@ -150,17 +155,35 @@ class AnalysisSuite extends FunSuite with BeforeAndAfter { caseSensitive: Boolean = true): Unit = { test(name) { val error = intercept[AnalysisException] { - if(caseSensitive) { + if (caseSensitive) { caseSensitiveAnalyze(plan) } else { caseInsensitiveAnalyze(plan) } } - errorMessages.foreach(m => assert(error.getMessage contains m)) + errorMessages.foreach(m => assert(error.getMessage.toLowerCase contains m.toLowerCase)) } } + errorTest( + "unresolved window function", + testRelation2.select( + WindowExpression( + UnresolvedWindowFunction( + "lead", + UnresolvedAttribute("c") :: Nil), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + UnspecifiedFrame)).as('window)), + "lead" :: "window functions currently requires a HiveContext" :: Nil) + + errorTest( + "too many generators", + listRelation.select(Explode('list).as('a), Explode('list).as('b)), + "only one generator" :: "explode" :: Nil) + errorTest( "unresolved attributes", testRelation.select('abcd), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala index 36b03d1c65e28..565b1cfe019c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala @@ -17,14 +17,17 @@ package org.apache.spark.sql.catalyst.analysis +import org.scalatest.{BeforeAndAfter, FunSuite} + import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Union, Project, LocalRelation} import org.apache.spark.sql.types._ -import org.scalatest.{BeforeAndAfter, FunSuite} +import org.apache.spark.sql.catalyst.SimpleCatalystConf class DecimalPrecisionSuite extends FunSuite with BeforeAndAfter { - val catalog = new SimpleCatalog(false) - val analyzer = 
new Analyzer(catalog, EmptyFunctionRegistry, caseSensitive = false) + val conf = new SimpleCatalystConf(true) + val catalog = new SimpleCatalog(conf) + val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf) val relation = LocalRelation( AttributeReference("i", IntegerType)(), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index fa71001c9336e..2a1530386120d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -26,9 +26,10 @@ import org.scalatest.FunSuite import org.scalatest.Matchers._ import org.apache.spark.sql.catalyst.CatalystTypeConverters -import org.apache.spark.sql.catalyst.analysis.UnresolvedGetField +import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.mathfuncs._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ @@ -42,8 +43,8 @@ class ExpressionEvaluationBaseSuite extends FunSuite { val actual = try evaluate(expression, inputRow) catch { case e: Exception => fail(s"Exception evaluating $expression", e) } - if(actual != expected) { - val input = if(inputRow == EmptyRow) "" else s", input: $inputRow" + if (actual != expected) { + val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" fail(s"Incorrect Evaluation: $expression, actual: $actual, expected: $expected$input") } } @@ -125,37 +126,37 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { } booleanLogicTest("AND", _ && _, - (true, true, true) :: - (true, false, false) :: - (true, null, null) :: - (false, true, false) :: + (true, true, true) :: + (true, false, false) :: + (true, null, null) :: + (false, true, false) :: (false, false, false) :: - (false, null, false) :: - (null, true, null) :: - (null, false, false) :: - (null, null, null) :: Nil) + (false, null, false) :: + (null, true, null) :: + (null, false, false) :: + (null, null, null) :: Nil) booleanLogicTest("OR", _ || _, - (true, true, true) :: - (true, false, true) :: - (true, null, true) :: - (false, true, true) :: + (true, true, true) :: + (true, false, true) :: + (true, null, true) :: + (false, true, true) :: (false, false, false) :: - (false, null, null) :: - (null, true, true) :: - (null, false, null) :: - (null, null, null) :: Nil) + (false, null, null) :: + (null, true, true) :: + (null, false, null) :: + (null, null, null) :: Nil) booleanLogicTest("=", _ === _, - (true, true, true) :: - (true, false, false) :: - (true, null, null) :: - (false, true, false) :: + (true, true, true) :: + (true, false, false) :: + (true, null, null) :: + (false, true, false) :: (false, false, true) :: - (false, null, null) :: - (null, true, null) :: - (null, false, null) :: - (null, null, null) :: Nil) + (false, null, null) :: + (null, true, null) :: + (null, false, null) :: + (null, null, null) :: Nil) def booleanLogicTest( name: String, @@ -163,7 +164,7 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { truthTable: Seq[(Any, Any, Any)]) { test(s"3VL $name") { truthTable.foreach { - case (l,r,answer) => + case (l, r, answer) => val expr = op(Literal.create(l, BooleanType), Literal.create(r, BooleanType)) 
checkEvaluation(expr, answer) } @@ -850,11 +851,37 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { assert(CaseWhen(Seq(c2, c4_notNull, c3, c5)).nullable === true) } + test("case key when") { + val row = create_row(null, 1, 2, "a", "b", "c") + val c1 = 'a.int.at(0) + val c2 = 'a.int.at(1) + val c3 = 'a.int.at(2) + val c4 = 'a.string.at(3) + val c5 = 'a.string.at(4) + val c6 = 'a.string.at(5) + + val literalNull = Literal.create(null, BooleanType) + val literalInt = Literal(1) + val literalString = Literal("a") + + checkEvaluation(CaseKeyWhen(c1, Seq(c2, c4, c5)), "b", row) + checkEvaluation(CaseKeyWhen(c1, Seq(c2, c4, literalNull, c5, c6)), "b", row) + checkEvaluation(CaseKeyWhen(c2, Seq(literalInt, c4, c5)), "a", row) + checkEvaluation(CaseKeyWhen(c2, Seq(c1, c4, c5)), "b", row) + checkEvaluation(CaseKeyWhen(c4, Seq(literalString, c2, c3)), 1, row) + checkEvaluation(CaseKeyWhen(c4, Seq(c1, c3, c5, c2, Literal(3))), 3, row) + + checkEvaluation(CaseKeyWhen(literalInt, Seq(c2, c4, c5)), "a", row) + checkEvaluation(CaseKeyWhen(literalString, Seq(c5, c2, c4, c3)), 2, row) + checkEvaluation(CaseKeyWhen(literalInt, Seq(c5, c2, c4, c3)), null, row) + checkEvaluation(CaseKeyWhen(literalNull, Seq(c5, c2, c1, c3)), 2, row) + } + test("complex type") { val row = create_row( "^Ba*n", // 0 null.asInstanceOf[UTF8String], // 1 - create_row("aa", "bb"), // 2 + create_row("aa", "bb"), // 2 Map("aa"->"bb"), // 3 Seq("aa", "bb") // 4 ) @@ -865,54 +892,79 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { val typeMap = MapType(StringType, StringType) val typeArray = ArrayType(StringType) - checkEvaluation(GetItem(BoundReference(3, typeMap, true), + checkEvaluation(GetMapValue(BoundReference(3, typeMap, true), Literal("aa")), "bb", row) - checkEvaluation(GetItem(Literal.create(null, typeMap), Literal("aa")), null, row) + checkEvaluation(GetMapValue(Literal.create(null, typeMap), Literal("aa")), null, row) checkEvaluation( - GetItem(Literal.create(null, typeMap), Literal.create(null, StringType)), null, row) - checkEvaluation(GetItem(BoundReference(3, typeMap, true), + GetMapValue(Literal.create(null, typeMap), Literal.create(null, StringType)), null, row) + checkEvaluation(GetMapValue(BoundReference(3, typeMap, true), Literal.create(null, StringType)), null, row) - checkEvaluation(GetItem(BoundReference(4, typeArray, true), + checkEvaluation(GetArrayItem(BoundReference(4, typeArray, true), Literal(1)), "bb", row) - checkEvaluation(GetItem(Literal.create(null, typeArray), Literal(1)), null, row) + checkEvaluation(GetArrayItem(Literal.create(null, typeArray), Literal(1)), null, row) checkEvaluation( - GetItem(Literal.create(null, typeArray), Literal.create(null, IntegerType)), null, row) - checkEvaluation(GetItem(BoundReference(4, typeArray, true), + GetArrayItem(Literal.create(null, typeArray), Literal.create(null, IntegerType)), null, row) + checkEvaluation(GetArrayItem(BoundReference(4, typeArray, true), Literal.create(null, IntegerType)), null, row) - def quickBuildGetField(expr: Expression, fieldName: String): StructGetField = { + def getStructField(expr: Expression, fieldName: String): ExtractValue = { expr.dataType match { case StructType(fields) => val field = fields.find(_.name == fieldName).get - StructGetField(expr, field, fields.indexOf(field)) + GetStructField(expr, field, fields.indexOf(field)) } } - def quickResolve(u: UnresolvedGetField): StructGetField = { - quickBuildGetField(u.child, u.fieldName) + def quickResolve(u: UnresolvedExtractValue): 
ExtractValue = { + ExtractValue(u.child, u.extraction, _ == _) } - checkEvaluation(quickBuildGetField(BoundReference(2, typeS, nullable = true), "a"), "aa", row) - checkEvaluation(quickBuildGetField(Literal.create(null, typeS), "a"), null, row) + checkEvaluation(getStructField(BoundReference(2, typeS, nullable = true), "a"), "aa", row) + checkEvaluation(getStructField(Literal.create(null, typeS), "a"), null, row) val typeS_notNullable = StructType( StructField("a", StringType, nullable = false) :: StructField("b", StringType, nullable = false) :: Nil ) - assert(quickBuildGetField(BoundReference(2,typeS, nullable = true), "a").nullable === true) - assert(quickBuildGetField(BoundReference(2, typeS_notNullable, nullable = false), "a").nullable + assert(getStructField(BoundReference(2, typeS, nullable = true), "a").nullable === true) + assert(getStructField(BoundReference(2, typeS_notNullable, nullable = false), "a").nullable === false) - assert(quickBuildGetField(Literal.create(null, typeS), "a").nullable === true) - assert(quickBuildGetField(Literal.create(null, typeS_notNullable), "a").nullable === true) + assert(getStructField(Literal.create(null, typeS), "a").nullable === true) + assert(getStructField(Literal.create(null, typeS_notNullable), "a").nullable === true) - checkEvaluation('c.map(typeMap).at(3).getItem("aa"), "bb", row) - checkEvaluation('c.array(typeArray.elementType).at(4).getItem(1), "bb", row) + checkEvaluation(quickResolve('c.map(typeMap).at(3).getItem("aa")), "bb", row) + checkEvaluation(quickResolve('c.array(typeArray.elementType).at(4).getItem(1)), "bb", row) checkEvaluation(quickResolve('c.struct(typeS).at(2).getField("a")), "aa", row) } + test("error message of ExtractValue") { + val structType = StructType(StructField("a", StringType, true) :: Nil) + val arrayStructType = ArrayType(structType) + val arrayType = ArrayType(StringType) + val otherType = StringType + + def checkErrorMessage( + childDataType: DataType, + fieldDataType: DataType, + errorMesage: String): Unit = { + val e = intercept[org.apache.spark.sql.AnalysisException] { + ExtractValue( + Literal.create(null, childDataType), + Literal.create(null, fieldDataType), + _ == _) + } + assert(e.getMessage().contains(errorMesage)) + } + + checkErrorMessage(structType, IntegerType, "Field name should be String Literal") + checkErrorMessage(arrayStructType, BooleanType, "Field name should be String Literal") + checkErrorMessage(arrayType, StringType, "Array index should be integral type") + checkErrorMessage(otherType, StringType, "Can't extract value from") + } + test("arithmetic") { val row = create_row(1, 2, 3, null) val c1 = 'a.int.at(0) @@ -1155,7 +1207,7 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { } /** - * Used for testing math functions for DataFrames. + * Used for testing math functions for DataFrames. 
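
The AND/OR/= truth tables realigned earlier in this suite encode SQL's three-valued logic: false short-circuits AND, true short-circuits OR, and NULL propagates otherwise. A standalone sketch with Option[Boolean] standing in for a nullable BooleanType value (None plays the role of NULL; illustrative only):

object ThreeValuedLogicSketch {
  def and(l: Option[Boolean], r: Option[Boolean]): Option[Boolean] = (l, r) match {
    case (Some(false), _) | (_, Some(false)) => Some(false)  // false dominates
    case (Some(true), Some(true))            => Some(true)
    case _                                   => None         // unknown otherwise
  }

  def or(l: Option[Boolean], r: Option[Boolean]): Option[Boolean] = (l, r) match {
    case (Some(true), _) | (_, Some(true)) => Some(true)      // true dominates
    case (Some(false), Some(false))        => Some(false)
    case _                                 => None
  }

  def main(args: Array[String]): Unit = {
    println(and(Some(false), None))  // Some(false): matches the (false, null, false) row above
    println(or(None, Some(true)))    // Some(true):  matches the (null, true, true) row above
    println(and(Some(true), None))   // None:        matches the (true, null, null) row above
  }
}
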
* @param c The DataFrame function * @param f The functions in scala.math * @param domain The set of values to run the function with @@ -1163,7 +1215,7 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { * @tparam T Generic type for primitives */ def unaryMathFunctionEvaluation[@specialized(Int, Double, Float, Long) T]( - c: Expression => Expression, + c: Expression => Expression, f: T => T, domain: Iterable[T] = (-20 to 20).map(_ * 0.1), expectNull: Boolean = false): Unit = { @@ -1217,11 +1269,11 @@ class ExpressionEvaluationSuite extends ExpressionEvaluationBaseSuite { unaryMathFunctionEvaluation(Tanh, math.tanh) } - test("toDeg") { + test("toDegrees") { unaryMathFunctionEvaluation(ToDegrees, math.toDegrees) } - test("toRad") { + test("toRadians") { unaryMathFunctionEvaluation(ToRadians, math.toRadians) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedEvaluationSuite.scala index b5ebe4b38e337..d7c437095e395 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedEvaluationSuite.scala @@ -41,9 +41,9 @@ class GeneratedEvaluationSuite extends ExpressionEvaluationSuite { """.stripMargin) } - val actual = plan(inputRow).apply(0) - if(actual != expected) { - val input = if(inputRow == EmptyRow) "" else s", input: $inputRow" + val actual = plan(inputRow).apply(0) + if (actual != expected) { + val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" fail(s"Incorrect Evaluation: $expression, actual: $actual, expected: $expected$input") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedMutableEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedMutableEvaluationSuite.scala index 97af2e0fd0502..a40324b008e16 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedMutableEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/GeneratedMutableEvaluationSuite.scala @@ -53,7 +53,7 @@ class GeneratedMutableEvaluationSuite extends ExpressionEvaluationSuite { """.stripMargin) } if (actual != expectedRow) { - val input = if(inputRow == EmptyRow) "" else s", input: $inputRow" + val input = if (inputRow == EmptyRow) "" else s", input: $inputRow" fail(s"Incorrect Evaluation: $expression, actual: $actual, expected: $expected$input") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index 6255578d7fa57..465a5e6914204 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -78,9 +78,9 @@ class BooleanSimplificationSuite extends PlanTest with PredicateHelper { test("(a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) 
...") { checkCondition('b > 3 || 'c > 5, 'b > 3 || 'c > 5) - checkCondition(('a < 2 && 'a > 3 && 'b > 5) || 'a < 2, 'a < 2) + checkCondition(('a < 2 && 'a > 3 && 'b > 5) || 'a < 2, 'a < 2) - checkCondition('a < 2 || ('a < 2 && 'a > 3 && 'b > 5), 'a < 2) + checkCondition('a < 2 || ('a < 2 && 'a > 3 && 'b > 5), 'a < 2) val input = ('a === 'b && 'b > 3 && 'c > 2) || ('a === 'b && 'c < 1 && 'a === 5) || diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index a30052b38fc11..06c592f4905a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -71,7 +71,7 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - + test("limits: combines two limits after ColumnPruning") { val originalQuery = testRelation @@ -79,7 +79,7 @@ class CombiningLimitsSuite extends PlanTest { .limit(2) .select('a) .limit(5) - + val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 18f92150b0966..ec3b2f1edfa05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis.{UnresolvedGetField, EliminateSubQueries} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedExtractValue, EliminateSubQueries} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.plans.PlanTest @@ -35,6 +35,7 @@ class ConstantFoldingSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubQueries) :: Batch("ConstantFolding", Once, + OptimizeIn, ConstantFolding, BooleanSimplification) :: Nil } @@ -180,10 +181,10 @@ class ConstantFoldingSuite extends PlanTest { IsNull(Literal(null)) as 'c1, IsNotNull(Literal(null)) as 'c2, - GetItem(Literal.create(null, ArrayType(IntegerType)), 1) as 'c3, - GetItem( + UnresolvedExtractValue(Literal.create(null, ArrayType(IntegerType)), 1) as 'c3, + UnresolvedExtractValue( Literal.create(Seq(1), ArrayType(IntegerType)), Literal.create(null, IntegerType)) as 'c4, - UnresolvedGetField( + UnresolvedExtractValue( Literal.create(null, StructType(Seq(StructField("a", IntegerType, true)))), "a") as 'c5, @@ -247,4 +248,36 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("Constant folding test: Fold In(v, list) into true or false") { + var originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), Literal(2)))) + + var optimized = Optimize.execute(originalQuery.analyze) + + var correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + + originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), 'a.attr))) + + optimized = Optimize.execute(originalQuery.analyze) + + correctAnswer = + testRelation + 
.select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 58d415d9011e1..17dc9124749e8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries -import org.apache.spark.sql.catalyst.expressions.{Count, Explode} +import org.apache.spark.sql.catalyst.expressions.{SortOrder, Ascending, Count, Explode} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.{LeftSemi, PlanTest, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.rules._ @@ -38,7 +38,8 @@ class FilterPushdownSuite extends PlanTest { PushPredicateThroughProject, PushPredicateThroughJoin, PushPredicateThroughGenerate, - ColumnPruning) :: Nil + ColumnPruning, + ProjectCollapsing) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) @@ -92,11 +93,11 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - + test("column pruning for Project(ne, Limit)") { val originalQuery = testRelation - .select('a,'b) + .select('a, 'b) .limit(2) .select('a) @@ -108,7 +109,7 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - + // After this line is unimplemented. test("simple push down") { val originalQuery = @@ -541,4 +542,38 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, originalQuery) } + + test("push down project past sort") { + val x = testRelation.subquery('x) + + // push down valid + val originalQuery = { + x.select('a, 'b) + .sortBy(SortOrder('a, Ascending)) + .select('a) + } + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = + x.select('a) + .sortBy(SortOrder('a, Ascending)).analyze + + comparePlans(optimized, analysis.EliminateSubQueries(correctAnswer)) + + // push down invalid + val originalQuery1 = { + x.select('a, 'b) + .sortBy(SortOrder('a, Ascending)) + .select('b) + } + + val optimized1 = Optimize.execute(originalQuery1.analyze) + val correctAnswer1 = + x.select('a, 'b) + .sortBy(SortOrder('a, Ascending)) + .select('b).analyze + + comparePlans(optimized1, analysis.EliminateSubQueries(correctAnswer1)) + + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala index 3eb399e68e70c..1d433275fed2e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeInSuite.scala @@ -46,7 +46,7 @@ class OptimizeInSuite extends PlanTest { test("OptimizedIn test: In clause optimized to InSet") { val originalQuery = testRelation - .where(In(UnresolvedAttribute("a"), Seq(Literal(1),Literal(2)))) + .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2)))) .analyze val optimized = Optimize.execute(originalQuery.analyze) @@ -57,17 +57,17 @@ class OptimizeInSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - + 
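As an aside (not part of the patch), the optimizer behaviour exercised by the OptimizeIn and constant-folding tests above has a simple user-visible shape. The sketch below is illustrative only: the local-mode setup, object name, and column names are assumptions, and the `explain` output is where a folded or `InSet`-rewritten predicate would be expected to show up.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext
    import org.apache.spark.sql.functions.lit

    object InOptimizationSketch {
      def main(args: Array[String]): Unit = {
        // Local-mode setup, assumed purely for this sketch.
        val sc = new SparkContext(
          new SparkConf().setAppName("in-optimization-sketch").setMaster("local[2]"))
        val sqlContext = new SQLContext(sc)
        import sqlContext.implicits._

        val df = sc.parallelize(Seq((1, "x"), (2, "y"), (3, "z"))).toDF("a", "b")

        // In(literal, literal list): per the ConstantFolding/OptimizeIn batch tested above,
        // this predicate should fold to a constant true, so the filter should keep every row.
        df.filter(lit(1).in(lit(1), lit(2))).explain(true)

        // In(column, literal list): not foldable, but expected to be rewritten to InSet,
        // trading a linear scan of the literal list for a hash-set lookup per row.
        df.filter($"a".in(lit(1), lit(2))).explain(true)

        sc.stop()
      }
    }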
test("OptimizedIn test: In clause not optimized in case filter has attributes") { val originalQuery = testRelation - .where(In(UnresolvedAttribute("a"), Seq(Literal(1),Literal(2), UnresolvedAttribute("b")))) + .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b")))) .analyze val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation - .where(In(UnresolvedAttribute("a"), Seq(Literal(1),Literal(2), UnresolvedAttribute("b")))) + .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b")))) .analyze comparePlans(optimized, correctAnswer) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ProjectCollapsingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ProjectCollapsingSuite.scala new file mode 100644 index 0000000000000..151654bffbd66 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ProjectCollapsingSuite.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.expressions.Rand +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor + + +class ProjectCollapsingSuite extends PlanTest { + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Subqueries", FixedPoint(10), EliminateSubQueries) :: + Batch("ProjectCollapsing", Once, ProjectCollapsing) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int) + + test("collapse two deterministic, independent projects into one") { + val query = testRelation + .select(('a + 1).as('a_plus_1), 'b) + .select('a_plus_1, ('b + 1).as('b_plus_1)) + + val optimized = Optimize.execute(query.analyze) + val correctAnswer = testRelation.select(('a + 1).as('a_plus_1), ('b + 1).as('b_plus_1)).analyze + + comparePlans(optimized, correctAnswer) + } + + test("collapse two deterministic, dependent projects into one") { + val query = testRelation + .select(('a + 1).as('a_plus_1), 'b) + .select(('a_plus_1 + 1).as('a_plus_2), 'b) + + val optimized = Optimize.execute(query.analyze) + + val correctAnswer = testRelation.select( + (('a + 1).as('a_plus_1) + 1).as('a_plus_2), + 'b).analyze + + comparePlans(optimized, correctAnswer) + } + + test("do not collapse nondeterministic projects") { + val query = testRelation + .select(Rand(10).as('rand)) + .select(('rand + 1).as('rand1), ('rand + 2).as('rand2)) + + val optimized = Optimize.execute(query.analyze) + val correctAnswer = query.analyze + + comparePlans(optimized, correctAnswer) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnionPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnionPushdownSuite.scala index a3ad200800b02..35f50be46b76f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnionPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnionPushdownSuite.scala @@ -33,8 +33,8 @@ class UnionPushdownSuite extends PlanTest { UnionPushdown) :: Nil } - val testRelation = LocalRelation('a.int, 'b.int, 'c.int) - val testRelation2 = LocalRelation('d.int, 'e.int, 'f.int) + val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + val testRelation2 = LocalRelation('d.int, 'e.int, 'f.int) val testUnion = Union(testRelation, testRelation2) test("union: filter to each side") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 6b393327cc97a..5875549de5075 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.scalatest.FunSuite import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{StringType, NullType} +import org.apache.spark.sql.types.{IntegerType, StringType, NullType} case class Dummy(optKey: Option[Expression]) extends Expression { def children: Seq[Expression] = optKey.toSeq @@ -92,7 +92,7 @@ class TreeNodeSuite extends FunSuite { 
test("transform works on nodes with Option children") { val dummy1 = Dummy(Some(Literal.create("1", StringType))) val dummy2 = Dummy(None) - val toZero: PartialFunction[Expression, Expression] = { case Literal(_, _) => Literal(0) } + val toZero: PartialFunction[Expression, Expression] = { case Literal(_, _) => Literal(0) } var actual = dummy1 transformDown toZero assert(actual === Dummy(Some(Literal(0)))) @@ -105,7 +105,7 @@ class TreeNodeSuite extends FunSuite { } test("preserves origin") { - CurrentOrigin.setPosition(1,1) + CurrentOrigin.setPosition(1, 1) val add = Add(Literal(1), Literal(1)) CurrentOrigin.reset() @@ -129,5 +129,97 @@ class TreeNodeSuite extends FunSuite { assert(expected === actual) } + test("find") { + val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4)))) + // Find the top node. + var actual: Option[Expression] = expression.find { + case add: Add => true + case other => false + } + var expected: Option[Expression] = + Some(Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4))))) + assert(expected === actual) + + // Find the first children. + actual = expression.find { + case Literal(1, IntegerType) => true + case other => false + } + expected = Some(Literal(1)) + assert(expected === actual) + + // Find an internal node (Subtract). + actual = expression.find { + case sub: Subtract => true + case other => false + } + expected = Some(Subtract(Literal(3), Literal(4))) + assert(expected === actual) + + // Find a leaf node. + actual = expression.find { + case Literal(3, IntegerType) => true + case other => false + } + expected = Some(Literal(3)) + assert(expected === actual) + + // Find nothing. + actual = expression.find { + case Literal(100, IntegerType) => true + case other => false + } + expected = None + assert(expected === actual) + } + + test("collectFirst") { + val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4)))) + + // Collect the top node. + { + val actual = expression.collectFirst { + case add: Add => add + } + val expected = + Some(Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4))))) + assert(expected === actual) + } + + // Collect the first children. + { + val actual = expression.collectFirst { + case l @ Literal(1, IntegerType) => l + } + val expected = Some(Literal(1)) + assert(expected === actual) + } + + // Collect an internal node (Subtract). + { + val actual = expression.collectFirst { + case sub: Subtract => sub + } + val expected = Some(Subtract(Literal(3), Literal(4))) + assert(expected === actual) + } + // Collect a leaf node. + { + val actual = expression.collectFirst { + case l @ Literal(3, IntegerType) => l + } + val expected = Some(Literal(3)) + assert(expected === actual) + } + + // Collect nothing. 
+ { + val actual = expression.collectFirst { + case l @ Literal(100, IntegerType) => l + } + val expected = None + assert(expected === actual) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index d797510f36685..953debfe1e35a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -120,7 +120,7 @@ class DataTypeSuite extends FunSuite { checkDefaultSize(DecimalType(10, 5), 4096) checkDefaultSize(DecimalType.Unlimited, 4096) checkDefaultSize(DateType, 4) - checkDefaultSize(TimestampType,12) + checkDefaultSize(TimestampType, 12) checkDefaultSize(StringType, 4096) checkDefaultSize(BinaryType, 4096) checkDefaultSize(ArrayType(DoubleType, true), 800) @@ -179,11 +179,11 @@ class DataTypeSuite extends FunSuite { expected = false) checkEqualsIgnoreCompatibleNullability( from = MapType(StringType, ArrayType(IntegerType, true), valueContainsNull = true), - to = MapType(StringType, ArrayType(IntegerType, false), valueContainsNull = true), + to = MapType(StringType, ArrayType(IntegerType, false), valueContainsNull = true), expected = false) checkEqualsIgnoreCompatibleNullability( from = MapType(StringType, ArrayType(IntegerType, false), valueContainsNull = true), - to = MapType(StringType, ArrayType(IntegerType, true), valueContainsNull = true), + to = MapType(StringType, ArrayType(IntegerType, true), valueContainsNull = true), expected = true) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index e3a6b1fe72435..2dba6e8865620 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml @@ -64,7 +64,7 @@ com.fasterxml.jackson.core jackson-databind - 2.3.0 + ${fasterxml.jackson.version} org.jodd @@ -103,13 +103,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - ../../python - - pyspark/sql/*.py - - - diff --git a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java index a40be526d0d11..9665c3c46f901 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java +++ b/sql/core/src/main/java/org/apache/spark/sql/SaveMode.java @@ -18,28 +18,38 @@ /** * SaveMode is used to specify the expected behavior of saving a DataFrame to a data source. + * + * @since 1.3.0 */ public enum SaveMode { /** * Append mode means that when saving a DataFrame to a data source, if data/table already exists, * contents of the DataFrame are expected to be appended to existing data. + * + * @since 1.3.0 */ Append, /** * Overwrite mode means that when saving a DataFrame to a data source, * if data/table already exists, existing data is expected to be overwritten by the contents of * the DataFrame. + * + * @since 1.3.0 */ Overwrite, /** * ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists, * an exception is expected to be thrown. + * + * @since 1.3.0 */ ErrorIfExists, /** * Ignore mode means that when saving a DataFrame to a data source, if data already exists, * the save operation is expected to not save the contents of the DataFrame and to not * change the existing data. 
+ * + * @since 1.3.0 */ Ignore } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 33f9d0b37d006..d3efa83380d04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -21,9 +21,10 @@ import scala.language.implicitConversions import org.apache.spark.annotation.Experimental import org.apache.spark.Logging +import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.lit import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar, UnresolvedGetField} +import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.types._ @@ -45,6 +46,8 @@ private[sql] object Column { * @groupname expr_ops Expression operators. * @groupname df_ops DataFrame functions. * @groupname Ungrouped Support functions for DataFrames. + * + * @since 1.3.0 */ @Experimental class Column(protected[sql] val expr: Expression) extends Logging { @@ -67,6 +70,20 @@ class Column(protected[sql] val expr: Expression) extends Logging { override def hashCode: Int = this.expr.hashCode + /** + * Extracts a value or values from a complex type. + * The following types of extraction are supported: + * - Given an Array, an integer ordinal can be used to retrieve a single value. + * - Given a Map, a key of the correct type can be used to retrieve an individual value. + * - Given a Struct, a string fieldName can be used to extract that field. + * - Given an Array of Structs, a string fieldName can be used to extract filed + * of every struct in that array, and return an Array of fields + * + * @group expr_ops + * @since 1.4.0 + */ + def apply(extraction: Any): Column = UnresolvedExtractValue(expr, lit(extraction).expr) + /** * Unary minus, i.e. negate the expression. * {{{ @@ -79,21 +96,23 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_- : Column = UnaryMinus(expr) /** * Inversion of boolean expression, i.e. NOT. - * {{ + * {{{ * // Scala: select rows that are not active (isActive === false) * df.filter( !df("isActive") ) * * // Java: * import static org.apache.spark.sql.functions.*; * df.filter( not(df.col("isActive")) ); - * }} + * }}} * * @group expr_ops + * @since 1.3.0 */ def unary_! 
: Column = Not(expr) @@ -109,6 +128,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def === (other: Any): Column = { val right = lit(other).expr @@ -132,6 +152,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def equalTo(other: Any): Column = this === other @@ -148,6 +169,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def !== (other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -164,6 +186,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def notEqual(other: Any): Column = Not(EqualTo(expr, lit(other).expr)) @@ -179,6 +202,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def > (other: Any): Column = GreaterThan(expr, lit(other).expr) @@ -194,6 +218,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def gt(other: Any): Column = this > other @@ -208,6 +233,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def < (other: Any): Column = LessThan(expr, lit(other).expr) @@ -222,6 +248,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def lt(other: Any): Column = this < other @@ -236,6 +263,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def <= (other: Any): Column = LessThanOrEqual(expr, lit(other).expr) @@ -250,6 +278,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def leq(other: Any): Column = this <= other @@ -264,6 +293,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def >= (other: Any): Column = GreaterThanOrEqual(expr, lit(other).expr) @@ -278,6 +308,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def geq(other: Any): Column = this >= other @@ -285,6 +316,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group expr_ops + * @since 1.3.0 */ def <=> (other: Any): Column = EqualNullSafe(expr, lit(other).expr) @@ -292,13 +324,88 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Equality test that is safe for null values. * * @group java_expr_ops + * @since 1.3.0 */ def eqNullSafe(other: Any): Column = this <=> other + /** + * Evaluates a list of conditions and returns one of multiple possible result expressions. + * If otherwise is not defined at the end, null is returned for unmatched conditions. + * + * {{{ + * // Example: encoding gender string column into integer. 
+ * + * // Scala: + * people.select(when(people("gender") === "male", 0) + * .when(people("gender") === "female", 1) + * .otherwise(2)) + * + * // Java: + * people.select(when(col("gender").equalTo("male"), 0) + * .when(col("gender").equalTo("female"), 1) + * .otherwise(2)) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def when(condition: Column, value: Any): Column = this.expr match { + case CaseWhen(branches: Seq[Expression]) => + CaseWhen(branches ++ Seq(lit(condition).expr, lit(value).expr)) + case _ => + throw new IllegalArgumentException( + "when() can only be applied on a Column previously generated by when() function") + } + + /** + * Evaluates a list of conditions and returns one of multiple possible result expressions. + * If otherwise is not defined at the end, null is returned for unmatched conditions. + * + * {{{ + * // Example: encoding gender string column into integer. + * + * // Scala: + * people.select(when(people("gender") === "male", 0) + * .when(people("gender") === "female", 1) + * .otherwise(2)) + * + * // Java: + * people.select(when(col("gender").equalTo("male"), 0) + * .when(col("gender").equalTo("female"), 1) + * .otherwise(2)) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def otherwise(value: Any): Column = this.expr match { + case CaseWhen(branches: Seq[Expression]) => + if (branches.size % 2 == 0) { + CaseWhen(branches :+ lit(value).expr) + } else { + throw new IllegalArgumentException( + "otherwise() can only be applied once on a Column previously generated by when()") + } + case _ => + throw new IllegalArgumentException( + "otherwise() can only be applied on a Column previously generated by when()") + } + + /** + * True if the current column is between the lower bound and upper bound, inclusive. + * + * @group java_expr_ops + * @since 1.4.0 + */ + def between(lowerBound: Any, upperBound: Any): Column = { + (this >= lowerBound) && (this <= upperBound) + } + /** * True if the current expression is null. * * @group expr_ops + * @since 1.3.0 */ def isNull: Column = IsNull(expr) @@ -306,6 +413,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * True if the current expression is NOT null. 
* * @group expr_ops + * @since 1.3.0 */ def isNotNull: Column = IsNotNull(expr) @@ -320,6 +428,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def || (other: Any): Column = Or(expr, lit(other).expr) @@ -334,6 +443,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def or(other: Column): Column = this || other @@ -348,6 +458,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def && (other: Any): Column = And(expr, lit(other).expr) @@ -362,6 +473,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def and(other: Column): Column = this && other @@ -376,6 +488,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def + (other: Any): Column = Add(expr, lit(other).expr) @@ -390,6 +503,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def plus(other: Any): Column = this + other @@ -404,6 +518,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def - (other: Any): Column = Subtract(expr, lit(other).expr) @@ -418,6 +533,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def minus(other: Any): Column = this - other @@ -432,6 +548,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def * (other: Any): Column = Multiply(expr, lit(other).expr) @@ -446,6 +563,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def multiply(other: Any): Column = this * other @@ -460,6 +578,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def / (other: Any): Column = Divide(expr, lit(other).expr) @@ -474,6 +593,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group java_expr_ops + * @since 1.3.0 */ def divide(other: Any): Column = this / other @@ -481,6 +601,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group expr_ops + * @since 1.3.0 */ def % (other: Any): Column = Remainder(expr, lit(other).expr) @@ -488,6 +609,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Modulo (a.k.a. remainder) expression. * * @group java_expr_ops + * @since 1.3.0 */ def mod(other: Any): Column = this % other @@ -496,6 +618,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * by the evaluated values of the arguments. * * @group expr_ops + * @since 1.3.0 */ @scala.annotation.varargs def in(list: Column*): Column = In(expr, list.map(_.expr)) @@ -504,6 +627,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL like expression. * * @group expr_ops + * @since 1.3.0 */ def like(literal: String): Column = Like(expr, lit(literal).expr) @@ -511,6 +635,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * SQL RLIKE expression (LIKE with Regex). 
* * @group expr_ops + * @since 1.3.0 */ def rlike(literal: String): Column = RLike(expr, lit(literal).expr) @@ -519,15 +644,17 @@ class Column(protected[sql] val expr: Expression) extends Logging { * or gets a value by key `key` in a [[MapType]]. * * @group expr_ops + * @since 1.3.0 */ - def getItem(key: Any): Column = GetItem(expr, Literal(key)) + def getItem(key: Any): Column = UnresolvedExtractValue(expr, Literal(key)) /** * An expression that gets a field by name in a [[StructType]]. * * @group expr_ops + * @since 1.3.0 */ - def getField(fieldName: String): Column = UnresolvedGetField(expr, fieldName) + def getField(fieldName: String): Column = UnresolvedExtractValue(expr, Literal(fieldName)) /** * An expression that returns a substring. @@ -535,6 +662,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len expression for the length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Column, len: Column): Column = Substring(expr, startPos.expr, len.expr) @@ -544,6 +672,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @param len length of the substring. * * @group expr_ops + * @since 1.3.0 */ def substr(startPos: Int, len: Int): Column = Substring(expr, lit(startPos).expr, lit(len).expr) @@ -551,6 +680,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Contains the other element. * * @group expr_ops + * @since 1.3.0 */ def contains(other: Any): Column = Contains(expr, lit(other).expr) @@ -558,6 +688,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with. * * @group expr_ops + * @since 1.3.0 */ def startsWith(other: Column): Column = StartsWith(expr, lit(other).expr) @@ -565,6 +696,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String starts with another string literal. * * @group expr_ops + * @since 1.3.0 */ def startsWith(literal: String): Column = this.startsWith(lit(literal)) @@ -572,6 +704,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with. * * @group expr_ops + * @since 1.3.0 */ def endsWith(other: Column): Column = EndsWith(expr, lit(other).expr) @@ -579,9 +712,22 @@ class Column(protected[sql] val expr: Expression) extends Logging { * String ends with another string literal. * * @group expr_ops + * @since 1.3.0 */ def endsWith(literal: String): Column = this.endsWith(lit(literal)) + /** + * Gives the column an alias. Same as `as`. + * {{{ + * // Renames colA to colB in select output. + * df.select($"colA".alias("colB")) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def alias(alias: String): Column = as(alias) + /** * Gives the column an alias. * {{{ @@ -590,9 +736,34 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String): Column = Alias(expr, alias)() + /** + * (Scala-specific) Assigns the given aliases to the results of a table generating function. + * {{{ + * // Renames colA to colB in select output. + * df.select(explode($"myMap").as("key" :: "value" :: Nil)) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def as(aliases: Seq[String]): Column = MultiAlias(expr, aliases) + + /** + * Assigns the given aliases to the results of a table generating function. + * {{{ + * // Renames colA to colB in select output. 
+ * df.select(explode($"myMap").as("key" :: "value" :: Nil)) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def as(aliases: Array[String]): Column = MultiAlias(expr, aliases) + /** * Gives the column an alias. * {{{ @@ -601,6 +772,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: Symbol): Column = Alias(expr, alias.name)() @@ -612,6 +784,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def as(alias: String, metadata: Metadata): Column = { Alias(expr, alias)(explicitMetadata = Some(metadata)) @@ -629,6 +802,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: DataType): Column = expr match { // Lift alias out of cast so we can support col.as("name").cast(IntegerType) @@ -646,6 +820,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def cast(to: String): Column = cast(DataTypeParser.parse(to)) @@ -660,6 +835,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def desc: Column = SortOrder(expr, Descending) @@ -674,6 +850,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * }}} * * @group expr_ops + * @since 1.3.0 */ def asc: Column = SortOrder(expr, Ascending) @@ -681,6 +858,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * Prints the expression to the console for debugging purpose. * * @group df_ops + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { if (extended) { @@ -689,67 +867,171 @@ class Column(protected[sql] val expr: Expression) extends Logging { println(expr.prettyString) } } + + /** + * Compute bitwise OR of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseOR($"colB")) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def bitwiseOR(other: Any): Column = BitwiseOr(expr, lit(other).expr) + + /** + * Compute bitwise AND of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseAND($"colB")) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def bitwiseAND(other: Any): Column = BitwiseAnd(expr, lit(other).expr) + + /** + * Compute bitwise XOR of this expression with another expression. + * {{{ + * df.select($"colA".bitwiseXOR($"colB")) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def bitwiseXOR(other: Any): Column = BitwiseXor(expr, lit(other).expr) + + /** + * Define a windowing column. + * + * {{{ + * val w = Window.partitionBy("name").orderBy("id") + * df.select( + * sum("price").over(w.rangeBetween(Long.MinValue, 2)), + * avg("price").over(w.rowsBetween(0, 4)) + * ) + * }}} + * + * @group expr_ops + * @since 1.4.0 + */ + def over(window: expressions.WindowSpec): Column = window.withAggregate(this) + } /** * :: Experimental :: * A convenient class used for constructing schema. + * + * @since 1.3.0 */ @Experimental class ColumnName(name: String) extends Column(name) { - /** Creates a new AttributeReference of type boolean */ + /** + * Creates a new [[StructField]] of type boolean. + * @since 1.3.0 + */ def boolean: StructField = StructField(name, BooleanType) - /** Creates a new AttributeReference of type byte */ + /** + * Creates a new [[StructField]] of type byte. 
+ * @since 1.3.0 + */ def byte: StructField = StructField(name, ByteType) - /** Creates a new AttributeReference of type short */ + /** + * Creates a new [[StructField]] of type short. + * @since 1.3.0 + */ def short: StructField = StructField(name, ShortType) - /** Creates a new AttributeReference of type int */ + /** + * Creates a new [[StructField]] of type int. + * @since 1.3.0 + */ def int: StructField = StructField(name, IntegerType) - /** Creates a new AttributeReference of type long */ + /** + * Creates a new [[StructField]] of type long. + * @since 1.3.0 + */ def long: StructField = StructField(name, LongType) - /** Creates a new AttributeReference of type float */ + /** + * Creates a new [[StructField]] of type float. + * @since 1.3.0 + */ def float: StructField = StructField(name, FloatType) - /** Creates a new AttributeReference of type double */ + /** + * Creates a new [[StructField]] of type double. + * @since 1.3.0 + */ def double: StructField = StructField(name, DoubleType) - /** Creates a new AttributeReference of type string */ + /** + * Creates a new [[StructField]] of type string. + * @since 1.3.0 + */ def string: StructField = StructField(name, StringType) - /** Creates a new AttributeReference of type date */ + /** + * Creates a new [[StructField]] of type date. + * @since 1.3.0 + */ def date: StructField = StructField(name, DateType) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal: StructField = StructField(name, DecimalType.Unlimited) - /** Creates a new AttributeReference of type decimal */ + /** + * Creates a new [[StructField]] of type decimal. + * @since 1.3.0 + */ def decimal(precision: Int, scale: Int): StructField = StructField(name, DecimalType(precision, scale)) - /** Creates a new AttributeReference of type timestamp */ + /** + * Creates a new [[StructField]] of type timestamp. + * @since 1.3.0 + */ def timestamp: StructField = StructField(name, TimestampType) - /** Creates a new AttributeReference of type binary */ + /** + * Creates a new [[StructField]] of type binary. + * @since 1.3.0 + */ def binary: StructField = StructField(name, BinaryType) - /** Creates a new AttributeReference of type array */ + /** + * Creates a new [[StructField]] of type array. + * @since 1.3.0 + */ def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType)) - /** Creates a new AttributeReference of type map */ + /** + * Creates a new [[StructField]] of type map. + * @since 1.3.0 + */ def map(keyType: DataType, valueType: DataType): StructField = map(MapType(keyType, valueType)) def map(mapType: MapType): StructField = StructField(name, mapType) - /** Creates a new AttributeReference of type struct */ + /** + * Creates a new [[StructField]] of type struct. + * @since 1.3.0 + */ def struct(fields: StructField*): StructField = struct(StructType(fields)) + /** + * Creates a new [[StructField]] of type struct. 
+ * @since 1.3.0 + */ def struct(structType: StructType): StructField = StructField(name, structType) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index c421006c8fd2d..034d887901975 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import java.io.CharArrayWriter -import java.sql.DriverManager +import java.util.Properties import scala.collection.JavaConversions._ import scala.language.implicitConversions @@ -27,22 +27,22 @@ import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal import com.fasterxml.jackson.core.JsonFactory +import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, SqlParser} -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, ResolvedStar} +import org.apache.spark.sql.catalyst.analysis.{MultiAlias, ResolvedStar, UnresolvedAttribute, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.{JoinType, Inner} -import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.plans.logical.{Filter, _} +import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, SqlParser} import org.apache.spark.sql.execution.{EvaluatePython, ExplainCommand, LogicalRDD} -import org.apache.spark.sql.jdbc.JDBCWriteDetails -import org.apache.spark.sql.json.JsonRDD +import org.apache.spark.sql.json.JacksonGenerator +import org.apache.spark.sql.sources.CreateTableUsingAsSelect import org.apache.spark.sql.types._ -import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} +import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils @@ -57,14 +57,11 @@ private[sql] object DataFrame { * :: Experimental :: * A distributed collection of data organized into named columns. * - * A [[DataFrame]] is equivalent to a relational table in Spark SQL. There are multiple ways - * to create a [[DataFrame]]: + * A [[DataFrame]] is equivalent to a relational table in Spark SQL. The following example creates + * a [[DataFrame]] by pointing Spark SQL to a Parquet data set. 
* {{{ - * // Create a DataFrame from Parquet files - * val people = sqlContext.parquetFile("...") - * - * // Create a DataFrame from data sources - * val df = sqlContext.load("...", "json") + * val people = sqlContext.read.parquet("...") // in Scala + * DataFrame people = sqlContext.read().parquet("...") // in Java * }}} * * Once created, it can be manipulated using the various domain-specific-language (DSL) functions @@ -86,8 +83,8 @@ private[sql] object DataFrame { * A more concrete example in Scala: * {{{ * // To create DataFrame using SQLContext - * val people = sqlContext.parquetFile("...") - * val department = sqlContext.parquetFile("...") + * val people = sqlContext.read.parquet("...") + * val department = sqlContext.read.parquet("...") * * people.filter("age > 30") * .join(department, people("deptId") === department("id")) @@ -98,8 +95,8 @@ private[sql] object DataFrame { * and in Java: * {{{ * // To create DataFrame using SQLContext - * DataFrame people = sqlContext.parquetFile("..."); - * DataFrame department = sqlContext.parquetFile("..."); + * DataFrame people = sqlContext.read().parquet("..."); + * DataFrame department = sqlContext.read().parquet("..."); * * people.filter("age".gt(30)) * .join(department, people.col("deptId").equalTo(department("id"))) @@ -112,6 +109,7 @@ private[sql] object DataFrame { * @groupname rdd RDD Operations * @groupname output Output Operations * @groupname action Actions + * @since 1.3.0 */ // TODO: Improve documentation. @Experimental @@ -141,7 +139,6 @@ class DataFrame private[sql]( // happen right away to let these side effects take place eagerly. case _: Command | _: InsertIntoTable | - _: CreateTableAsSelect[_] | _: CreateTableUsingAsSelect | _: WriteToFile => LogicalRDD(queryExecution.analyzed.output, queryExecution.toRdd)(sqlContext) @@ -158,7 +155,7 @@ class DataFrame private[sql]( } protected[sql] def resolve(colName: String): NamedExpression = { - queryExecution.analyzed.resolve(colName.split("\\."), sqlContext.analyzer.resolver).getOrElse { + queryExecution.analyzed.resolveQuoted(colName, sqlContext.analyzer.resolver).getOrElse { throw new AnalysisException( s"""Cannot resolve column name "$colName" among (${schema.fieldNames.mkString(", ")})""") } @@ -166,7 +163,7 @@ class DataFrame private[sql]( protected[sql] def numericColumns: Seq[Expression] = { schema.fields.filter(_.dataType.isInstanceOf[NumericType]).map { n => - queryExecution.analyzed.resolve(n.name.split("\\."), sqlContext.analyzer.resolver).get + queryExecution.analyzed.resolveQuoted(n.name, sqlContext.analyzer.resolver).get } } @@ -175,6 +172,7 @@ class DataFrame private[sql]( * @param numRows Number of rows to show */ private[sql] def showString(numRows: Int): String = { + val sb = new StringBuilder val data = take(numRows) val numCols = schema.fieldNames.length @@ -194,12 +192,25 @@ class DataFrame private[sql]( } } - // Pad the cells - rows.map { row => - row.zipWithIndex.map { case (cell, i) => - String.format(s"%-${colWidths(i)}s", cell) - }.mkString(" ") - }.mkString("\n") + // Create SeparateLine + val sep: String = colWidths.map("-" * _).addString(sb, "+", "+", "+\n").toString() + + // column names + rows.head.zipWithIndex.map { case (cell, i) => + StringUtils.leftPad(cell.toString, colWidths(i)) + }.addString(sb, "|", "|", "|\n") + + sb.append(sep) + + // data + rows.tail.map { + _.zipWithIndex.map { case (cell, i) => + StringUtils.leftPad(cell.toString, colWidths(i)) + }.addString(sb, "|", "|", "|\n") + } + + sb.append(sep) + sb.toString() } override def toString: 
String = { @@ -211,13 +222,10 @@ class DataFrame private[sql]( } } - /** Left here for backward compatibility. */ - @deprecated("1.3.0", "use toDF") - def toSchemaRDD: DataFrame = this - /** * Returns the object itself. * @group basic + * @since 1.3.0 */ // This is declared with parentheses to prevent the Scala compiler from treating // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame. @@ -232,6 +240,7 @@ class DataFrame private[sql]( * rdd.toDF("id", "name") // this creates a DataFrame with column name "id" and "name" * }}} * @group basic + * @since 1.3.0 */ @scala.annotation.varargs def toDF(colNames: String*): DataFrame = { @@ -243,18 +252,20 @@ class DataFrame private[sql]( val newCols = logicalPlan.output.zip(colNames).map { case (oldAttribute, newName) => Column(oldAttribute).as(newName) } - select(newCols :_*) + select(newCols : _*) } /** * Returns the schema of this [[DataFrame]]. * @group basic + * @since 1.3.0 */ def schema: StructType = queryExecution.analyzed.schema /** * Returns all column names and their data types as an array. * @group basic + * @since 1.3.0 */ def dtypes: Array[(String, String)] = schema.fields.map { field => (field.name, field.dataType.toString) @@ -263,18 +274,21 @@ class DataFrame private[sql]( /** * Returns all column names as an array. * @group basic + * @since 1.3.0 */ def columns: Array[String] = schema.fields.map(_.name) /** * Prints the schema to the console in a nice tree format. * @group basic + * @since 1.3.0 */ def printSchema(): Unit = println(schema.treeString) /** * Prints the plans (logical and physical) to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(extended: Boolean): Unit = { ExplainCommand( @@ -287,6 +301,7 @@ class DataFrame private[sql]( /** * Only prints the physical plan to the console for debugging purposes. * @group basic + * @since 1.3.0 */ def explain(): Unit = explain(extended = false) @@ -294,6 +309,7 @@ class DataFrame private[sql]( * Returns true if the `collect` and `take` methods can be run locally * (without any Spark executors). * @group basic + * @since 1.3.0 */ def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation] @@ -310,12 +326,14 @@ class DataFrame private[sql]( * @param numRows Number of rows to show * * @group action + * @since 1.3.0 */ def show(numRows: Int): Unit = println(showString(numRows)) /** * Displays the top 20 rows of [[DataFrame]] in a tabular form. * @group action + * @since 1.3.0 */ def show(): Unit = show(20) @@ -327,6 +345,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.3.1 */ def na: DataFrameNaFunctions = new DataFrameNaFunctions(this) @@ -338,6 +357,7 @@ class DataFrame private[sql]( * }}} * * @group dfops + * @since 1.4.0 */ def stat: DataFrameStatFunctions = new DataFrameStatFunctions(this) @@ -348,6 +368,7 @@ class DataFrame private[sql]( * * @param right Right side of the join operation. * @group dfops + * @since 1.3.0 */ def join(right: DataFrame): DataFrame = { Join(logicalPlan, right.logicalPlan, joinType = Inner, None) @@ -371,6 +392,7 @@ class DataFrame private[sql]( * @param right Right side of the join operation. * @param usingColumn Name of the column to join on. This column must exist on both sides. * @group dfops + * @since 1.4.0 */ def join(right: DataFrame, usingColumn: String): DataFrame = { // Analyze the self join. 
The assumption is that the analyzer will disambiguate left vs right @@ -386,7 +408,9 @@ class DataFrame private[sql]( joined.left, joined.right, joinType = Inner, - Some(EqualTo(joined.left.resolve(usingColumn), joined.right.resolve(usingColumn)))) + Some(catalyst.expressions.EqualTo( + joined.left.resolve(usingColumn), + joined.right.resolve(usingColumn)))) ) } @@ -399,10 +423,9 @@ class DataFrame private[sql]( * df1.join(df2).where($"df1Key" === $"df2Key") * }}} * @group dfops + * @since 1.3.0 */ - def join(right: DataFrame, joinExprs: Column): DataFrame = { - Join(logicalPlan, right.logicalPlan, joinType = Inner, Some(joinExprs.expr)) - } + def join(right: DataFrame, joinExprs: Column): DataFrame = join(right, joinExprs, "inner") /** * Join with another [[DataFrame]], using the given join expression. The following performs @@ -420,11 +443,45 @@ class DataFrame private[sql]( * * @param right Right side of the join. * @param joinExprs Join expression. - * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `semijoin`. + * @param joinType One of: `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. * @group dfops + * @since 1.3.0 */ def join(right: DataFrame, joinExprs: Column, joinType: String): DataFrame = { - Join(logicalPlan, right.logicalPlan, JoinType(joinType), Some(joinExprs.expr)) + // Note that in this function, we introduce a hack in the case of self-join to automatically + // resolve ambiguous join conditions into ones that might make sense [SPARK-6231]. + // Consider this case: df.join(df, df("key") === df("key")) + // Since df("key") === df("key") is a trivially true condition, this actually becomes a + // cartesian join. However, most likely users expect to perform a self join using "key". + // With that assumption, this hack turns the trivially true condition into equality on join + // keys that are resolved to both sides. + + // Trigger analysis so in the case of self-join, the analyzer will clone the plan. + // After the cloning, left and right side will have distinct expression ids. + val plan = Join(logicalPlan, right.logicalPlan, JoinType(joinType), Some(joinExprs.expr)) + .queryExecution.analyzed.asInstanceOf[Join] + + // If auto self join alias is disabled, return the plan. + if (!sqlContext.conf.dataFrameSelfJoinAutoResolveAmbiguity) { + return plan + } + + // If left/right have no output set intersection, return the plan. + val lanalyzed = this.logicalPlan.queryExecution.analyzed + val ranalyzed = right.logicalPlan.queryExecution.analyzed + if (lanalyzed.outputSet.intersect(ranalyzed.outputSet).isEmpty) { + return plan + } + + // Otherwise, find the trivially true predicates and automatically resolves them to both sides. + // By the time we get here, since we have already run analysis, all attributes should've been + // resolved and become AttributeReference. 
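Before the implementation continues below, the user-facing case this resolution addresses can be sketched roughly as follows. The setup, object name, and data are assumptions for illustration; the behaviour is gated by the dataFrameSelfJoinAutoResolveAmbiguity setting referenced in the code.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLContext

    object SelfJoinSketch {
      def main(args: Array[String]): Unit = {
        // Assumed local-mode setup for the sketch only.
        val sc = new SparkContext(
          new SparkConf().setAppName("self-join-sketch").setMaster("local[2]"))
        val sqlContext = new SQLContext(sc)
        import sqlContext.implicits._

        val df = sc.parallelize(Seq((1, "a"), (2, "b"))).toDF("key", "value")

        // df("key") === df("key") is trivially true, which would otherwise degenerate into a
        // cartesian product; with auto-resolution enabled, the condition is expected to be
        // rewritten into an equality between the left and right side's "key" columns.
        val joined = df.join(df, df("key") === df("key"))
        joined.explain(true)
        joined.show()

        sc.stop()
      }
    }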
+ val cond = plan.condition.map { _.transform { + case catalyst.expressions.EqualTo(a: AttributeReference, b: AttributeReference) + if a.sameRef(b) => + catalyst.expressions.EqualTo(plan.left.resolve(a.name), plan.right.resolve(b.name)) + }} + plan.copy(condition = cond) } /** @@ -436,10 +493,11 @@ class DataFrame private[sql]( * df.sort($"sortcol".asc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortCol: String, sortCols: String*): DataFrame = { - sort((sortCol +: sortCols).map(apply) :_*) + sort((sortCol +: sortCols).map(apply) : _*) } /** @@ -448,6 +506,7 @@ class DataFrame private[sql]( * df.sort($"col1", $"col2".desc) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def sort(sortExprs: Column*): DataFrame = { @@ -466,27 +525,31 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs - def orderBy(sortCol: String, sortCols: String*): DataFrame = sort(sortCol, sortCols :_*) + def orderBy(sortCol: String, sortCols: String*): DataFrame = sort(sortCol, sortCols : _*) /** * Returns a new [[DataFrame]] sorted by the given expressions. * This is an alias of the `sort` function. * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs - def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs :_*) + def orderBy(sortExprs: Column*): DataFrame = sort(sortExprs : _*) /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def apply(colName: String): Column = col(colName) /** * Selects column based on the column name and return it as a [[Column]]. * @group dfops + * @since 1.3.0 */ def col(colName: String): Column = colName match { case "*" => @@ -499,12 +562,14 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] with an alias set. * @group dfops + * @since 1.3.0 */ def as(alias: String): DataFrame = Subquery(alias, logicalPlan) /** * (Scala-specific) Returns a new [[DataFrame]] with an alias set. * @group dfops + * @since 1.3.0 */ def as(alias: Symbol): DataFrame = as(alias.name) @@ -514,14 +579,20 @@ class DataFrame private[sql]( * df.select($"colA", $"colB" + 1) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def select(cols: Column*): DataFrame = { val namedExpressions = cols.map { case Column(expr: NamedExpression) => expr + // Leave an unaliased explode with an empty list of names since the analzyer will generate the + // correct defaults after the nested expression's type has been resolved. + case Column(explode: Explode) => MultiAlias(explode, Nil) case Column(expr: Expression) => Alias(expr, expr.prettyString)() } - Project(namedExpressions.toSeq, logicalPlan) + // When user continuously call `select`, speed up analysis by collapsing `Project` + import org.apache.spark.sql.catalyst.optimizer.ProjectCollapsing + Project(namedExpressions.toSeq, ProjectCollapsing(logicalPlan)) } /** @@ -534,9 +605,10 @@ class DataFrame private[sql]( * df.select($"colA", $"colB") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs - def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)) :_*) + def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)) : _*) /** * Selects a set of SQL expressions. 
This is a variant of `select` that accepts @@ -546,6 +618,7 @@ class DataFrame private[sql]( * df.selectExpr("colA", "colB as newName", "abs(colC)") * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def selectExpr(exprs: String*): DataFrame = { @@ -563,6 +636,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def filter(condition: Column): DataFrame = Filter(condition.expr, logicalPlan) @@ -572,6 +646,7 @@ class DataFrame private[sql]( * peopleDf.filter("age > 15") * }}} * @group dfops + * @since 1.3.0 */ def filter(conditionExpr: String): DataFrame = { filter(Column(new SqlParser().parseExpression(conditionExpr))) @@ -586,6 +661,7 @@ class DataFrame private[sql]( * peopleDf($"age" > 15) * }}} * @group dfops + * @since 1.3.0 */ def where(condition: Column): DataFrame = filter(condition) @@ -604,9 +680,56 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 + */ + @scala.annotation.varargs + def groupBy(cols: Column*): GroupedData = { + GroupedData(this, cols.map(_.expr), GroupedData.GroupByType) + } + + /** + * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns rolluped by department and group. + * df.rollup($"department", $"group").avg() + * + * // Compute the max age and average salary, rolluped by department and gender. + * df.rollup($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def rollup(cols: Column*): GroupedData = { + GroupedData(this, cols.map(_.expr), GroupedData.RollupType) + } + + /** + * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * {{{ + * // Compute the average for all numeric columns cubed by department and group. + * df.cube($"department", $"group").avg() + * + * // Compute the max age and average salary, cubed by department and gender. + * df.cube($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 */ @scala.annotation.varargs - def groupBy(cols: Column*): GroupedData = new GroupedData(this, cols.map(_.expr)) + def cube(cols: Column*): GroupedData = GroupedData(this, cols.map(_.expr), GroupedData.CubeType) /** * Groups the [[DataFrame]] using the specified columns, so we can run aggregation on them. @@ -626,64 +749,124 @@ class DataFrame private[sql]( * )) * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs def groupBy(col1: String, cols: String*): GroupedData = { val colNames: Seq[String] = col1 +: cols - new GroupedData(this, colNames.map(colName => resolve(colName))) + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.GroupByType) + } + + /** + * Create a multi-dimensional rollup for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * This is a variant of rollup that can only group by existing columns using column names + * (i.e. cannot construct expressions). + * + * {{{ + * // Compute the average for all numeric columns rolluped by department and group. 
+ * df.rollup("department", "group").avg() + * + * // Compute the max age and average salary, rolluped by department and gender. + * df.rollup($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def rollup(col1: String, cols: String*): GroupedData = { + val colNames: Seq[String] = col1 +: cols + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.RollupType) + } + + /** + * Create a multi-dimensional cube for the current [[DataFrame]] using the specified columns, + * so we can run aggregation on them. + * See [[GroupedData]] for all the available aggregate functions. + * + * This is a variant of cube that can only group by existing columns using column names + * (i.e. cannot construct expressions). + * + * {{{ + * // Compute the average for all numeric columns cubed by department and group. + * df.cube("department", "group").avg() + * + * // Compute the max age and average salary, cubed by department and gender. + * df.cube($"department", $"gender").agg(Map( + * "salary" -> "avg", + * "age" -> "max" + * )) + * }}} + * @group dfops + * @since 1.4.0 + */ + @scala.annotation.varargs + def cube(col1: String, cols: String*): GroupedData = { + val colNames: Seq[String] = col1 +: cols + GroupedData(this, colNames.map(colName => resolve(colName)), GroupedData.CubeType) } /** * (Scala-specific) Aggregates on the entire [[DataFrame]] without groups. - * {{ + * {{{ * // df.agg(...) is a shorthand for df.groupBy().agg(...) * df.agg("age" -> "max", "salary" -> "avg") * df.groupBy().agg("age" -> "max", "salary" -> "avg") - * }} + * }}} * @group dfops + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { - groupBy().agg(aggExpr, aggExprs :_*) + groupBy().agg(aggExpr, aggExprs : _*) } /** * (Scala-specific) Aggregates on the entire [[DataFrame]] without groups. - * {{ + * {{{ * // df.agg(...) is a shorthand for df.groupBy().agg(...) * df.agg(Map("age" -> "max", "salary" -> "avg")) * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) - * }} + * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs) /** * (Java-specific) Aggregates on the entire [[DataFrame]] without groups. - * {{ + * {{{ * // df.agg(...) is a shorthand for df.groupBy().agg(...) * df.agg(Map("age" -> "max", "salary" -> "avg")) * df.groupBy().agg(Map("age" -> "max", "salary" -> "avg")) - * }} + * }}} * @group dfops + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs) /** * Aggregates on the entire [[DataFrame]] without groups. - * {{ + * {{{ * // df.agg(...) is a shorthand for df.groupBy().agg(...) * df.agg(max($"age"), avg($"salary")) * df.groupBy().agg(max($"age"), avg($"salary")) - * }} + * }}} * @group dfops + * @since 1.3.0 */ @scala.annotation.varargs - def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs :_*) + def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs : _*) /** * Returns a new [[DataFrame]] by taking the first `n` rows. The difference between this function * and `head` is that `head` returns an array while `limit` returns a new [[DataFrame]]. * @group dfops + * @since 1.3.0 */ def limit(n: Int): DataFrame = Limit(Literal(n), logicalPlan) @@ -691,6 +874,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing union of rows in this frame and another frame. 
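Since `rollup` and `cube` are the 1.4.0 additions next to `groupBy`, a short sketch of the three grouping modes may help; `department`, `gender`, `salary`, and `age` are hypothetical columns and `import sqlContext.implicits._` is assumed for the `$` syntax:

{{{
import org.apache.spark.sql.functions._

val byDept = df.groupBy("department").agg(avg("salary"), max("age"))

// rollup/cube also return GroupedData, so the same aggregates apply;
// they differ only in the grouping sets that are generated.
val rolledUp = df.rollup($"department", $"gender").agg("salary" -> "avg", "age" -> "max")
val cubed    = df.cube("department", "gender").avg("salary")

// Whole-DataFrame aggregation is shorthand for groupBy().agg(...).
val overall = df.agg(max($"age"), avg($"salary"))
}}}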
* This is equivalent to `UNION ALL` in SQL. * @group dfops + * @since 1.3.0 */ def unionAll(other: DataFrame): DataFrame = Union(logicalPlan, other.logicalPlan) @@ -698,6 +882,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows only in both this frame and another frame. * This is equivalent to `INTERSECT` in SQL. * @group dfops + * @since 1.3.0 */ def intersect(other: DataFrame): DataFrame = Intersect(logicalPlan, other.logicalPlan) @@ -705,6 +890,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] containing rows in this frame but not in another frame. * This is equivalent to `EXCEPT` in SQL. * @group dfops + * @since 1.3.0 */ def except(other: DataFrame): DataFrame = Except(logicalPlan, other.logicalPlan) @@ -715,6 +901,7 @@ class DataFrame private[sql]( * @param fraction Fraction of rows to generate. * @param seed Seed for sampling. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double, seed: Long): DataFrame = { Sample(0.0, fraction, withReplacement, seed, logicalPlan) @@ -726,6 +913,7 @@ class DataFrame private[sql]( * @param withReplacement Sample with replacement or not. * @param fraction Fraction of rows to generate. * @group dfops + * @since 1.3.0 */ def sample(withReplacement: Boolean, fraction: Double): DataFrame = { sample(withReplacement, fraction, Utils.random.nextLong) @@ -737,6 +925,7 @@ class DataFrame private[sql]( * @param weights weights for splits, will be normalized if they don't sum to 1. * @param seed Seed for sampling. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double], seed: Long): Array[DataFrame] = { val sum = weights.sum @@ -751,6 +940,7 @@ class DataFrame private[sql]( * * @param weights weights for splits, will be normalized if they don't sum to 1. * @group dfops + * @since 1.4.0 */ def randomSplit(weights: Array[Double]): Array[DataFrame] = { randomSplit(weights, Utils.random.nextLong) @@ -787,6 +977,7 @@ class DataFrame private[sql]( * val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A <: Product : TypeTag](input: Column*)(f: Row => TraversableOnce[A]): DataFrame = { val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] @@ -811,6 +1002,7 @@ class DataFrame private[sql]( * df.explode("words", "word")(words: String => words.split(" ")) * }}} * @group dfops + * @since 1.3.0 */ def explode[A, B : TypeTag](inputColumn: String, outputColumn: String)(f: A => TraversableOnce[B]) : DataFrame = { @@ -834,6 +1026,7 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] by adding a column. * @group dfops + * @since 1.3.0 */ def withColumn(colName: String, col: Column): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -843,7 +1036,7 @@ class DataFrame private[sql]( val name = field.name if (resolver(name, colName)) col.as(colName) else Column(name) } - select(colNames :_*) + select(colNames : _*) } else { select(Column("*"), col.as(colName)) } @@ -853,6 +1046,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column renamed. * This is a no-op if schema doesn't contain existingName. * @group dfops + * @since 1.3.0 */ def withColumnRenamed(existingName: String, newName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -872,6 +1066,7 @@ class DataFrame private[sql]( * Returns a new [[DataFrame]] with a column dropped. * This is a no-op if schema doesn't contain column name. 
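A sketch of the sampling and column-manipulation methods documented above; `df`, `otherDf`, and the column names are hypothetical, and `randomSplit` is the 1.4.0 addition:

{{{
// Set-like operations require compatible schemas.
val combined = df.unionAll(otherDf)
val common   = df.intersect(otherDf)

// Roughly 10% sample without replacement, reproducible via the seed.
val sampled = df.sample(withReplacement = false, fraction = 0.1, seed = 42L)

// Weights are normalized, so Array(0.7, 0.3) and Array(7.0, 3.0) are equivalent.
val Array(train, test) = df.randomSplit(Array(0.7, 0.3), seed = 42L)

// withColumn appends a column, or replaces an existing one of the same name.
val flagged = df.withColumn("isAdult", df("age") >= 18)
  .withColumnRenamed("isAdult", "is_adult")
}}}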
* @group dfops + * @since 1.4.0 */ def drop(colName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver @@ -887,6 +1082,43 @@ class DataFrame private[sql]( } } + /** + * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. + * This is an alias for `distinct`. + * @group dfops + * @since 1.4.0 + */ + def dropDuplicates(): DataFrame = dropDuplicates(this.columns) + + /** + * (Scala-specific) Returns a new [[DataFrame]] with duplicate rows removed, considering only + * the subset of columns. + * + * @group dfops + * @since 1.4.0 + */ + def dropDuplicates(colNames: Seq[String]): DataFrame = { + val groupCols = colNames.map(resolve) + val groupColExprIds = groupCols.map(_.exprId) + val aggCols = logicalPlan.output.map { attr => + if (groupColExprIds.contains(attr.exprId)) { + attr + } else { + Alias(First(attr), attr.name)() + } + } + Aggregate(groupCols, aggCols, logicalPlan) + } + + /** + * Returns a new [[DataFrame]] with duplicate rows removed, considering only + * the subset of columns. + * + * @group dfops + * @since 1.4.0 + */ + def dropDuplicates(colNames: Array[String]): DataFrame = dropDuplicates(colNames.toSeq) + /** * Computes statistics for numeric columns, including count, mean, stddev, min, and max. * If no columns are given, this function computes statistics for all numerical columns. @@ -908,6 +1140,7 @@ class DataFrame private[sql]( * }}} * * @group action + * @since 1.3.1 */ @scala.annotation.varargs def describe(cols: String*): DataFrame = { @@ -928,7 +1161,7 @@ class DataFrame private[sql]( val ret: Seq[Row] = if (outputCols.nonEmpty) { val aggExprs = statistics.flatMap { case (_, colToAgg) => - outputCols.map(c => Column(colToAgg(Column(c).expr)).as(c)) + outputCols.map(c => Column(Cast(colToAgg(Column(c).expr), StringType)).as(c)) } val row = agg(aggExprs.head, aggExprs.tail: _*).head().toSeq @@ -942,33 +1175,37 @@ class DataFrame private[sql]( statistics.map { case (name, _) => Row(name) } } - // The first column is string type, and the rest are double type. + // All columns are string type val schema = StructType( - StructField("summary", StringType) :: outputCols.map(StructField(_, DoubleType))).toAttributes + StructField("summary", StringType) :: outputCols.map(StructField(_, StringType))).toAttributes LocalRelation(schema, ret) } /** * Returns the first `n` rows. * @group action + * @since 1.3.0 */ def head(n: Int): Array[Row] = limit(n).collect() /** * Returns the first row. * @group action + * @since 1.3.0 */ def head(): Row = head(1).head /** * Returns the first row. Alias for head(). * @group action + * @since 1.3.0 */ override def first(): Row = head() /** * Returns a new RDD by applying a function to all rows of this DataFrame. * @group rdd + * @since 1.3.0 */ override def map[R: ClassTag](f: Row => R): RDD[R] = rdd.map(f) @@ -976,12 +1213,14 @@ class DataFrame private[sql]( * Returns a new RDD by first applying a function to all rows of this [[DataFrame]], * and then flattening the results. * @group rdd + * @since 1.3.0 */ override def flatMap[R: ClassTag](f: Row => TraversableOnce[R]): RDD[R] = rdd.flatMap(f) /** * Returns a new RDD by applying a function to each partition of this DataFrame. * @group rdd + * @since 1.3.0 */ override def mapPartitions[R: ClassTag](f: Iterator[Row] => Iterator[R]): RDD[R] = { rdd.mapPartitions(f) @@ -990,42 +1229,49 @@ class DataFrame private[sql]( /** * Applies a function `f` to all rows. 
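`dropDuplicates` and the string-typed `describe` output are the behavioral changes in this hunk; a brief sketch with hypothetical column names:

{{{
// Exact duplicates across all columns; same result as distinct.
val deduped = df.dropDuplicates()

// Keep one row per (firstName, lastName); the remaining columns take the
// first value encountered, via the First aggregate used above.
val dedupedByName = df.dropDuplicates(Seq("firstName", "lastName"))

// describe() now returns every statistic as a string column, so count,
// mean, stddev, min and max can share one schema.
df.describe("age", "salary").show()
}}}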
* @group rdd + * @since 1.3.0 */ override def foreach(f: Row => Unit): Unit = rdd.foreach(f) /** * Applies a function f to each partition of this [[DataFrame]]. * @group rdd + * @since 1.3.0 */ override def foreachPartition(f: Iterator[Row] => Unit): Unit = rdd.foreachPartition(f) /** * Returns the first `n` rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def take(n: Int): Array[Row] = head(n) /** * Returns an array that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ override def collect(): Array[Row] = queryExecution.executedPlan.executeCollect() /** * Returns a Java list that contains all of [[Row]]s in this [[DataFrame]]. * @group action + * @since 1.3.0 */ - override def collectAsList(): java.util.List[Row] = java.util.Arrays.asList(rdd.collect() :_*) + override def collectAsList(): java.util.List[Row] = java.util.Arrays.asList(rdd.collect() : _*) /** * Returns the number of rows in the [[DataFrame]]. * @group action + * @since 1.3.0 */ override def count(): Long = groupBy().count().collect().head.getLong(0) /** * Returns a new [[DataFrame]] that has exactly `numPartitions` partitions. * @group rdd + * @since 1.3.0 */ override def repartition(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = true, logicalPlan) @@ -1037,6 +1283,7 @@ class DataFrame private[sql]( * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of * the 100 new partitions will claim 10 of the current partitions. * @group rdd + * @since 1.4.0 */ override def coalesce(numPartitions: Int): DataFrame = { Repartition(numPartitions, shuffle = false, logicalPlan) @@ -1044,12 +1291,15 @@ class DataFrame private[sql]( /** * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. + * This is an alias for `dropDuplicates`. * @group dfops + * @since 1.3.0 */ override def distinct: DataFrame = Distinct(logicalPlan) /** * @group basic + * @since 1.3.0 */ override def persist(): this.type = { sqlContext.cacheManager.cacheQuery(this) @@ -1058,11 +1308,13 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def cache(): this.type = persist() /** * @group basic + * @since 1.3.0 */ override def persist(newLevel: StorageLevel): this.type = { sqlContext.cacheManager.cacheQuery(this, None, newLevel) @@ -1071,6 +1323,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(blocking: Boolean): this.type = { sqlContext.cacheManager.tryUncacheQuery(this, blocking) @@ -1079,6 +1332,7 @@ class DataFrame private[sql]( /** * @group basic + * @since 1.3.0 */ override def unpersist(): this.type = unpersist(blocking = false) @@ -1091,6 +1345,7 @@ class DataFrame private[sql]( * memoized. Once called, it won't change even if you change any query planning related Spark SQL * configurations (e.g. `spark.sql.shuffle.partitions`). * @group rdd + * @since 1.3.0 */ lazy val rdd: RDD[Row] = { // use a local variable to make sure the map closure doesn't capture the whole DataFrame @@ -1104,12 +1359,14 @@ class DataFrame private[sql]( /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. * @group rdd + * @since 1.3.0 */ def toJavaRDD: JavaRDD[Row] = rdd.toJavaRDD() /** * Returns the content of the [[DataFrame]] as a [[JavaRDD]] of [[Row]]s. 
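The RDD-like actions and the partitioning helpers above can be exercised as follows; the partition counts are placeholders:

{{{
import org.apache.spark.storage.StorageLevel

val firstTen = df.take(10)   // same rows as head(10)
val total    = df.count()    // implemented as groupBy().count()

// repartition always shuffles; coalesce (new in 1.4.0) only merges partitions,
// so shrinking from 1000 to 100 partitions avoids a shuffle.
val wide   = df.repartition(200)
val narrow = df.coalesce(10)

df.persist(StorageLevel.MEMORY_AND_DISK)
// ... several actions against the cached plan ...
df.unpersist()
}}}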
* @group rdd + * @since 1.3.0 */ def javaRDD: JavaRDD[Row] = toJavaRDD @@ -1118,27 +1375,129 @@ class DataFrame private[sql]( * temporary table is tied to the [[SQLContext]] that was used to create this DataFrame. * * @group basic + * @since 1.3.0 */ def registerTempTable(tableName: String): Unit = { sqlContext.registerDataFrameAsTable(this, tableName) } + /** + * :: Experimental :: + * Interface for saving the content of the [[DataFrame]] out into external storage. + * + * @group output + * @since 1.4.0 + */ + @Experimental + def write: DataFrameWriter = new DataFrameWriter(this) + + /** + * Returns the content of the [[DataFrame]] as a RDD of JSON strings. + * @group rdd + * @since 1.3.0 + */ + def toJSON: RDD[String] = { + val rowSchema = this.schema + this.mapPartitions { iter => + val writer = new CharArrayWriter() + // create the Generator without separator inserted between 2 records + val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) + + new Iterator[String] { + override def hasNext: Boolean = iter.hasNext + override def next(): String = { + JacksonGenerator(rowSchema, gen)(iter.next()) + gen.flush() + + val json = writer.toString + if (hasNext) { + writer.reset() + } else { + gen.close() + } + + json + } + } + } + } + + //////////////////////////////////////////////////////////////////////////// + // for Python API + //////////////////////////////////////////////////////////////////////////// + + /** + * Converts a JavaRDD to a PythonRDD. + */ + protected[sql] def javaToPython: JavaRDD[Array[Byte]] = { + val fieldTypes = schema.fields.map(_.dataType) + val jrdd = rdd.map(EvaluatePython.rowToArray(_, fieldTypes)).toJavaRDD() + SerDeUtil.javaToPython(jrdd) + } + + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // Deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + /** + * @deprecated As of 1.3.0, replaced by `toDF()`. + */ + @deprecated("use toDF", "1.3.0") + def toSchemaRDD: DataFrame = this + + /** + * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. + * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements. + * If you pass `true` for `allowExisting`, it will drop any table with the + * given name; if you pass `false`, it will throw if the table already + * exists. + * @group output + * @deprecated As of 1.340, replaced by `write().jdbc()`. + */ + @deprecated("Use write.jdbc()", "1.4.0") + def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { + val w = if (allowExisting) write.mode(SaveMode.Overwrite) else write + w.jdbc(url, table, new Properties) + } + + /** + * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. + * Assumes the table already exists and has a compatible schema. If you + * pass `true` for `overwrite`, it will `TRUNCATE` the table before + * performing the `INSERT`s. + * + * The table must already exist on the database. It must have a schema + * that is compatible with the schema of this RDD; inserting the rows of + * the RDD in order via the simple statement + * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. + * @group output + * @deprecated As of 1.4.0, replaced by `write().jdbc()`. 
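The new `write` entry point and the relocated `toJSON` sit next to `registerTempTable`; a sketch with placeholder table and path names:

{{{
// Register for SQL queries against the owning SQLContext.
df.registerTempTable("people")
val adults = sqlContext.sql("SELECT name FROM people WHERE age >= 18")

// Writer API that the deprecated save/saveAsTable/insertInto family now delegates to.
df.write.format("parquet").mode("overwrite").save("/tmp/people.parquet")

// One JSON document per row, produced by the streaming Jackson generator.
val jsonLines = df.toJSON
}}}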
+ */ + @deprecated("Use write.jdbc()", "1.4.0") + def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { + val w = if (overwrite) write.mode(SaveMode.Overwrite) else write + w.jdbc(url, table, new Properties) + } + /** * Saves the contents of this [[DataFrame]] as a parquet file, preserving the schema. * Files that are written out using this method can be read back in as a [[DataFrame]] * using the `parquetFile` function in [[SQLContext]]. * @group output + * @deprecated As of 1.4.0, replaced by `write().parquet()`. */ + @deprecated("Use write.parquet(path)", "1.4.0") def saveAsParquetFile(path: String): Unit = { if (sqlContext.conf.parquetUseDataSourceApi) { - save("org.apache.spark.sql.parquet", SaveMode.ErrorIfExists, Map("path" -> path)) + write.format("parquet").mode(SaveMode.ErrorIfExists).save(path) } else { sqlContext.executePlan(WriteToFile(path, logicalPlan)).toRdd } } /** - * :: Experimental :: * Creates a table from the the contents of this DataFrame. * It will use the default data source configured by spark.sql.sources.default. * This will fail if the table already exists. @@ -1147,15 +1506,18 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @deprecated As of 1.4.0, replaced by `write().saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String): Unit = { - saveAsTable(tableName, SaveMode.ErrorIfExists) + write.mode(SaveMode.ErrorIfExists).saveAsTable(tableName) } /** - * :: Experimental :: * Creates a table from the the contents of this DataFrame, using the default data source * configured by spark.sql.sources.default and [[SaveMode.ErrorIfExists]] as the save mode. * @@ -1163,22 +1525,18 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.mode(mode).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, mode: SaveMode): Unit = { - if (sqlContext.catalog.tableExists(Seq(tableName)) && mode == SaveMode.Append) { - // If table already exists and the save mode is Append, - // we will just call insertInto to append the contents of this DataFrame. - insertInto(tableName, overwrite = false) - } else { - val dataSourceName = sqlContext.conf.defaultDataSourceName - saveAsTable(tableName, dataSourceName, mode) - } + write.mode(mode).saveAsTable(tableName) } /** - * :: Experimental :: * Creates a table at the given path from the the contents of this DataFrame * based on a given data source and a set of options, * using [[SaveMode.ErrorIfExists]] as the save mode. 
@@ -1187,11 +1545,15 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @deprecated As of 1.4.0, replaced by `write().format(source).saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.format(source).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, source: String): Unit = { - saveAsTable(tableName, source, SaveMode.ErrorIfExists) + write.format(source).saveAsTable(tableName) } /** @@ -1203,15 +1565,18 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @deprecated As of 1.4.0, replaced by `write().mode(mode).saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.format(source).mode(mode).saveAsTable(tableName)", "1.4.0") def saveAsTable(tableName: String, source: String, mode: SaveMode): Unit = { - saveAsTable(tableName, source, mode, Map.empty[String, String]) + write.format(source).mode(mode).saveAsTable(tableName) } /** - * :: Experimental :: * Creates a table at the given path from the the contents of this DataFrame * based on a given data source, [[SaveMode]] specified by mode, and a set of options. * @@ -1219,19 +1584,24 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. * @group output + * @deprecated As of 1.4.0, replaced by + * `write().format(source).mode(mode).options(options).saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)", + "1.4.0") def saveAsTable( tableName: String, source: String, mode: SaveMode, options: java.util.Map[String, String]): Unit = { - saveAsTable(tableName, source, mode, options.toMap) + write.format(source).mode(mode).options(options).saveAsTable(tableName) } /** - * :: Experimental :: * (Scala-specific) * Creates a table from the the contents of this DataFrame based on a given data source, * [[SaveMode]] specified by mode, and a set of options. @@ -1240,214 +1610,127 @@ class DataFrame private[sql]( * there is no notion of a persisted catalog in a standard SQL context. Instead you can write * an RDD out to a parquet file, and then register that file as a table. This "table" can then * be the target of an `insertInto`. + * + * Also note that while this function can persist the table metadata into Hive's metastore, + * the table will NOT be accessible from Hive, until SPARK-7550 is resolved. 
* @group output + * @deprecated As of 1.4.0, replaced by + * `write().format(source).mode(mode).options(options).saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).saveAsTable(tableName)", + "1.4.0") def saveAsTable( tableName: String, source: String, mode: SaveMode, options: Map[String, String]): Unit = { - val cmd = - CreateTableUsingAsSelect( - tableName, - source, - temporary = false, - mode, - options, - logicalPlan) - - sqlContext.executePlan(cmd).toRdd + write.format(source).mode(mode).options(options).saveAsTable(tableName) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path, * using the default data source configured by spark.sql.sources.default and * [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @deprecated As of 1.4.0, replaced by `write().save(path)`. */ - @Experimental + @deprecated("Use write.save(path)", "1.4.0") def save(path: String): Unit = { - save(path, SaveMode.ErrorIfExists) + write.save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path and [[SaveMode]] specified by mode, * using the default data source configured by spark.sql.sources.default. * @group output + * @deprecated As of 1.4.0, replaced by `write().mode(mode).save(path)`. */ - @Experimental + @deprecated("Use write.mode(mode).save(path)", "1.4.0") def save(path: String, mode: SaveMode): Unit = { - val dataSourceName = sqlContext.conf.defaultDataSourceName - save(path, dataSourceName, mode) + write.mode(mode).save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path based on the given data source, * using [[SaveMode.ErrorIfExists]] as the save mode. * @group output + * @deprecated As of 1.4.0, replaced by `write().format(source).save(path)`. */ - @Experimental + @deprecated("Use write.format(source).save(path)", "1.4.0") def save(path: String, source: String): Unit = { - save(source, SaveMode.ErrorIfExists, Map("path" -> path)) + write.format(source).save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame to the given path based on the given data source and * [[SaveMode]] specified by mode. * @group output + * @deprecated As of 1.4.0, replaced by `write().format(source).mode(mode).save(path)`. */ - @Experimental + @deprecated("Use write.format(source).mode(mode).save(path)", "1.4.0") def save(path: String, source: String, mode: SaveMode): Unit = { - save(source, mode, Map("path" -> path)) + write.format(source).mode(mode).save(path) } /** - * :: Experimental :: * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options. * @group output + * @deprecated As of 1.4.0, replaced by + * `write().format(source).mode(mode).options(options).save(path)`. */ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0") def save( source: String, mode: SaveMode, options: java.util.Map[String, String]): Unit = { - save(source, mode, options.toMap) + write.format(source).mode(mode).options(options).save() } /** - * :: Experimental :: * (Scala-specific) * Saves the contents of this DataFrame based on the given data source, * [[SaveMode]] specified by mode, and a set of options * @group output + * @deprecated As of 1.4.0, replaced by + * `write().format(source).mode(mode).options(options).save(path)`. 
*/ - @Experimental + @deprecated("Use write.format(source).mode(mode).options(options).save()", "1.4.0") def save( source: String, mode: SaveMode, options: Map[String, String]): Unit = { - ResolvedDataSource(sqlContext, source, mode, options, this) + write.format(source).mode(mode).options(options).save() } + /** - * :: Experimental :: * Adds the rows from this RDD to the specified table, optionally overwriting the existing data. * @group output + * @deprecated As of 1.4.0, replaced by + * `write().mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName)`. */ - @Experimental + @deprecated("Use write.mode(SaveMode.Append|SaveMode.Overwrite).saveAsTable(tableName)", "1.4.0") def insertInto(tableName: String, overwrite: Boolean): Unit = { - sqlContext.executePlan(InsertIntoTable(UnresolvedRelation(Seq(tableName)), - Map.empty, logicalPlan, overwrite, ifNotExists = false)).toRdd + write.mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append).insertInto(tableName) } /** - * :: Experimental :: * Adds the rows from this RDD to the specified table. * Throws an exception if the table already exists. * @group output + * @deprecated As of 1.4.0, replaced by + * `write().mode(SaveMode.Append).saveAsTable(tableName)`. */ - @Experimental - def insertInto(tableName: String): Unit = insertInto(tableName, overwrite = false) - - /** - * Returns the content of the [[DataFrame]] as a RDD of JSON strings. - * @group rdd - */ - def toJSON: RDD[String] = { - val rowSchema = this.schema - this.mapPartitions { iter => - val writer = new CharArrayWriter() - // create the Generator without separator inserted between 2 records - val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null) - - new Iterator[String] { - override def hasNext: Boolean = iter.hasNext - override def next(): String = { - JsonRDD.rowToJSON(rowSchema, gen)(iter.next()) - gen.flush() - - val json = writer.toString - if (hasNext) { - writer.reset() - } else { - gen.close() - } - - json - } - } - } + @deprecated("Use write.mode(SaveMode.Append).saveAsTable(tableName)", "1.4.0") + def insertInto(tableName: String): Unit = { + write.mode(SaveMode.Append).insertInto(tableName) } //////////////////////////////////////////////////////////////////////////// - // JDBC Write Support //////////////////////////////////////////////////////////////////////////// - - /** - * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. - * This will run a `CREATE TABLE` and a bunch of `INSERT INTO` statements. - * If you pass `true` for `allowExisting`, it will drop any table with the - * given name; if you pass `false`, it will throw if the table already - * exists. - * @group output - */ - def createJDBCTable(url: String, table: String, allowExisting: Boolean): Unit = { - val conn = DriverManager.getConnection(url) - try { - if (allowExisting) { - val sql = s"DROP TABLE IF EXISTS $table" - conn.prepareStatement(sql).executeUpdate() - } - val schema = JDBCWriteDetails.schemaString(this, url) - val sql = s"CREATE TABLE $table ($schema)" - conn.prepareStatement(sql).executeUpdate() - } finally { - conn.close() - } - JDBCWriteDetails.saveTable(this, url, table) - } - - /** - * Save this [[DataFrame]] to a JDBC database at `url` under the table name `table`. - * Assumes the table already exists and has a compatible schema. If you - * pass `true` for `overwrite`, it will `TRUNCATE` the table before - * performing the `INSERT`s. - * - * The table must already exist on the database. 
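All of the deprecations above forward to `DataFrameWriter`; a side-by-side sketch of old and new spellings, with placeholder paths, URL, and table names (the deprecated calls are left as comments):

{{{
val url = "jdbc:postgresql://db.example.com/shop"   // placeholder JDBC URL

// df.saveAsParquetFile("/tmp/out")                          // deprecated
df.write.format("parquet").save("/tmp/out")

// df.saveAsTable("events", "parquet", SaveMode.Append)      // deprecated
df.write.format("parquet").mode(SaveMode.Append).saveAsTable("events")

// df.insertInto("events", overwrite = true)                 // deprecated
df.write.mode(SaveMode.Overwrite).insertInto("events")

// df.createJDBCTable(url, "events", allowExisting = false)  // deprecated
df.write.jdbc(url, "events", new java.util.Properties)
}}}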
It must have a schema - * that is compatible with the schema of this RDD; inserting the rows of - * the RDD in order via the simple statement - * `INSERT INTO table VALUES (?, ?, ..., ?)` should not fail. - * @group output - */ - def insertIntoJDBC(url: String, table: String, overwrite: Boolean): Unit = { - if (overwrite) { - val conn = DriverManager.getConnection(url) - try { - val sql = s"TRUNCATE TABLE $table" - conn.prepareStatement(sql).executeUpdate() - } finally { - conn.close() - } - } - JDBCWriteDetails.saveTable(this, url, table) - } - + // End of deprecated methods //////////////////////////////////////////////////////////////////////////// - // for Python API //////////////////////////////////////////////////////////////////////////// - /** - * Converts a JavaRDD to a PythonRDD. - */ - protected[sql] def javaToPython: JavaRDD[Array[Byte]] = { - val fieldTypes = schema.fields.map(_.dataType) - val jrdd = rdd.map(EvaluatePython.rowToArray(_, fieldTypes)).toJavaRDD() - SerDeUtil.javaToPython(jrdd) - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala index a3187fe3230fd..2f19ec0403017 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameHolder.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql /** * A container for a [[DataFrame]], used for implicit conversions. + * + * @since 1.3.0 */ private[sql] case class DataFrameHolder(df: DataFrame) { @@ -26,5 +28,5 @@ private[sql] case class DataFrameHolder(df: DataFrame) { // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame. def toDF(): DataFrame = df - def toDF(colNames: String*): DataFrame = df.toDF(colNames :_*) + def toDF(colNames: String*): DataFrame = df.toDF(colNames : _*) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala index 481ed4924857e..b4c2daa055868 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala @@ -30,12 +30,16 @@ import org.apache.spark.sql.types._ /** * :: Experimental :: * Functionality for working with missing data in [[DataFrame]]s. + * + * @since 1.3.1 */ @Experimental final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing any null values. + * + * @since 1.3.1 */ def drop(): DataFrame = drop("any", df.columns) @@ -44,18 +48,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values. * If `how` is "all", then drop rows only if every column is null for that row. + * + * @since 1.3.1 */ def drop(how: String): DataFrame = drop(how, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Array[String]): DataFrame = drop(cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame ]] that drops rows containing any null values * in the specified columns. + * + * @since 1.3.1 */ def drop(cols: Seq[String]): DataFrame = drop(cols.size, cols) @@ -65,6 +75,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. 
* If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq) @@ -74,6 +86,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * If `how` is "any", then drop rows containing any null values in the specified columns. * If `how` is "all", then drop rows only if every specified column is null for that row. + * + * @since 1.3.1 */ def drop(how: String, cols: Seq[String]): DataFrame = { how.toLowerCase match { @@ -85,18 +99,24 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null values. + * + * @since 1.3.1 */ def drop(minNonNulls: Int): DataFrame = drop(minNonNulls, df.columns) /** * Returns a new [[DataFrame]] that drops rows containing less than `minNonNulls` non-null * values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that drops rows containing less than * `minNonNulls` non-null values in the specified columns. + * + * @since 1.3.1 */ def drop(minNonNulls: Int, cols: Seq[String]): DataFrame = { // Filtering condition -- only keep the row if it has at least `minNonNulls` non-null values. @@ -106,23 +126,31 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in numeric columns with `value`. + * + * @since 1.3.1 */ def fill(value: Double): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame ]] that replaces null values in string columns with `value`. + * + * @since 1.3.1 */ def fill(value: String): DataFrame = fill(value, df.columns) /** * Returns a new [[DataFrame]] that replaces null values in specified numeric columns. * If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in specified * numeric columns. If a specified column is not a numeric column, it is ignored. + * + * @since 1.3.1 */ def fill(value: Double, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -140,12 +168,16 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { /** * Returns a new [[DataFrame]] that replaces null values in specified string columns. * If a specified column is not a string column, it is ignored. + * + * @since 1.3.1 */ def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq) /** * (Scala-specific) Returns a new [[DataFrame]] that replaces null values in * specified string columns. If a specified column is not a string column, it is ignored. 
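The `DataFrameNaFunctions` entry points above are reached through `df.na`; a short sketch with placeholder column names:

{{{
// Drop rows with any null, or only those where every listed column is null.
val noNulls = df.na.drop()
val looser  = df.na.drop("all", Seq("height", "weight"))

// Require at least two non-null values per row.
val atLeastTwo = df.na.drop(2)

// Fill by type, by column subset, or per column via a map.
val filled    = df.na.fill(0.0, Seq("height"))
val filledMap = df.na.fill(Map("name" -> "unknown", "age" -> 18))
}}}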
+ * + * @since 1.3.1 */ def fill(value: String, cols: Seq[String]): DataFrame = { val columnEquals = df.sqlContext.analyzer.resolver @@ -172,6 +204,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * import com.google.common.collect.ImmutableMap; * df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0)); * }}} + * + * @since 1.3.1 */ def fill(valueMap: java.util.Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -189,6 +223,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * "B" -> 1.0 * )) * }}} + * + * @since 1.3.1 */ def fill(valueMap: Map[String, Any]): DataFrame = fill0(valueMap.toSeq) @@ -212,6 +248,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: java.util.Map[T, T]): DataFrame = { replace[T](col, replacement.toMap : Map[T, T]) @@ -233,6 +271,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Array[String], replacement: java.util.Map[T, T]): DataFrame = { replace(cols.toSeq, replacement.toMap) @@ -256,6 +296,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param col name of the column to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](col: String, replacement: Map[T, T]): DataFrame = { if (col == "*") { @@ -279,6 +321,8 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * * @param cols list of columns to apply the value replacement * @param replacement value replacement map, as explained above + * + * @since 1.3.1 */ def replace[T](cols: Seq[String], replacement: Map[T, T]): DataFrame = replace0(cols, replacement) @@ -357,11 +401,12 @@ final class DataFrameNaFunctions private[sql](df: DataFrame) { * TODO: This can be optimized to use broadcast join when replacementMap is large. */ private def replaceCol(col: StructField, replacementMap: Map[_, _]): Column = { - val branches: Seq[Expression] = replacementMap.flatMap { case (source, target) => - df.col(col.name).equalTo(lit(source).cast(col.dataType)).expr :: - lit(target).cast(col.dataType).expr :: Nil + val keyExpr = df.col(col.name).expr + def buildExpr(v: Any) = Cast(Literal(v), keyExpr.dataType) + val branches = replacementMap.flatMap { case (source, target) => + Seq(buildExpr(source), buildExpr(target)) }.toSeq - new Column(CaseWhen(branches ++ Seq(df.col(col.name).expr))).as(col.name) + new Column(CaseKeyWhen(keyExpr, branches :+ keyExpr)).as(col.name) } private def convertToDouble(v: Any): Double = v match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala new file mode 100644 index 0000000000000..b44d4c86ac5d3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -0,0 +1,289 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. 
+* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.spark.sql + +import java.util.Properties + +import org.apache.hadoop.fs.Path +import org.apache.spark.Partition + +import org.apache.spark.annotation.Experimental +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} +import org.apache.spark.sql.json.{JsonRDD, JSONRelation} +import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.sources.{LogicalRelation, ResolvedDataSource} +import org.apache.spark.sql.types.StructType + +/** + * :: Experimental :: + * Interface used to load a [[DataFrame]] from external storage systems (e.g. file systems, + * key-value stores, etc). Use [[SQLContext.read]] to access this. + * + * @since 1.4.0 + */ +@Experimental +class DataFrameReader private[sql](sqlContext: SQLContext) { + + /** + * Specifies the input data source format. + * + * @since 1.4.0 + */ + def format(source: String): DataFrameReader = { + this.source = source + this + } + + /** + * Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema + * automatically from data. By specifying the schema here, the underlying data source can + * skip the schema inference step, and thus speed up data loading. + * + * @since 1.4.0 + */ + def schema(schema: StructType): DataFrameReader = { + this.userSpecifiedSchema = Option(schema) + this + } + + /** + * Adds an input option for the underlying data source. + * + * @since 1.4.0 + */ + def option(key: String, value: String): DataFrameReader = { + this.extraOptions += (key -> value) + this + } + + /** + * (Scala-specific) Adds input options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: scala.collection.Map[String, String]): DataFrameReader = { + this.extraOptions ++= options + this + } + + /** + * Adds input options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: java.util.Map[String, String]): DataFrameReader = { + this.options(scala.collection.JavaConversions.mapAsScalaMap(options)) + this + } + + /** + * Loads input in as a [[DataFrame]], for data sources that require a path (e.g. data backed by + * a local or distributed file system). + * + * @since 1.4.0 + */ + def load(path: String): DataFrame = { + option("path", path).load() + } + + /** + * Loads input in as a [[DataFrame]], for data sources that don't require a path (e.g. external + * key-value stores). 
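`DataFrameReader` is the builder handed out by `SQLContext.read`; a generic load sketch in which the format, option key, schema, and path are illustrative placeholders:

{{{
import org.apache.spark.sql.types._

val eventSchema = StructType(Seq(
  StructField("id", LongType),
  StructField("payload", StringType)))

// Supplying the schema up front skips the inference pass described above;
// option/options values are passed through to the underlying source.
val events = sqlContext.read
  .format("json")
  .schema(eventSchema)
  .option("samplingRatio", "0.1")
  .load("/data/events")
}}}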
+ * + * @since 1.4.0 + */ + def load(): DataFrame = { + val resolved = ResolvedDataSource( + sqlContext, + userSpecifiedSchema = userSpecifiedSchema, + partitionColumns = Array.empty[String], + provider = source, + options = extraOptions.toMap) + DataFrame(sqlContext, LogicalRelation(resolved.relation)) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table and connection properties. + * + * @since 1.4.0 + */ + def jdbc(url: String, table: String, properties: Properties): DataFrame = { + jdbc(url, table, JDBCRelation.columnPartition(null), properties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. Partitions of the table will be retrieved in parallel based on the parameters + * passed to this function. + * + * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash + * your external database systems. + * + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param columnName the name of a column of integral type that will be used for partitioning. + * @param lowerBound the minimum value of `columnName` used to decide partition stride + * @param upperBound the maximum value of `columnName` used to decide partition stride + * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split + * evenly into this many partitions + * @param connectionProperties JDBC database connection arguments, a list of arbitrary string + * tag/value. Normally at least a "user" and "password" property + * should be included. + * + * @since 1.4.0 + */ + def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int, + connectionProperties: Properties): DataFrame = { + val partitioning = JDBCPartitioningInfo(columnName, lowerBound, upperBound, numPartitions) + val parts = JDBCRelation.columnPartition(partitioning) + jdbc(url, table, parts, connectionProperties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table using connection properties. The `predicates` parameter gives a list + * expressions suitable for inclusion in WHERE clauses; each one defines one partition + * of the [[DataFrame]]. + * + * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash + * your external database systems. + * + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param predicates Condition in the where clause for each partition. + * @param connectionProperties JDBC database connection arguments, a list of arbitrary string + * tag/value. Normally at least a "user" and "password" property + * should be included. 
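The partitioned `jdbc` overload above splits the table on an integral column; a sketch with placeholder connection details:

{{{
import java.util.Properties

val props = new Properties()
props.setProperty("user", "reporting")
props.setProperty("password", "secret")

// Eight partitions, each reading an equal stride of ids in [0, 1000000).
val orders = sqlContext.read.jdbc(
  "jdbc:postgresql://db.example.com/shop",
  "orders",
  columnName = "id",
  lowerBound = 0L,
  upperBound = 1000000L,
  numPartitions = 8,
  connectionProperties = props)
}}}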
+ * @since 1.4.0 + */ + def jdbc( + url: String, + table: String, + predicates: Array[String], + connectionProperties: Properties): DataFrame = { + val parts: Array[Partition] = predicates.zipWithIndex.map { case (part, i) => + JDBCPartition(part, i) : Partition + } + jdbc(url, table, parts, connectionProperties) + } + + private def jdbc( + url: String, + table: String, + parts: Array[Partition], + connectionProperties: Properties): DataFrame = { + val relation = JDBCRelation(url, table, parts, connectionProperties)(sqlContext) + sqlContext.baseRelationToDataFrame(relation) + } + + /** + * Loads a JSON file (one object per line) and returns the result as a [[DataFrame]]. + * + * This function goes through the input once to determine the input schema. If you know the + * schema in advance, use the version that specifies the schema to avoid the extra scan. + * + * @param path input path + * @since 1.4.0 + */ + def json(path: String): DataFrame = format("json").load(path) + + /** + * Loads an `JavaRDD[String]` storing JSON objects (one object per record) and + * returns the result as a [[DataFrame]]. + * + * Unless the schema is specified using [[schema]] function, this function goes through the + * input once to determine the input schema. + * + * @param jsonRDD input RDD with one JSON object per record + * @since 1.4.0 + */ + def json(jsonRDD: JavaRDD[String]): DataFrame = json(jsonRDD.rdd) + + /** + * Loads an `RDD[String]` storing JSON objects (one object per record) and + * returns the result as a [[DataFrame]]. + * + * Unless the schema is specified using [[schema]] function, this function goes through the + * input once to determine the input schema. + * + * @param jsonRDD input RDD with one JSON object per record + * @since 1.4.0 + */ + def json(jsonRDD: RDD[String]): DataFrame = { + val samplingRatio = extraOptions.getOrElse("samplingRatio", "1.0").toDouble + if (sqlContext.conf.useJacksonStreamingAPI) { + sqlContext.baseRelationToDataFrame( + new JSONRelation(() => jsonRDD, None, samplingRatio, userSpecifiedSchema)(sqlContext)) + } else { + val columnNameOfCorruptJsonRecord = sqlContext.conf.columnNameOfCorruptRecord + val appliedSchema = userSpecifiedSchema.getOrElse( + JsonRDD.nullTypeToStringType( + JsonRDD.inferSchema(jsonRDD, 1.0, columnNameOfCorruptJsonRecord))) + val rowRDD = JsonRDD.jsonStringToRow(jsonRDD, appliedSchema, columnNameOfCorruptJsonRecord) + sqlContext.createDataFrame(rowRDD, appliedSchema, needsConversion = false) + } + } + + /** + * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty + * [[DataFrame]] if no paths are passed in. + * + * @since 1.4.0 + */ + @scala.annotation.varargs + def parquet(paths: String*): DataFrame = { + if (paths.isEmpty) { + sqlContext.emptyDataFrame + } else { + val globbedPaths = paths.map(new Path(_)).flatMap(SparkHadoopUtil.get.globPath).toArray + sqlContext.baseRelationToDataFrame( + new ParquetRelation2( + globbedPaths.map(_.toString), None, None, Map.empty[String, String])(sqlContext)) + } + } + + /** + * Returns the specified table as a [[DataFrame]]. 
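The format-specific shortcuts above are thin wrappers over `format(...).load(...)`; a sketch with placeholder paths and table names:

{{{
// One JSON object per line; the schema is inferred in a separate pass
// unless schema(...) was called first.
val people = sqlContext.read.json("/data/people.json")

// Varargs paths are globbed; an empty argument list yields an empty DataFrame.
val logs = sqlContext.read.parquet("/data/logs/2015/*", "/data/logs/2014/12")

// Catalog lookup by table name.
val events = sqlContext.read.table("events")
}}}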
+ * + * @since 1.4.0 + */ + def table(tableName: String): DataFrame = { + DataFrame(sqlContext, sqlContext.catalog.lookupRelation(Seq(tableName))) + } + + /////////////////////////////////////////////////////////////////////////////////////// + // Builder pattern config options + /////////////////////////////////////////////////////////////////////////////////////// + + private var source: String = sqlContext.conf.defaultDataSourceName + + private var userSpecifiedSchema: Option[StructType] = None + + private var extraOptions = new scala.collection.mutable.HashMap[String, String] + +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index 903532105284e..edb9ed7bba56a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -23,18 +23,34 @@ import org.apache.spark.sql.execution.stat._ /** * :: Experimental :: * Statistic functions for [[DataFrame]]s. + * + * @since 1.4.0 */ @Experimental final class DataFrameStatFunctions private[sql](df: DataFrame) { + /** + * Calculate the sample covariance of two numerical columns of a DataFrame. + * @param col1 the name of the first column + * @param col2 the name of the second column + * @return the covariance of the two columns. + * + * @since 1.4.0 + */ + def cov(col1: String, col2: String): Double = { + StatFunctions.calculateCov(df, Seq(col1, col2)) + } + /** * Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson - * Correlation Coefficient. For Spearman Correlation, consider using RDD methods found in + * Correlation Coefficient. For Spearman Correlation, consider using RDD methods found in * MLlib's Statistics. * * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. + * + * @since 1.4.0 */ def corr(col1: String, col2: String, method: String): Double = { require(method == "pearson", "Currently only the calculation of the Pearson Correlation " + @@ -48,21 +64,48 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * @param col1 the name of the column * @param col2 the name of the column to calculate the correlation against * @return The Pearson Correlation Coefficient as a Double. + * + * @since 1.4.0 */ def corr(col1: String, col2: String): Double = { corr(col1, col2, "pearson") } + /** + * Computes a pair-wise frequency table of the given columns. Also known as a contingency table. + * The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero + * pair frequencies will be returned. + * The first column of each row will be the distinct values of `col1` and the column names will + * be the distinct values of `col2`. The name of the first column will be `$col1_$col2`. Counts + * will be returned as `Long`s. Pairs that have no occurrences will have `null` as their counts. + * + * @param col1 The name of the first column. Distinct items will make the first item of + * each row. + * @param col2 The name of the second column. Distinct items will make the column names + * of the DataFrame. + * @return A DataFrame containing for the contingency table. 
+ * + * @since 1.4.0 + */ + def crosstab(col1: String, col2: String): DataFrame = { + StatFunctions.crossTabulate(df, col1, col2) + } + /** * Finding frequent items for columns, possibly with false positives. Using the * frequent element count algorithm described in * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]]. * The `support` should be greater than 1e-4. * + * This function is meant for exploratory data analysis, as we make no guarantee about the + * backward compatibility of the schema of the resulting [[DataFrame]]. + * * @param cols the names of the columns to search frequent items in. * @param support The minimum frequency for an item to be considered `frequent`. Should be greater * than 1e-4. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) @@ -74,34 +117,50 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]]. * Uses a `default` support of 1%. * + * This function is meant for exploratory data analysis, as we make no guarantee about the + * backward compatibility of the schema of the resulting [[DataFrame]]. + * * @param cols the names of the columns to search frequent items in. * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Array[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) } /** - * Python friendly implementation for `freqItems` + * (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the + * frequent element count algorithm described in + * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]]. + * + * This function is meant for exploratory data analysis, as we make no guarantee about the + * backward compatibility of the schema of the resulting [[DataFrame]]. + * + * @param cols the names of the columns to search frequent items in. + * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String], support: Double): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, support) } /** - * Python friendly implementation for `freqItems` with a default `support` of 1%. + * (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the + * frequent element count algorithm described in + * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]]. + * Uses a `default` support of 1%. + * + * This function is meant for exploratory data analysis, as we make no guarantee about the + * backward compatibility of the schema of the resulting [[DataFrame]]. + * + * @param cols the names of the columns to search frequent items in. + * @return A Local DataFrame with the Array of frequent items for each column. + * + * @since 1.4.0 */ def freqItems(cols: Seq[String]): DataFrame = { FrequentItems.singlePassFreqItems(df, cols, 0.01) } - - /** - * Calculate the sample covariance of two numerical columns of a DataFrame. - * @param col1 the name of the first column - * @param col2 the name of the second column - * @return the covariance of the two columns. 
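The `DataFrameStatFunctions` methods above are reached through `df.stat`; a sketch with placeholder column names:

{{{
val covariance = df.stat.cov("height", "weight")
val pearson    = df.stat.corr("height", "weight")   // Pearson is the only supported method

// Contingency table: the first column holds the distinct heights, the remaining
// column names are the distinct weights, and cells are co-occurrence counts.
val contingency = df.stat.crosstab("height", "weight")

// Items appearing in at least 5% of rows, per column (false positives possible).
val frequent = df.stat.freqItems(Array("browser", "country"), 0.05)
}}}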
- */ - def cov(col1: String, col2: String): Double = { - StatFunctions.calculateCov(df, Seq(col1, col2)) - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala new file mode 100644 index 0000000000000..5548b26cb8f80 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -0,0 +1,295 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.spark.sql + +import java.util.Properties + +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable +import org.apache.spark.sql.jdbc.{JDBCWriteDetails, JdbcUtils} +import org.apache.spark.sql.sources.{ResolvedDataSource, CreateTableUsingAsSelect} + + +/** + * :: Experimental :: + * Interface used to write a [[DataFrame]] to external storage systems (e.g. file systems, + * key-value stores, etc). Use [[DataFrame.write]] to access this. + * + * @since 1.4.0 + */ +@Experimental +final class DataFrameWriter private[sql](df: DataFrame) { + + /** + * Specifies the behavior when data or table already exists. Options include: + * - `SaveMode.Overwrite`: overwrite the existing data. + * - `SaveMode.Append`: append the data. + * - `SaveMode.Ignore`: ignore the operation (i.e. no-op). + * - `SaveMode.ErrorIfExists`: default option, throw an exception at runtime. + * + * @since 1.4.0 + */ + def mode(saveMode: SaveMode): DataFrameWriter = { + this.mode = saveMode + this + } + + /** + * Specifies the behavior when data or table already exists. Options include: + * - `overwrite`: overwrite the existing data. + * - `append`: append the data. + * - `ignore`: ignore the operation (i.e. no-op). + * - `error`: default option, throw an exception at runtime. + * + * @since 1.4.0 + */ + def mode(saveMode: String): DataFrameWriter = { + this.mode = saveMode.toLowerCase match { + case "overwrite" => SaveMode.Overwrite + case "append" => SaveMode.Append + case "ignore" => SaveMode.Ignore + case "error" | "default" => SaveMode.ErrorIfExists + case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " + + "Accepted modes are 'overwrite', 'append', 'ignore', 'error'.") + } + this + } + + /** + * Specifies the underlying output data source. Built-in options include "parquet", "json", etc. + * + * @since 1.4.0 + */ + def format(source: String): DataFrameWriter = { + this.source = source + this + } + + /** + * Adds an output option for the underlying data source. 
+ * + * @since 1.4.0 + */ + def option(key: String, value: String): DataFrameWriter = { + this.extraOptions += (key -> value) + this + } + + /** + * (Scala-specific) Adds output options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: scala.collection.Map[String, String]): DataFrameWriter = { + this.extraOptions ++= options + this + } + + /** + * Adds output options for the underlying data source. + * + * @since 1.4.0 + */ + def options(options: java.util.Map[String, String]): DataFrameWriter = { + this.options(scala.collection.JavaConversions.mapAsScalaMap(options)) + this + } + + /** + * Partitions the output by the given columns on the file system. If specified, the output is + * laid out on the file system similar to Hive's partitioning scheme. + * + * This is only applicable for Parquet at the moment. + * + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(colNames: String*): DataFrameWriter = { + this.partitioningColumns = Option(colNames) + this + } + + /** + * Saves the content of the [[DataFrame]] at the specified path. + * + * @since 1.4.0 + */ + def save(path: String): Unit = { + this.extraOptions += ("path" -> path) + save() + } + + /** + * Saves the content of the [[DataFrame]] as the specified table. + * + * @since 1.4.0 + */ + def save(): Unit = { + ResolvedDataSource( + df.sqlContext, + source, + partitioningColumns.map(_.toArray).getOrElse(Array.empty[String]), + mode, + extraOptions.toMap, + df) + } + + /** + * Inserts the content of the [[DataFrame]] to the specified table. It requires that + * the schema of the [[DataFrame]] is the same as the schema of the table. + * + * Because it inserts data to an existing table, format or options will be ignored. + * + * @since 1.4.0 + */ + def insertInto(tableName: String): Unit = { + val partitions = + partitioningColumns.map(_.map(col => col -> (None: Option[String])).toMap) + val overwrite = (mode == SaveMode.Overwrite) + df.sqlContext.executePlan(InsertIntoTable( + UnresolvedRelation(Seq(tableName)), + partitions.getOrElse(Map.empty[String, Option[String]]), + df.logicalPlan, + overwrite, + ifNotExists = false)).toRdd + } + + /** + * Saves the content of the [[DataFrame]] as the specified table. + * + * In the case the table already exists, behavior of this function depends on the + * save mode, specified by the `mode` function (default to throwing an exception). + * When `mode` is `Overwrite`, the schema of the [[DataFrame]] does not need to be + * the same as that of the existing table. + * When `mode` is `Append`, the schema of the [[DataFrame]] need to be + * the same as that of the existing table, and format or options will be ignored. + * + * @since 1.4.0 + */ + def saveAsTable(tableName: String): Unit = { + if (df.sqlContext.catalog.tableExists(tableName :: Nil) && mode != SaveMode.Overwrite) { + mode match { + case SaveMode.Ignore => + // Do nothing + + case SaveMode.ErrorIfExists => + throw new AnalysisException(s"Table $tableName already exists.") + + case SaveMode.Append => + // If it is Append, we just ask insertInto to handle it. We will not use insertInto + // to handle saveAsTable with Overwrite because saveAsTable can change the schema of + // the table. But, insertInto with Overwrite requires the schema of data be the same + // the schema of the table. 
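A sketch of the builder-style writer defined above, chaining `format`, `mode`, `partitionBy`, `save`, and `saveAsTable`. The DataFrame `df`, the output path, and the "date"/"events" names are placeholders:

{{{
import org.apache.spark.sql.SaveMode

// Write df as Parquet under /tmp/events, partitioned by "date",
// overwriting any existing output.
df.write
  .format("parquet")
  .mode(SaveMode.Overwrite)      // the string form .mode("overwrite") is equivalent
  .partitionBy("date")
  .save("/tmp/events")

// Or persist the same data as a catalog table instead of a raw path.
df.write.mode("append").saveAsTable("events")
}}}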
+ insertInto(tableName) + } + } else { + val cmd = + CreateTableUsingAsSelect( + tableName, + source, + temporary = false, + partitioningColumns.map(_.toArray).getOrElse(Array.empty[String]), + mode, + extraOptions.toMap, + df.logicalPlan) + df.sqlContext.executePlan(cmd).toRdd + } + } + + /** + * Saves the content of the [[DataFrame]] to a external database table via JDBC. In the case the + * table already exists in the external database, behavior of this function depends on the + * save mode, specified by the `mode` function (default to throwing an exception). + * + * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash + * your external database systems. + * + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param connectionProperties JDBC database connection arguments, a list of arbitrary string + * tag/value. Normally at least a "user" and "password" property + * should be included. + */ + def jdbc(url: String, table: String, connectionProperties: Properties): Unit = { + val conn = JdbcUtils.createConnection(url, connectionProperties) + + try { + var tableExists = JdbcUtils.tableExists(conn, table) + + if (mode == SaveMode.Ignore && tableExists) { + return + } + + if (mode == SaveMode.ErrorIfExists && tableExists) { + sys.error(s"Table $table already exists.") + } + + if (mode == SaveMode.Overwrite && tableExists) { + JdbcUtils.dropTable(conn, table) + tableExists = false + } + + // Create the table if the table didn't exist. + if (!tableExists) { + val schema = JDBCWriteDetails.schemaString(df, url) + val sql = s"CREATE TABLE $table ($schema)" + conn.prepareStatement(sql).executeUpdate() + } + } finally { + conn.close() + } + + JDBCWriteDetails.saveTable(df, url, table, connectionProperties) + } + + /** + * Saves the content of the [[DataFrame]] in JSON format at the specified path. + * This is equivalent to: + * {{{ + * format("json").save(path) + * }}} + * + * @since 1.4.0 + */ + def json(path: String): Unit = format("json").save(path) + + /** + * Saves the content of the [[DataFrame]] in Parquet format at the specified path. + * This is equivalent to: + * {{{ + * format("parquet").save(path) + * }}} + * + * @since 1.4.0 + */ + def parquet(path: String): Unit = format("parquet").save(path) + + /////////////////////////////////////////////////////////////////////////////////////// + // Builder pattern config options + /////////////////////////////////////////////////////////////////////////////////////// + + private var source: String = df.sqlContext.conf.defaultDataSourceName + + private var mode: SaveMode = SaveMode.ErrorIfExists + + private var extraOptions = new scala.collection.mutable.HashMap[String, String] + + private var partitioningColumns: Option[Seq[String]] = None + +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala index d5d7e35a6b35d..717709e4f9312 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala @@ -27,6 +27,8 @@ import org.apache.spark.annotation.Experimental * {{{ * sqlContext.experimental.extraStrategies += ... 
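Since the `jdbc` writer above creates the target table when it does not exist and honors the current save mode, a hedged sketch of writing to an external database could look like the following; the URL, table name, and credentials are placeholders:

{{{
import java.util.Properties

val props = new Properties()
props.setProperty("user", "spark")       // placeholder credentials
props.setProperty("password", "secret")

// Appends df to "events", creating the table first if it is missing.
df.write
  .mode("append")
  .jdbc("jdbc:postgresql://db-host:5432/analytics", "events", props)
}}}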
* }}} + * + * @since 1.3.0 */ @Experimental class ExperimentalMethods protected[sql](sqlContext: SQLContext) { @@ -34,6 +36,8 @@ class ExperimentalMethods protected[sql](sqlContext: SQLContext) { /** * Allows extra strategies to be injected into the query planner at runtime. Note this API * should be consider experimental and is not intended to be stable across releases. + * + * @since 1.3.0 */ @Experimental var extraStrategies: Seq[Strategy] = Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala index 53ad67372e024..45b3e1bc627d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedData.scala @@ -23,28 +23,79 @@ import scala.language.implicitConversions import org.apache.spark.annotation.Experimental import org.apache.spark.sql.catalyst.analysis.Star import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.Aggregate +import org.apache.spark.sql.catalyst.plans.logical.{Rollup, Cube, Aggregate} import org.apache.spark.sql.types.NumericType +/** + * Companion object for GroupedData + */ +private[sql] object GroupedData { + def apply( + df: DataFrame, + groupingExprs: Seq[Expression], + groupType: GroupType): GroupedData = { + new GroupedData(df, groupingExprs, groupType: GroupType) + } + + /** + * The Grouping Type + */ + private[sql] trait GroupType + + /** + * To indicate it's the GroupBy + */ + private[sql] object GroupByType extends GroupType + + /** + * To indicate it's the CUBE + */ + private[sql] object CubeType extends GroupType + + /** + * To indicate it's the ROLLUP + */ + private[sql] object RollupType extends GroupType +} /** * :: Experimental :: * A set of methods for aggregations on a [[DataFrame]], created by [[DataFrame.groupBy]]. + * + * @since 1.3.0 */ @Experimental -class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) { +class GroupedData protected[sql]( + df: DataFrame, + groupingExprs: Seq[Expression], + private val groupType: GroupedData.GroupType) { - private[sql] implicit def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { - val namedGroupingExprs = groupingExprs.map { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.prettyString)() + private[this] def toDF(aggExprs: Seq[NamedExpression]): DataFrame = { + val aggregates = if (df.sqlContext.conf.dataFrameRetainGroupColumns) { + val retainedExprs = groupingExprs.map { + case expr: NamedExpression => expr + case expr: Expression => Alias(expr, expr.prettyString)() + } + retainedExprs ++ aggExprs + } else { + aggExprs + } + + groupType match { + case GroupedData.GroupByType => + DataFrame( + df.sqlContext, Aggregate(groupingExprs, aggregates, df.logicalPlan)) + case GroupedData.RollupType => + DataFrame( + df.sqlContext, Rollup(groupingExprs, df.logicalPlan, aggregates)) + case GroupedData.CubeType => + DataFrame( + df.sqlContext, Cube(groupingExprs, df.logicalPlan, aggregates)) } - DataFrame( - df.sqlContext, Aggregate(groupingExprs, namedGroupingExprs ++ aggExprs, df.logicalPlan)) } private[this] def aggregateNumericColumns(colNames: String*)(f: Expression => Expression) - : Seq[NamedExpression] = { + : DataFrame = { val columnExprs = if (colNames.isEmpty) { // No columns specified. Use all numeric columns. 
@@ -61,10 +112,10 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) namedExpr } } - columnExprs.map { c => + toDF(columnExprs.map { c => val a = f(c) Alias(a, a.prettyString)() - } + }) } private[this] def strToExpr(expr: String): (Expression => Expression) = { @@ -94,6 +145,8 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * ) * }}} + * + * @since 1.3.0 */ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = { agg((aggExpr +: aggExprs).toMap) @@ -111,12 +164,14 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * "expense" -> "sum" * )) * }}} + * + * @since 1.3.0 */ def agg(exprs: Map[String, String]): DataFrame = { - exprs.map { case (colName, expr) => + toDF(exprs.map { case (colName, expr) => val a = strToExpr(expr)(df(colName).expr) Alias(a, a.prettyString)() - }.toSeq + }.toSeq) } /** @@ -129,14 +184,17 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * import com.google.common.collect.ImmutableMap; * df.groupBy("department").agg(ImmutableMap.of("age", "max", "expense", "sum")); * }}} + * + * @since 1.3.0 */ def agg(exprs: java.util.Map[String, String]): DataFrame = { agg(exprs.toMap) } /** - * Compute aggregates by specifying a series of aggregate columns. Unlike other methods in this - * class, the resulting [[DataFrame]] won't automatically include the grouping columns. + * Compute aggregates by specifying a series of aggregate columns. Note that this function by + * default retains the grouping columns in its output. To not retain grouping columns, set + * `spark.sql.retainGroupColumns` to false. * * The available aggregate methods are defined in [[org.apache.spark.sql.functions]]. * @@ -145,75 +203,98 @@ class GroupedData protected[sql](df: DataFrame, groupingExprs: Seq[Expression]) * * // Scala: * import org.apache.spark.sql.functions._ - * df.groupBy("department").agg($"department", max($"age"), sum($"expense")) + * df.groupBy("department").agg(max("age"), sum("expense")) * * // Java: * import static org.apache.spark.sql.functions.*; - * df.groupBy("department").agg(col("department"), max(col("age")), sum(col("expense"))); + * df.groupBy("department").agg(max("age"), sum("expense")); + * }}} + * + * Note that before Spark 1.4, the default behavior is to NOT retain grouping columns. To change + * to that behavior, set config variable `spark.sql.retainGroupColumns` to `false`. + * {{{ + * // Scala, 1.3.x: + * df.groupBy("department").agg($"department", max("age"), sum("expense")) + * + * // Java, 1.3.x: + * df.groupBy("department").agg(col("department"), max("age"), sum("expense")); * }}} + * + * @since 1.3.0 */ @scala.annotation.varargs def agg(expr: Column, exprs: Column*): DataFrame = { - val aggExprs = (expr +: exprs).map(_.expr).map { + toDF((expr +: exprs).map(_.expr).map { case expr: NamedExpression => expr case expr: Expression => Alias(expr, expr.prettyString)() - } - DataFrame(df.sqlContext, Aggregate(groupingExprs, aggExprs, df.logicalPlan)) + }) } /** * Count the number of rows for each group. * The resulting [[DataFrame]] will also contain the grouping columns. + * + * @since 1.3.0 */ - def count(): DataFrame = Seq(Alias(Count(Literal(1)), "count")()) + def count(): DataFrame = toDF(Seq(Alias(Count(Literal(1)), "count")())) /** * Compute the average value for each numeric columns for each group. This is an alias for `avg`. 
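To make the 1.4 change in grouping-column retention concrete, here is a small sketch built from the examples in the Scaladoc above; the `df` schema ("department", "age", "expense") is illustrative, and the flag used at the end is the one added to SQLConf further down in this patch:

{{{
import org.apache.spark.sql.functions._

// 1.4.0: the grouping column "department" is retained automatically, so the
// result contains "department" plus the two aggregate columns.
df.groupBy("department").agg(max("age"), sum("expense"))

// Restore the pre-1.4 behavior (grouping columns not retained):
sqlContext.setConf("spark.sql.retainGroupColumns", "false")
df.groupBy("department").agg(col("department"), max("age"), sum("expense"))
}}}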
* The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the average values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def mean(colNames: String*): DataFrame = { - aggregateNumericColumns(colNames:_*)(Average) + aggregateNumericColumns(colNames : _*)(Average) } - + /** * Compute the max value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the max values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def max(colNames: String*): DataFrame = { - aggregateNumericColumns(colNames:_*)(Max) + aggregateNumericColumns(colNames : _*)(Max) } /** * Compute the mean value for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the mean values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def avg(colNames: String*): DataFrame = { - aggregateNumericColumns(colNames:_*)(Average) + aggregateNumericColumns(colNames : _*)(Average) } /** * Compute the min value for each numeric column for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the min values for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def min(colNames: String*): DataFrame = { - aggregateNumericColumns(colNames:_*)(Min) + aggregateNumericColumns(colNames : _*)(Min) } /** * Compute the sum for each numeric columns for each group. * The resulting [[DataFrame]] will also contain the grouping columns. * When specified columns are given, only compute the sum for them. + * + * @since 1.3.0 */ @scala.annotation.varargs def sum(colNames: String*): DataFrame = { - aggregateNumericColumns(colNames:_*)(Sum) - } + aggregateNumericColumns(colNames : _*)(Sum) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index 99db959a8741c..77c6af27d1007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql +import java.util.Properties + import scala.collection.immutable import scala.collection.JavaConversions._ -import java.util.Properties +import org.apache.spark.sql.catalyst.CatalystConf private[spark] object SQLConf { val COMPRESS_CACHED = "spark.sql.inMemoryColumnarStorage.compressed" @@ -32,6 +34,7 @@ private[spark] object SQLConf { val CODEGEN_ENABLED = "spark.sql.codegen" val UNSAFE_ENABLED = "spark.sql.unsafe.enabled" val DIALECT = "spark.sql.dialect" + val CASE_SENSITIVE = "spark.sql.caseSensitive" val PARQUET_BINARY_AS_STRING = "spark.sql.parquet.binaryAsString" val PARQUET_INT96_AS_TIMESTAMP = "spark.sql.parquet.int96AsTimestamp" @@ -40,6 +43,8 @@ private[spark] object SQLConf { val PARQUET_FILTER_PUSHDOWN_ENABLED = "spark.sql.parquet.filterPushdown" val PARQUET_USE_DATA_SOURCE_API = "spark.sql.parquet.useDataSourceApi" + val ORC_FILTER_PUSHDOWN_ENABLED = "spark.sql.orc.filterPushdown" + val HIVE_VERIFY_PARTITIONPATH = "spark.sql.hive.verifyPartitionPath" val COLUMN_NAME_OF_CORRUPT_RECORD = "spark.sql.columnNameOfCorruptRecord" @@ -63,12 +68,28 @@ private[spark] object SQLConf { // to its length exceeds the threshold. 
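The numeric shortcuts above (`mean`, `max`, `avg`, `min`, `sum`) aggregate every numeric column when called without arguments; a brief sketch with illustrative column names:

{{{
// Aggregate only the named numeric columns...
df.groupBy("department").avg("age", "expense")

// ...or, with no arguments, every numeric column in df.
df.groupBy("department").sum()
}}}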
val SCHEMA_STRING_LENGTH_THRESHOLD = "spark.sql.sources.schemaStringLengthThreshold" + // Whether to perform partition discovery when loading external data sources. Default to true. + val PARTITION_DISCOVERY_ENABLED = "spark.sql.sources.partitionDiscovery.enabled" + + // The output committer class used by FSBasedRelation. The specified class needs to be a + // subclass of org.apache.hadoop.mapreduce.OutputCommitter. + val OUTPUT_COMMITTER_CLASS = "spark.sql.sources.outputCommitterClass" + // Whether to perform eager analysis when constructing a dataframe. // Set to false when debugging requires the ability to look at invalid query plans. val DATAFRAME_EAGER_ANALYSIS = "spark.sql.eagerAnalysis" + // Whether to automatically resolve ambiguity in join conditions for self-joins. + // See SPARK-6231. + val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY = "spark.sql.selfJoinAutoResolveAmbiguity" + + // Whether to retain group by columns or not in GroupedData.agg. + val DATAFRAME_RETAIN_GROUP_COLUMNS = "spark.sql.retainGroupColumns" + val USE_SQL_SERIALIZER2 = "spark.sql.useSerializer2" + val USE_JACKSON_STREAMING_API = "spark.sql.json.useJacksonStreamingAPI" + object Deprecated { val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks" } @@ -83,7 +104,8 @@ private[spark] object SQLConf { * * SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads). */ -private[sql] class SQLConf extends Serializable { + +private[sql] class SQLConf extends Serializable with CatalystConf { import SQLConf._ /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */ @@ -127,6 +149,9 @@ private[sql] class SQLConf extends Serializable { private[spark] def parquetUseDataSourceApi = getConf(PARQUET_USE_DATA_SOURCE_API, "true").toBoolean + private[spark] def orcFilterPushDown = + getConf(ORC_FILTER_PUSHDOWN_ENABLED, "false").toBoolean + /** When true uses verifyPartitionPath to prune the path which is not exists. */ private[spark] def verifyPartitionPath = getConf(HIVE_VERIFY_PARTITIONPATH, "true").toBoolean @@ -152,6 +177,11 @@ private[sql] class SQLConf extends Serializable { */ private[spark] def codegenEnabled: Boolean = getConf(CODEGEN_ENABLED, "false").toBoolean + /** + * caseSensitive analysis true by default + */ + def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, "true").toBoolean + /** * When set to true, Spark SQL will use managed memory for certain operations. This option only * takes effect if codegen is enabled. @@ -162,6 +192,12 @@ private[sql] class SQLConf extends Serializable { private[spark] def useSqlSerializer2: Boolean = getConf(USE_SQL_SERIALIZER2, "true").toBoolean + /** + * Selects between the new (true) and old (false) JSON handlers, to be removed in Spark 1.5.0 + */ + private[spark] def useJacksonStreamingAPI: Boolean = + getConf(USE_JACKSON_STREAMING_API, "true").toBoolean + /** * Upper bound on the sizes (in bytes) of the tables qualified for the auto conversion to * a broadcast value during the physical executions of join operations. Setting this to -1 @@ -211,6 +247,9 @@ private[sql] class SQLConf extends Serializable { private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.parquet") + private[spark] def partitionDiscoveryEnabled() = + getConf(SQLConf.PARTITION_DISCOVERY_ENABLED, "true").toBoolean + // Do not use a value larger than 4000 as the default value of this property. // See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more information. 
private[spark] def schemaStringLengthThreshold: Int = @@ -219,6 +258,12 @@ private[sql] class SQLConf extends Serializable { private[spark] def dataFrameEagerAnalysis: Boolean = getConf(DATAFRAME_EAGER_ANALYSIS, "true").toBoolean + private[spark] def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = + getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY, "true").toBoolean + + private[spark] def dataFrameRetainGroupColumns: Boolean = + getConf(DATAFRAME_RETAIN_GROUP_COLUMNS, "true").toBoolean + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 7eabb93c1e3d6..91e6385dec81b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import java.beans.Introspector import java.util.Properties +import java.util.concurrent.atomic.AtomicReference import scala.collection.JavaConversions._ import scala.collection.immutable @@ -26,62 +27,22 @@ import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal -import com.google.common.reflect.TypeToken - +import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.catalyst.optimizer.{DefaultOptimizer, Optimizer} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.catalyst.Dialect -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection, expressions} +import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.execution.{Filter, _} -import org.apache.spark.sql.jdbc.{JDBCPartition, JDBCPartitioningInfo, JDBCRelation} -import org.apache.spark.sql.json._ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils -import org.apache.spark.{Partition, SparkContext} - -/** - * Currently we support the default dialect named "sql", associated with the class - * [[DefaultDialect]] - * - * And we can also provide custom SQL Dialect, for example in Spark SQL CLI: - * {{{ - *-- switch to "hiveql" dialect - * spark-sql>SET spark.sql.dialect=hiveql; - * spark-sql>SELECT * FROM src LIMIT 1; - * - *-- switch to "sql" dialect - * spark-sql>SET spark.sql.dialect=sql; - * spark-sql>SELECT * FROM src LIMIT 1; - * - *-- register the new SQL dialect - * spark-sql> SET spark.sql.dialect=com.xxx.xxx.SQL99Dialect; - * spark-sql> SELECT * FROM src LIMIT 1; - * - *-- register the non-exist SQL dialect - * spark-sql> SET spark.sql.dialect=NotExistedClass; - * spark-sql> SELECT * FROM src LIMIT 1; - * - *-- Exception will be thrown and switch to dialect - *-- "sql" (for SQLContext) or - *-- "hiveql" (for HiveContext) - * }}} - */ -private[spark] class DefaultDialect extends Dialect { - @transient - protected val sqlParser = new catalyst.SqlParser - - override def parse(sqlText: String): LogicalPlan = { - sqlParser.parse(sqlText) - } -} 
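The configuration keys introduced in this SQLConf hunk are plain string properties and can be set through `SQLContext.setConf`; a sketch using the defaults documented above (or simple overrides of them):

{{{
// ORC predicate push-down is off by default in 1.4.
sqlContext.setConf("spark.sql.orc.filterPushdown", "true")

// Case sensitivity of analysis (default: true for SQLContext).
sqlContext.setConf("spark.sql.caseSensitive", "false")

// Keep or drop grouping columns in GroupedData.agg (default: true).
sqlContext.setConf("spark.sql.retainGroupColumns", "true")

// Automatic partition discovery for external data sources (default: true).
sqlContext.setConf("spark.sql.sources.partitionDiscovery.enabled", "true")
}}}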
/** * The entry point for working with structured data (rows and columns) in Spark. Allows the @@ -95,6 +56,8 @@ private[spark] class DefaultDialect extends Dialect { * @groupname config Configuration * @groupname dataframes Custom DataFrame Creation * @groupname Ungrouped Support functions for language integrated queries. + * + * @since 1.0.0 */ class SQLContext(@transient val sparkContext: SparkContext) extends org.apache.spark.Logging @@ -107,12 +70,13 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * @return Spark SQL configuration */ - protected[sql] def conf = tlSession.get().conf + protected[sql] def conf = currentSession().conf /** * Set Spark SQL configuration properties. * * @group config + * @since 1.0.0 */ def setConf(props: Properties): Unit = conf.setConf(props) @@ -120,6 +84,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Set the given Spark SQL configuration property. * * @group config + * @since 1.0.0 */ def setConf(key: String, value: String): Unit = conf.setConf(key, value) @@ -127,6 +92,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Return the value of Spark SQL configuration property for the given key. * * @group config + * @since 1.0.0 */ def getConf(key: String): String = conf.getConf(key) @@ -135,6 +101,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * yet, return `defaultValue`. * * @group config + * @since 1.0.0 */ def getConf(key: String, defaultValue: String): String = conf.getConf(key, defaultValue) @@ -143,20 +110,21 @@ class SQLContext(@transient val sparkContext: SparkContext) * This creates a new copy of the config properties in the form of a Map. * * @group config + * @since 1.0.0 */ def getAllConfs: immutable.Map[String, String] = conf.getAllConfs // TODO how to handle the temp table per user session? @transient - protected[sql] lazy val catalog: Catalog = new SimpleCatalog(true) + protected[sql] lazy val catalog: Catalog = new SimpleCatalog(conf) // TODO how to handle the temp function per user session? 
@transient - protected[sql] lazy val functionRegistry: FunctionRegistry = new SimpleFunctionRegistry(true) + protected[sql] lazy val functionRegistry: FunctionRegistry = new SimpleFunctionRegistry(conf) @transient protected[sql] lazy val analyzer: Analyzer = - new Analyzer(catalog, functionRegistry, caseSensitive = true) { + new Analyzer(catalog, functionRegistry, conf) { override val extendedResolutionRules = ExtractPythonUdfs :: sources.PreInsertCastAndRename :: @@ -176,10 +144,10 @@ class SQLContext(@transient val sparkContext: SparkContext) @transient protected[sql] val sqlParser = new SparkSQLParser(getSQLDialect().parse(_)) - protected[sql] def getSQLDialect(): Dialect = { + protected[sql] def getSQLDialect(): ParserDialect = { try { val clazz = Utils.classForName(dialectClassName) - clazz.newInstance().asInstanceOf[Dialect] + clazz.newInstance().asInstanceOf[ParserDialect] } catch { case NonFatal(e) => // Since we didn't find the available SQL Dialect, it will fail even for SET command: @@ -209,14 +177,33 @@ class SQLContext(@transient val sparkContext: SparkContext) protected[sql] val defaultSession = createSession() protected[sql] def dialectClassName = if (conf.dialect == "sql") { - classOf[DefaultDialect].getCanonicalName + classOf[DefaultParserDialect].getCanonicalName } else { conf.dialect } - sparkContext.getConf.getAll.foreach { - case (key, value) if key.startsWith("spark.sql") => setConf(key, value) - case _ => + { + // We extract spark sql settings from SparkContext's conf and put them to + // Spark SQL's conf. + // First, we populate the SQLConf (conf). So, we can make sure that other values using + // those settings in their construction can get the correct settings. + // For example, metadataHive in HiveContext may need both spark.sql.hive.metastore.version + // and spark.sql.hive.metastore.jars to get correctly constructed. + val properties = new Properties + sparkContext.getConf.getAll.foreach { + case (key, value) if key.startsWith("spark.sql") => properties.setProperty(key, value) + case _ => + } + // We directly put those settings to conf to avoid of calling setConf, which may have + // side-effects. For example, in HiveContext, setConf may cause executionHive and metadataHive + // get constructed. If we call setConf directly, the constructed metadataHive may have + // wrong settings, or the construction may fail. + conf.setConf(properties) + // After we have populated SQLConf, we call setConf to populate other confs in the subclass + // (e.g. hiveconf in HiveContext). + properties.foreach { + case (key, value) => setConf(key, value) + } } @transient @@ -228,6 +215,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the query planner for advanced functionality. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -238,6 +226,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a [[DataFrame]] with no rows or columns. * * @group basic + * @since 1.3.0 */ @Experimental @transient @@ -270,6 +259,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 * TODO move to SQLSession? */ @transient @@ -278,23 +268,27 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * Returns true if the table is currently cached in-memory. * @group cachemgmt + * @since 1.3.0 */ def isCached(tableName: String): Boolean = cacheManager.isCached(tableName) /** * Caches the specified table in-memory. 
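Because the constructor block above copies every `spark.sql.*` entry from the SparkContext's conf into the SQLConf before calling `setConf`, SQL settings can be supplied once on the SparkConf. A minimal sketch, assuming a local master and an illustrative app name:

{{{
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

val sparkConf = new SparkConf()
  .setMaster("local[*]")                       // illustrative deployment
  .setAppName("sql-conf-example")
  .set("spark.sql.shuffle.partitions", "64")   // picked up by SQLConf at construction
  .set("spark.sql.caseSensitive", "false")

val sc = new SparkContext(sparkConf)
val sqlContext = new SQLContext(sc)            // spark.sql.* keys are already applied here
}}}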
* @group cachemgmt + * @since 1.3.0 */ def cacheTable(tableName: String): Unit = cacheManager.cacheTable(tableName) /** * Removes the specified table from the in-memory cache. * @group cachemgmt + * @since 1.3.0 */ def uncacheTable(tableName: String): Unit = cacheManager.uncacheTable(tableName) /** * Removes all cached tables from the in-memory cache. + * @since 1.3.0 */ def clearCache(): Unit = cacheManager.clearCache() @@ -311,27 +305,40 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group basic + * @since 1.3.0 */ @Experimental object implicits extends Serializable { // scalastyle:on - /** Converts $"col name" into an [[Column]]. */ + /** + * Converts $"col name" into an [[Column]]. + * @since 1.3.0 + */ implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { - new ColumnName(sc.s(args :_*)) + new ColumnName(sc.s(args : _*)) } } - /** An implicit conversion that turns a Scala `Symbol` into a [[Column]]. */ + /** + * An implicit conversion that turns a Scala `Symbol` into a [[Column]]. + * @since 1.3.0 + */ implicit def symbolToColumn(s: Symbol): ColumnName = new ColumnName(s.name) - /** Creates a DataFrame from an RDD of case classes or tuples. */ + /** + * Creates a DataFrame from an RDD of case classes or tuples. + * @since 1.3.0 + */ implicit def rddToDataFrameHolder[A <: Product : TypeTag](rdd: RDD[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(rdd)) } - /** Creates a DataFrame from a local Seq of Product. */ + /** + * Creates a DataFrame from a local Seq of Product. + * @since 1.3.0 + */ implicit def localSeqToDataFrameHolder[A <: Product : TypeTag](data: Seq[A]): DataFrameHolder = { DataFrameHolder(self.createDataFrame(data)) @@ -341,7 +348,10 @@ class SQLContext(@transient val sparkContext: SparkContext) // making existing implicit conversions ambiguous. In particular, RDD[Double] is dangerous // because of [[DoubleRDDFunctions]]. - /** Creates a single column DataFrame from an RDD[Int]. */ + /** + * Creates a single column DataFrame from an RDD[Int]. + * @since 1.3.0 + */ implicit def intRddToDataFrameHolder(data: RDD[Int]): DataFrameHolder = { val dataType = IntegerType val rows = data.mapPartitions { iter => @@ -354,7 +364,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[Long]. */ + /** + * Creates a single column DataFrame from an RDD[Long]. + * @since 1.3.0 + */ implicit def longRddToDataFrameHolder(data: RDD[Long]): DataFrameHolder = { val dataType = LongType val rows = data.mapPartitions { iter => @@ -367,7 +380,10 @@ class SQLContext(@transient val sparkContext: SparkContext) DataFrameHolder(self.createDataFrame(rows, StructType(StructField("_1", dataType) :: Nil))) } - /** Creates a single column DataFrame from an RDD[String]. */ + /** + * Creates a single column DataFrame from an RDD[String]. + * @since 1.3.0 + */ implicit def stringRddToDataFrameHolder(data: RDD[String]): DataFrameHolder = { val dataType = StringType val rows = data.mapPartitions { iter => @@ -386,13 +402,14 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from an RDD of case classes. 
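A short sketch of the implicits documented above, assuming a top-level case class `Person(name: String, age: Int)` (required for TypeTag-based schema inference) and a SparkContext `sc`:

{{{
case class Person(name: String, age: Int)   // must be defined outside the enclosing method

import sqlContext.implicits._

// RDD of case classes -> DataFrame via the DataFrameHolder implicit.
val people = sc.parallelize(Seq(Person("alice", 30), Person("bob", 25))).toDF()

// $"..." and Scala symbols both resolve to columns.
people.filter($"age" > 28).select('name).show()
}}}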
* * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](rdd: RDD[A]): DataFrame = { SparkPlan.currentContext.set(self) val schema = ScalaReflection.schemaFor[A].dataType.asInstanceOf[StructType] val attributeSeq = schema.toAttributes - val rowRDD = RDDConversions.productToRowRdd(rdd, schema) + val rowRDD = RDDConversions.productToRowRdd(rdd, schema.map(_.dataType)) DataFrame(self, LogicalRDD(attributeSeq, rowRDD)(self)) } @@ -401,6 +418,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Creates a DataFrame from a local Seq of Product. * * @group dataframes + * @since 1.3.0 */ @Experimental def createDataFrame[A <: Product : TypeTag](data: Seq[A]): DataFrame = { @@ -414,6 +432,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Convert a [[BaseRelation]] created for external data sources into a [[DataFrame]]. * * @group dataframes + * @since 1.3.0 */ def baseRelationToDataFrame(baseRelation: BaseRelation): DataFrame = { DataFrame(this, LogicalRelation(baseRelation)) @@ -449,6 +468,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * }}} * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = { @@ -480,6 +500,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * the provided schema. Otherwise, there will be runtime exception. * * @group dataframes + * @since 1.3.0 */ @DeveloperApi def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { @@ -492,6 +513,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: RDD[_], beanClass: Class[_]): DataFrame = { val attributeSeq = getSchema(beanClass) @@ -520,273 +542,25 @@ class SQLContext(@transient val sparkContext: SparkContext) * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, * SELECT * queries will return the columns in an undefined order. * @group dataframes + * @since 1.3.0 */ def createDataFrame(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { createDataFrame(rdd.rdd, beanClass) } /** - * :: DeveloperApi :: - * Creates a [[DataFrame]] from an [[RDD]] containing [[Row]]s by applying a schema to this RDD. - * It is important to make sure that the structure of every [[Row]] of the provided RDD matches - * the provided schema. Otherwise, there will be runtime exception. - * Example: + * :: Experimental :: + * Returns a [[DataFrameReader]] that can be used to read data in as a [[DataFrame]]. * {{{ - * import org.apache.spark.sql._ - * import org.apache.spark.sql.types._ - * val sqlContext = new org.apache.spark.sql.SQLContext(sc) - * - * val schema = - * StructType( - * StructField("name", StringType, false) :: - * StructField("age", IntegerType, true) :: Nil) - * - * val people = - * sc.textFile("examples/src/main/resources/people.txt").map( - * _.split(",")).map(p => Row(p(0), p(1).trim.toInt)) - * val dataFrame = sqlContext. 
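The example removed from the `applySchema` Scaladoc below carries over to `createDataFrame` unchanged; restated as a standalone sketch (the file path is the stock example one):

{{{
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

val schema = StructType(
  StructField("name", StringType, nullable = false) ::
  StructField("age", IntegerType, nullable = true) :: Nil)

val rows = sc.textFile("examples/src/main/resources/people.txt")
  .map(_.split(","))
  .map(p => Row(p(0), p(1).trim.toInt))

val people = sqlContext.createDataFrame(rows, schema)
people.registerTempTable("people")
sqlContext.sql("SELECT name FROM people").collect().foreach(println)
}}}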
applySchema(people, schema) - * dataFrame.printSchema - * // root - * // |-- name: string (nullable = false) - * // |-- age: integer (nullable = true) - * - * dataFrame.registerTempTable("people") - * sqlContext.sql("select name from people").collect.foreach(println) + * sqlContext.read.parquet("/path/to/file.parquet") + * sqlContext.read.schema(schema).json("/path/to/file.json") * }}} - */ - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD, schema) - } - - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD, schema) - } - - /** - * Applies a schema to an RDD of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. - */ - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = { - createDataFrame(rdd, beanClass) - } - - /** - * Applies a schema to an RDD of Java Beans. - * - * WARNING: Since there is no guaranteed ordering for fields in a Java Bean, - * SELECT * queries will return the columns in an undefined order. - */ - @deprecated("use createDataFrame", "1.3.0") - def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { - createDataFrame(rdd, beanClass) - } - - /** - * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty - * [[DataFrame]] if no paths are passed in. - * - * @group specificdata - */ - @scala.annotation.varargs - def parquetFile(paths: String*): DataFrame = { - if (paths.isEmpty) { - emptyDataFrame - } else if (conf.parquetUseDataSourceApi) { - baseRelationToDataFrame(parquet.ParquetRelation2(paths, Map.empty)(this)) - } else { - DataFrame(this, parquet.ParquetRelation( - paths.mkString(","), Some(sparkContext.hadoopConfiguration), this)) - } - } - - /** - * Loads a JSON file (one object per line), returning the result as a [[DataFrame]]. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - */ - def jsonFile(path: String): DataFrame = jsonFile(path, 1.0) - - /** - * :: Experimental :: - * Loads a JSON file (one object per line) and applies the given schema, - * returning the result as a [[DataFrame]]. - * - * @group specificdata - */ - @Experimental - def jsonFile(path: String, schema: StructType): DataFrame = - load("json", schema, Map("path" -> path)) - - /** - * :: Experimental :: - * @group specificdata - */ - @Experimental - def jsonFile(path: String, samplingRatio: Double): DataFrame = - load("json", Map("path" -> path, "samplingRatio" -> samplingRatio.toString)) - - /** - * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a - * [[DataFrame]]. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - */ - def jsonRDD(json: RDD[String]): DataFrame = jsonRDD(json, 1.0) - - - /** - * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a - * [[DataFrame]]. - * It goes through the entire dataset once to determine the schema. - * - * @group specificdata - */ - def jsonRDD(json: JavaRDD[String]): DataFrame = jsonRDD(json.rdd, 1.0) - - /** - * :: Experimental :: - * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, - * returning the result as a [[DataFrame]]. 
- * - * @group specificdata - */ - @Experimental - def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { - val columnNameOfCorruptJsonRecord = conf.columnNameOfCorruptRecord - val appliedSchema = - Option(schema).getOrElse( - JsonRDD.nullTypeToStringType( - JsonRDD.inferSchema(json, 1.0, columnNameOfCorruptJsonRecord))) - val rowRDD = JsonRDD.jsonStringToRow(json, appliedSchema, columnNameOfCorruptJsonRecord) - createDataFrame(rowRDD, appliedSchema, needsConversion = false) - } - - /** - * :: Experimental :: - * Loads an JavaRDD storing JSON objects (one object per record) and applies the given - * schema, returning the result as a [[DataFrame]]. - * - * @group specificdata - */ - @Experimental - def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { - jsonRDD(json.rdd, schema) - } - - /** - * :: Experimental :: - * Loads an RDD[String] storing JSON objects (one object per record) inferring the - * schema, returning the result as a [[DataFrame]]. - * - * @group specificdata - */ - @Experimental - def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { - val columnNameOfCorruptJsonRecord = conf.columnNameOfCorruptRecord - val appliedSchema = - JsonRDD.nullTypeToStringType( - JsonRDD.inferSchema(json, samplingRatio, columnNameOfCorruptJsonRecord)) - val rowRDD = JsonRDD.jsonStringToRow(json, appliedSchema, columnNameOfCorruptJsonRecord) - createDataFrame(rowRDD, appliedSchema, needsConversion = false) - } - - /** - * :: Experimental :: - * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the - * schema, returning the result as a [[DataFrame]]. - * - * @group specificdata - */ - @Experimental - def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { - jsonRDD(json.rdd, samplingRatio); - } - - /** - * :: Experimental :: - * Returns the dataset stored at path as a DataFrame, - * using the default data source configured by spark.sql.sources.default. - * - * @group genericdata - */ - @Experimental - def load(path: String): DataFrame = { - val dataSourceName = conf.defaultDataSourceName - load(path, dataSourceName) - } - - /** - * :: Experimental :: - * Returns the dataset stored at path as a DataFrame, using the given data source. - * - * @group genericdata - */ - @Experimental - def load(path: String, source: String): DataFrame = { - load(source, Map("path" -> path)) - } - - /** - * :: Experimental :: - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame. - * - * @group genericdata - */ - @Experimental - def load(source: String, options: java.util.Map[String, String]): DataFrame = { - load(source, options.toMap) - } - - /** - * :: Experimental :: - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame. * * @group genericdata + * @since 1.4.0 */ @Experimental - def load(source: String, options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, None, source, options) - DataFrame(this, LogicalRelation(resolved.relation)) - } - - /** - * :: Experimental :: - * (Java-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. 
- * - * @group genericdata - */ - @Experimental - def load( - source: String, - schema: StructType, - options: java.util.Map[String, String]): DataFrame = { - load(source, schema, options.toMap) - } - - /** - * :: Experimental :: - * (Scala-specific) Returns the dataset specified by the given data source and - * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. - * @group genericdata - */ - @Experimental - def load( - source: String, - schema: StructType, - options: Map[String, String]): DataFrame = { - val resolved = ResolvedDataSource(this, Some(schema), source, options) - DataFrame(this, LogicalRelation(resolved.relation)) - } + def read: DataFrameReader = new DataFrameReader(this) /** * :: Experimental :: @@ -794,6 +568,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * It will use the default data source configured by spark.sql.sources.default. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable(tableName: String, path: String): DataFrame = { @@ -807,6 +582,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * and returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -822,6 +598,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -838,6 +615,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -863,6 +641,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -880,6 +659,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * a set of options. Then, returns the corresponding DataFrame. * * @group ddl_ops + * @since 1.3.0 */ @Experimental def createExternalTable( @@ -901,85 +681,56 @@ class SQLContext(@transient val sparkContext: SparkContext) } /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. - * - * @group specificdata + * Registers the given [[DataFrame]] as a temporary table in the catalog. Temporary tables exist + * only during the lifetime of this instance of SQLContext. */ - @Experimental - def jdbc(url: String, table: String): DataFrame = { - jdbc(url, table, JDBCRelation.columnPartition(null)) + private[sql] def registerDataFrameAsTable(df: DataFrame, tableName: String): Unit = { + catalog.registerTable(Seq(tableName), df.logicalPlan) } /** - * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. Partitions of the table will be retrieved in parallel based on the parameters - * passed to this function. + * Drops the temporary table with the given table name in the catalog. If the table has been + * cached/persisted before, it's also unpersisted. * - * @param columnName the name of a column of integral type that will be used for partitioning. - * @param lowerBound the minimum value of `columnName` used to decide partition stride - * @param upperBound the maximum value of `columnName` used to decide partition stride - * @param numPartitions the number of partitions. 
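The new `read` entry point returns a `DataFrameReader`, so the `load`/`jsonFile`/`parquetFile` overloads removed above collapse into one builder chain; a sketch with placeholder paths, where `mySchema` stands in for any StructType built beforehand:

{{{
// Generic load through a named data source, with a source-specific option.
val df1 = sqlContext.read
  .format("json")
  .option("samplingRatio", "0.5")
  .load("/path/to/file.json")

// Shortcuts for the built-in sources.
val df2 = sqlContext.read.parquet("/path/to/file.parquet")
val df3 = sqlContext.read.schema(mySchema).json("/path/to/file.json")
}}}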
the range `minValue`-`maxValue` will be split - * evenly into this many partitions + * @param tableName the name of the table to be unregistered. * - * @group specificdata + * @group basic + * @since 1.3.0 */ - @Experimental - def jdbc( - url: String, - table: String, - columnName: String, - lowerBound: Long, - upperBound: Long, - numPartitions: Int): DataFrame = { - val partitioning = JDBCPartitioningInfo(columnName, lowerBound, upperBound, numPartitions) - val parts = JDBCRelation.columnPartition(partitioning) - jdbc(url, table, parts) + def dropTempTable(tableName: String): Unit = { + cacheManager.tryUncacheQuery(table(tableName)) + catalog.unregisterTable(Seq(tableName)) } /** * :: Experimental :: - * Construct a [[DataFrame]] representing the database table accessible via JDBC URL - * url named table. The theParts parameter gives a list expressions - * suitable for inclusion in WHERE clauses; each one defines one partition - * of the [[DataFrame]]. + * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements + * in an range from `start` to `end`(exclusive) with step value 1. * - * @group specificdata + * @since 1.4.0 + * @group dataframe */ @Experimental - def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { - val parts: Array[Partition] = theParts.zipWithIndex.map { case (part, i) => - JDBCPartition(part, i) : Partition - } - jdbc(url, table, parts) - } - - private def jdbc(url: String, table: String, parts: Array[Partition]): DataFrame = { - val relation = JDBCRelation(url, table, parts)(this) - baseRelationToDataFrame(relation) - } - - /** - * Registers the given [[DataFrame]] as a temporary table in the catalog. Temporary tables exist - * only during the lifetime of this instance of SQLContext. - */ - private[sql] def registerDataFrameAsTable(df: DataFrame, tableName: String): Unit = { - catalog.registerTable(Seq(tableName), df.logicalPlan) + def range(start: Long, end: Long): DataFrame = { + createDataFrame( + sparkContext.range(start, end).map(Row(_)), + StructType(StructField("id", LongType, nullable = false) :: Nil)) } /** - * Drops the temporary table with the given table name in the catalog. If the table has been - * cached/persisted before, it's also unpersisted. - * - * @param tableName the name of the table to be unregistered. + * :: Experimental :: + * Creates a [[DataFrame]] with a single [[LongType]] column named `id`, containing elements + * in an range from `start` to `end`(exclusive) with an step value, with partition number + * specified. * - * @group basic + * @since 1.4.0 + * @group dataframe */ - def dropTempTable(tableName: String): Unit = { - cacheManager.tryUncacheQuery(table(tableName)) - catalog.unregisterTable(Seq(tableName)) + @Experimental + def range(start: Long, end: Long, step: Long, numPartitions: Int): DataFrame = { + createDataFrame( + sparkContext.range(start, end, step, numPartitions).map(Row(_)), + StructType(StructField("id", LongType, nullable = false) :: Nil)) } /** @@ -987,6 +738,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * used for SQL parsing can be configured with 'spark.sql.dialect'. * * @group basic + * @since 1.3.0 */ def sql(sqlText: String): DataFrame = { DataFrame(this, parseSql(sqlText)) @@ -996,6 +748,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the specified table as a [[DataFrame]]. 
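The new `range` helpers above build a single-column `LongType` DataFrame named `id` directly from `SparkContext.range`; a brief sketch:

{{{
// 0, 1, ..., 999 in a single "id" column.
val ids = sqlContext.range(0, 1000)

// The same range with an explicit step and partition count.
val evenIds = sqlContext.range(0, 1000, 2, 4)

ids.count()       // 1000
evenIds.count()   // 500
}}}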
* * @group ddl_ops + * @since 1.3.0 */ def table(tableName: String): DataFrame = DataFrame(this, catalog.lookupRelation(Seq(tableName))) @@ -1006,6 +759,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(): DataFrame = { DataFrame(this, ShowTablesCommand(None)) @@ -1017,6 +771,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * indicating if a table is a temporary one or not). * * @group ddl_ops + * @since 1.3.0 */ def tables(databaseName: String): DataFrame = { DataFrame(this, ShowTablesCommand(Some(databaseName))) @@ -1026,6 +781,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the current database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(): Array[String] = { catalog.getTables(None).map { @@ -1037,6 +793,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns the names of tables in the given database as an array. * * @group ddl_ops + * @since 1.3.0 */ def tableNames(databaseName: String): Array[String] = { catalog.getTables(Some(databaseName)).map { @@ -1091,7 +848,7 @@ class SQLContext(@transient val sparkContext: SparkContext) val projectSet = AttributeSet(projectList.flatMap(_.references)) val filterSet = AttributeSet(filterPredicates.flatMap(_.references)) val filterCondition = - prunePushedDownFilters(filterPredicates).reduceLeftOption(expressions.And) + prunePushedDownFilters(filterPredicates).reduceLeftOption(catalyst.expressions.And) // Right now we still use a projection even if the only evaluation is applying an alias // to a column. Since this is a no-op, it could be avoided. However, using this @@ -1172,7 +929,7 @@ class SQLContext(@transient val sparkContext: SparkContext) // TODO: Don't just pick the first one... lazy val sparkPlan: SparkPlan = { SparkPlan.currentContext.set(self) - planner(optimizedPlan).next() + planner.plan(optimizedPlan).next() } // executedPlan should not be used to initialize any SparkPlan. It should be // only used for execution. @@ -1189,13 +946,17 @@ class SQLContext(@transient val sparkContext: SparkContext) |${stringOrError(executedPlan)} """.stripMargin.trim - override def toString: String = + override def toString: String = { + def output = + analyzed.output.map(o => s"${o.name}: ${o.dataType.simpleString}").mkString(", ") + // TODO previously will output RDD details by run (${stringOrError(toRdd.toDebugString)}) // however, the `toRdd` will cause the real execution, which is not what we want. // We need to think about how to avoid the side effect. s"""== Parsed Logical Plan == |${stringOrError(logical)} |== Analyzed Logical Plan == + |${stringOrError(output)} |${stringOrError(analyzed)} |== Optimized Logical Plan == |${stringOrError(optimizedPlan)} @@ -1204,6 +965,7 @@ class SQLContext(@transient val sparkContext: SparkContext) |Code Generation: ${stringOrError(executedPlan.codegenEnabled)} |== RDD == """.stripMargin.trim + } } /** @@ -1266,12 +1028,346 @@ class SQLContext(@transient val sparkContext: SparkContext) * Returns a Catalyst Schema for the given java bean class. 
*/ protected def getSchema(beanClass: Class[_]): Seq[AttributeReference] = { - val (dataType, _) = JavaTypeInference.inferDataType(TypeToken.of(beanClass)) + val (dataType, _) = JavaTypeInference.inferDataType(beanClass) dataType.asInstanceOf[StructType].fields.map { f => AttributeReference(f.name, f.dataType, f.nullable)() } } + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // Deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + /** + * @deprecated As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rowRDD: RDD[Row], schema: StructType): DataFrame = { + createDataFrame(rowRDD, schema) + } + + /** + * @deprecated As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { + createDataFrame(rowRDD, schema) + } + + /** + * @deprecated As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rdd: RDD[_], beanClass: Class[_]): DataFrame = { + createDataFrame(rdd, beanClass) + } + + /** + * @deprecated As of 1.3.0, replaced by `createDataFrame()`. + */ + @deprecated("use createDataFrame", "1.3.0") + def applySchema(rdd: JavaRDD[_], beanClass: Class[_]): DataFrame = { + createDataFrame(rdd, beanClass) + } + + /** + * Loads a Parquet file, returning the result as a [[DataFrame]]. This function returns an empty + * [[DataFrame]] if no paths are passed in. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().parquet()`. + */ + @deprecated("Use read.parquet()", "1.4.0") + @scala.annotation.varargs + def parquetFile(paths: String*): DataFrame = { + if (paths.isEmpty) { + emptyDataFrame + } else if (conf.parquetUseDataSourceApi) { + read.parquet(paths : _*) + } else { + DataFrame(this, parquet.ParquetRelation( + paths.mkString(","), Some(sparkContext.hadoopConfiguration), this)) + } + } + + /** + * Loads a JSON file (one object per line), returning the result as a [[DataFrame]]. + * It goes through the entire dataset once to determine the schema. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String): DataFrame = { + read.json(path) + } + + /** + * Loads a JSON file (one object per line) and applies the given schema, + * returning the result as a [[DataFrame]]. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String, schema: StructType): DataFrame = { + read.schema(schema).json(path) + } + + /** + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonFile(path: String, samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(path) + } + + /** + * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a + * [[DataFrame]]. + * It goes through the entire dataset once to determine the schema. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. 
+ */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String]): DataFrame = read.json(json) + + /** + * Loads an RDD[String] storing JSON objects (one object per record), returning the result as a + * [[DataFrame]]. + * It goes through the entire dataset once to determine the schema. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String]): DataFrame = read.json(json) + + /** + * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, + * returning the result as a [[DataFrame]]. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String], schema: StructType): DataFrame = { + read.schema(schema).json(json) + } + + /** + * Loads a JavaRDD storing JSON objects (one object per record) and applies the given + * schema, returning the result as a [[DataFrame]]. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String], schema: StructType): DataFrame = { + read.schema(schema).json(json) + } + + /** + * Loads an RDD[String] storing JSON objects (one object per record) inferring the + * schema, returning the result as a [[DataFrame]]. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: RDD[String], samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(json) + } + + /** + * Loads a JavaRDD[String] storing JSON objects (one object per record) inferring the + * schema, returning the result as a [[DataFrame]]. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().json()`. + */ + @deprecated("Use read.json()", "1.4.0") + def jsonRDD(json: JavaRDD[String], samplingRatio: Double): DataFrame = { + read.option("samplingRatio", samplingRatio.toString).json(json) + } + + /** + * Returns the dataset stored at path as a DataFrame, + * using the default data source configured by spark.sql.sources.default. + * + * @group genericdata + * @deprecated As of 1.4.0, replaced by `read().load(path)`. + */ + @deprecated("Use read.load(path)", "1.4.0") + def load(path: String): DataFrame = { + read.load(path) + } + + /** + * Returns the dataset stored at path as a DataFrame, using the given data source. + * + * @group genericdata + * @deprecated As of 1.4.0, replaced by `read().format(source).load(path)`. + */ + @deprecated("Use read.format(source).load(path)", "1.4.0") + def load(path: String, source: String): DataFrame = { + read.format(source).load(path) + } + + /** + * (Java-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame. + * + * @group genericdata + * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`. + */ + @deprecated("Use read.format(source).options(options).load()", "1.4.0") + def load(source: String, options: java.util.Map[String, String]): DataFrame = { + read.options(options).format(source).load() + } + + /** + * (Scala-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame. + * + * @group genericdata + * @deprecated As of 1.4.0, replaced by `read().format(source).options(options).load()`.
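A similar hedged sketch for the JSON-RDD and generic `load` deprecations above; `jsonStrings` is an assumed RDD[String] and the path is a placeholder:

// Before: sqlContext.jsonRDD(jsonStrings, 0.5)
val fromRdd = sqlContext.read.option("samplingRatio", "0.5").json(jsonStrings)

// Before: sqlContext.load("/data/table", "json")
val generic = sqlContext.read.format("json").load("/data/table")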
+ */ + @deprecated("Use read.format(source).options(options).load()", "1.4.0") + def load(source: String, options: Map[String, String]): DataFrame = { + read.options(options).format(source).load() + } + + /** + * (Java-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. + * + * @group genericdata + * @deprecated As of 1.4.0, replaced by + * `read().format(source).schema(schema).options(options).load()`. + */ + @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") + def load(source: String, schema: StructType, options: java.util.Map[String, String]): DataFrame = + { + read.format(source).schema(schema).options(options).load() + } + + /** + * (Scala-specific) Returns the dataset specified by the given data source and + * a set of options as a DataFrame, using the given schema as the schema of the DataFrame. + * + * @group genericdata + * @deprecated As of 1.4.0, replaced by + * `read().format(source).schema(schema).options(options).load()`. + */ + @deprecated("Use read.format(source).schema(schema).options(options).load()", "1.4.0") + def load(source: String, schema: StructType, options: Map[String, String]): DataFrame = { + read.format(source).schema(schema).options(options).load() + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().jdbc()`. + */ + @deprecated("use read.jdbc()", "1.4.0") + def jdbc(url: String, table: String): DataFrame = { + read.jdbc(url, table, new Properties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. Partitions of the table will be retrieved in parallel based on the parameters + * passed to this function. + * + * @param columnName the name of a column of integral type that will be used for partitioning. + * @param lowerBound the minimum value of `columnName` used to decide partition stride + * @param upperBound the maximum value of `columnName` used to decide partition stride + * @param numPartitions the number of partitions. The range `minValue`-`maxValue` will be split + * evenly into this many partitions + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().jdbc()`. + */ + @deprecated("use read.jdbc()", "1.4.0") + def jdbc( + url: String, + table: String, + columnName: String, + lowerBound: Long, + upperBound: Long, + numPartitions: Int): DataFrame = { + read.jdbc(url, table, columnName, lowerBound, upperBound, numPartitions, new Properties) + } + + /** + * Construct a [[DataFrame]] representing the database table accessible via JDBC URL + * url named table. The theParts parameter gives a list of expressions + * suitable for inclusion in WHERE clauses; each one defines one partition + * of the [[DataFrame]]. + * + * @group specificdata + * @deprecated As of 1.4.0, replaced by `read().jdbc()`.
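A hedged sketch of the corresponding JDBC migration; the URL, table name, partition column, and credentials below are placeholders:

import java.util.Properties

val props = new Properties()
props.setProperty("user", "spark")      // placeholder credentials
props.setProperty("password", "secret")

// Before: sqlContext.jdbc(url, "orders", "order_id", 0L, 1000000L, 10)
val orders = sqlContext.read.jdbc(
  "jdbc:postgresql://db-host:5432/sales", // placeholder URL
  "orders", "order_id", 0L, 1000000L, 10, props)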
+ */ + @deprecated("use read.jdbc()", "1.4.0") + def jdbc(url: String, table: String, theParts: Array[String]): DataFrame = { + read.jdbc(url, table, theParts, new Properties) + } + + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + // End of deprecated methods + //////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////// + + + // Register a successfully instantiated context to the singleton. This should be at the end of + // the class definition so that the singleton is updated only if there is no exception in the + // construction of the instance. + SQLContext.setLastInstantiatedContext(self) } +/** + * This SQLContext object contains utility functions to create a singleton SQLContext instance, + * or to get the last created SQLContext instance. + */ +object SQLContext { + + private val INSTANTIATION_LOCK = new Object() + + /** + * Reference to the last created SQLContext. + */ + @transient private val lastInstantiatedContext = new AtomicReference[SQLContext]() + + /** + * Get the singleton SQLContext if it exists or create a new one using the given SparkContext. + * This function can be used to create a singleton SQLContext object that can be shared across + * the JVM. + */ + def getOrCreate(sparkContext: SparkContext): SQLContext = { + INSTANTIATION_LOCK.synchronized { + if (lastInstantiatedContext.get() == null) { + new SQLContext(sparkContext) + } + } + lastInstantiatedContext.get() + } + private[sql] def clearLastInstantiatedContext(): Unit = { + INSTANTIATION_LOCK.synchronized { + lastInstantiatedContext.set(null) + } + } + + private[sql] def setLastInstantiatedContext(sqlContext: SQLContext): Unit = { + INSTANTIATION_LOCK.synchronized { + lastInstantiatedContext.set(sqlContext) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala index 5921eaf5e63f4..305b306a79871 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSQLParser.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql - import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser @@ -55,15 +54,15 @@ private[sql] class SparkSQLParser(fallback: String => LogicalPlan) extends Abstr } } - protected val AS = Keyword("AS") - protected val CACHE = Keyword("CACHE") - protected val CLEAR = Keyword("CLEAR") - protected val IN = Keyword("IN") - protected val LAZY = Keyword("LAZY") - protected val SET = Keyword("SET") - protected val SHOW = Keyword("SHOW") - protected val TABLE = Keyword("TABLE") - protected val TABLES = Keyword("TABLES") + protected val AS = Keyword("AS") + protected val CACHE = Keyword("CACHE") + protected val CLEAR = Keyword("CLEAR") + protected val IN = Keyword("IN") + protected val LAZY = Keyword("LAZY") + protected val SET = Keyword("SET") + protected val SHOW = Keyword("SHOW") + protected val TABLE = Keyword("TABLE") + protected val TABLES = Keyword("TABLES") protected val UNCACHE = Keyword("UNCACHE") override protected lazy val start: Parser[LogicalPlan] = cache | uncache | set | show | others diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index b97aaf73529a3..3cc5c2441d8a5
100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -33,6 +33,8 @@ import org.apache.spark.sql.types.DataType /** * Functions for registering user-defined functions. Use [[SQLContext.udf]] to access this. + * + * @since 1.3.0 */ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { @@ -44,6 +46,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { envVars: JMap[String, String], pythonIncludes: JList[String], pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]], stringDataType: String): Unit = { @@ -68,6 +71,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { envVars, pythonIncludes, pythonExec, + pythonVer, broadcastVars, accumulator, dataType, @@ -87,6 +91,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of ${x} arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -104,6 +109,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { println(s""" |/** | * Register a user-defined function with ${i} arguments. + | * @since 1.3.0 | */ |def register(name: String, f: UDF$i[$extTypeArgs, _], returnType: DataType) = { | functionRegistry.registerFunction( @@ -116,6 +122,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 0 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -127,6 +134,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 1 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -138,6 +146,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 2 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag](name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -149,6 +158,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 3 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -160,6 +170,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 4 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
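An illustrative sketch, not part of this patch, of the Scala `register` variants annotated above; the temporary table "people" is assumed to be registered already:

// Register a one-argument Scala closure and call it from SQL text
sqlContext.udf.register("strLen", (s: String) => s.length)
val withLen = sqlContext.sql("SELECT name, strLen(name) AS name_len FROM people")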
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -171,6 +182,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 5 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](name: String, func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -182,6 +194,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 6 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -193,6 +206,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 7 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](name: String, func: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -204,6 +218,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 8 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](name: String, func: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -215,6 +230,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 9 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](name: String, func: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -226,6 +242,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 10 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](name: String, func: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -237,6 +254,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 11 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag](name: String, func: Function11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -248,6 +266,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 12 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag](name: String, func: Function12[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -259,6 +278,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 13 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag](name: String, func: Function13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -270,6 +290,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 14 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag](name: String, func: Function14[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -281,6 +302,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 15 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag](name: String, func: Function15[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -292,6 +314,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 16 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag](name: String, func: Function16[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -303,6 +326,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 17 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag](name: String, func: Function17[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -314,6 +338,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 18 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag](name: String, func: Function18[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -325,6 +350,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 19 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag](name: String, func: Function19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -336,6 +362,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 20 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag](name: String, func: Function20[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -347,6 +374,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 21 arguments as user-defined function (UDF). * @tparam RT return type of UDF. 
+ * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag](name: String, func: Function21[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -358,6 +386,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a Scala closure of 22 arguments as user-defined function (UDF). * @tparam RT return type of UDF. + * @since 1.3.0 */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag, A22: TypeTag](name: String, func: Function22[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType @@ -371,6 +400,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 1 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -380,6 +410,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 2 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -389,6 +420,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 3 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -398,6 +430,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 4 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -407,6 +440,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 5 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -416,6 +450,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 6 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -425,6 +460,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 7 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -434,6 +470,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 8 arguments. 
+ * @since 1.3.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -443,6 +480,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 9 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -452,6 +490,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 10 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -461,6 +500,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 11 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -470,6 +510,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 12 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -479,6 +520,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 13 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -488,6 +530,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 14 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -497,6 +540,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 15 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -506,6 +550,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 16 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -515,6 +560,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 17 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -524,6 +570,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 18 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -533,6 +580,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 19 arguments. 
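For the Java-facing overloads above, a hedged sketch (written in Scala) using the UDF2 interface; the function name and types are illustrative:

import org.apache.spark.sql.api.java.UDF2
import org.apache.spark.sql.types.DataTypes

sqlContext.udf.register("plus", new UDF2[Integer, Integer, Integer] {
  def call(a: Integer, b: Integer): Integer = a.intValue + b.intValue
}, DataTypes.IntegerType)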
+ * @since 1.3.0 */ def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -542,6 +590,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 20 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -551,6 +600,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 21 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( @@ -560,6 +610,7 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { /** * Register a user-defined function with 22 arguments. + * @since 1.3.0 */ def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { functionRegistry.registerFunction( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala index 295db539adfc4..a02e202d2eebc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import java.util.{List => JList, Map => JMap} import org.apache.spark.Accumulator +import org.apache.spark.annotation.Experimental import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.broadcast.Broadcast import org.apache.spark.sql.catalyst.expressions.ScalaUdf @@ -36,7 +37,10 @@ import org.apache.spark.sql.types.DataType * // Projects a column that adds a prediction column based on the score column. * df.select( predict(df("score")) ) * }}} + * + * @since 1.3.0 */ +@Experimental case class UserDefinedFunction protected[sql] (f: AnyRef, dataType: DataType) { def apply(exprs: Column*): Column = { @@ -54,14 +58,15 @@ private[sql] case class UserDefinedPythonFunction( envVars: JMap[String, String], pythonIncludes: JList[String], pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]], dataType: DataType) { /** Returns a [[Column]] that will evaluate to calling this UDF with the given input. 
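A hedged sketch of the now-@Experimental UserDefinedFunction used from the DataFrame side; `df` and its `name` column are assumptions:

import org.apache.spark.sql.functions.udf

val strLen = udf((s: String) => s.length)
df.select(df("name"), strLen(df("name")).as("name_len"))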
*/ def apply(exprs: Column*): Column = { - val udf = PythonUDF(name, command, envVars, pythonIncludes, pythonExec, broadcastVars, - accumulator, dataType, exprs.map(_.expr)) + val udf = PythonUDF(name, command, envVars, pythonIncludes, pythonExec, pythonVer, + broadcastVars, accumulator, dataType, exprs.map(_.expr)) Column(udf) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index ae77f72998a22..604f3124e23ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -72,17 +72,6 @@ private[r] object SQLUtils { sqlContext.createDataFrame(rowRDD, schema) } - // A helper to include grouping columns in Agg() - def aggWithGrouping(gd: GroupedData, exprs: Column*): DataFrame = { - val aggExprs = exprs.map { col => - col.expr match { - case expr: NamedExpression => expr - case expr: Expression => Alias(expr, expr.simpleString)() - } - } - gd.toDF(aggExprs) - } - def dfToRowRDD(df: DataFrame): JavaRDD[Array[Byte]] = { df.map(r => rowToRBytes(r)) } @@ -117,7 +106,7 @@ private[r] object SQLUtils { dfCols.map { col => colToRBytes(col) - } + } } def convertRowsToColumns(localDF: Array[Row], numCols: Int): Array[Array[Any]] = { @@ -132,7 +121,7 @@ private[r] object SQLUtils { val numRows = col.length val bos = new ByteArrayOutputStream() val dos = new DataOutputStream(bos) - + SerDe.writeInt(dos, numRows) col.map { item => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala index d9b6fb43ab83d..3db26fad2b92f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala @@ -236,7 +236,7 @@ private[sql] case class InMemoryColumnarTableScan( case GreaterThanOrEqual(a: AttributeReference, l: Literal) => l <= statsFor(a).upperBound case GreaterThanOrEqual(l: Literal, a: AttributeReference) => statsFor(a).lowerBound <= l - case IsNull(a: Attribute) => statsFor(a).nullCount > 0 + case IsNull(a: Attribute) => statsFor(a).nullCount > 0 case IsNotNull(a: Attribute) => statsFor(a).count - statsFor(a).nullCount > 0 } @@ -267,7 +267,7 @@ private[sql] case class InMemoryColumnarTableScan( private val inMemoryPartitionPruningEnabled = sqlContext.conf.inMemoryPartitionPruning - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { if (enableAccumulators) { readPartitions.setValue(0) readBatches.setValue(0) @@ -314,7 +314,7 @@ private[sql] case class InMemoryColumnarTableScan( columnAccessors(i).extractTo(nextRow, i) i += 1 } - nextRow + if (attributes.isEmpty) Row.empty else nextRow } override def hasNext: Boolean = columnAccessors(0).hasNext diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala index 18b1ba4c5c4b9..8d16749697aa2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Aggregate.scala @@ -121,7 +121,7 @@ case class Aggregate( } } - override def execute(): RDD[Row] = attachTree(this, "execute") { + protected override def doExecute(): RDD[Row] = attachTree(this, "execute") { if (groupingExpressions.isEmpty) { 
child.execute().mapPartitions { iter => val buffer = newAggregateBuffer() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala similarity index 97% rename from sql/core/src/main/scala/org/apache/spark/sql/CacheManager.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 18584c2dcf797..5fcc48a67948b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -15,18 +15,19 @@ * limitations under the License. */ -package org.apache.spark.sql +package org.apache.spark.sql.execution import java.util.concurrent.locks.ReentrantReadWriteLock import org.apache.spark.Logging import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.columnar.InMemoryRelation +import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK /** Holds a cached logical plan and its data */ -private case class CachedData(plan: LogicalPlan, cachedRepresentation: InMemoryRelation) +private[sql] case class CachedData(plan: LogicalPlan, cachedRepresentation: InMemoryRelation) /** * Provides support in a SQLContext for caching query results and automatically using these cached diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index 5b2e46962cd3b..f25d10fec0411 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -17,17 +17,18 @@ package org.apache.spark.sql.execution +import org.apache.spark.{HashPartitioner, Partitioner, RangePartitioner, SparkEnv} import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.shuffle.sort.SortShuffleManager -import org.apache.spark.{SparkEnv, HashPartitioner, RangePartitioner} import org.apache.spark.rdd.{RDD, ShuffledRDD} import org.apache.spark.serializer.Serializer -import org.apache.spark.sql.{SQLContext, Row} +import org.apache.spark.shuffle.sort.SortShuffleManager +import org.apache.spark.shuffle.unsafe.UnsafeShuffleManager import org.apache.spark.sql.catalyst.errors.attachTree import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{SQLContext, Row} import org.apache.spark.util.MutablePair object Exchange { @@ -59,11 +60,69 @@ case class Exchange( override def output: Seq[Attribute] = child.output - /** We must copy rows when sort based shuffle is on */ - protected def sortBasedShuffleOn = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] - - private val bypassMergeThreshold = - child.sqlContext.sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + /** + * Determines whether records must be defensively copied before being sent to the shuffle. + * Several of Spark's shuffle components will buffer deserialized Java objects in memory. The + * shuffle code assumes that objects are immutable and hence does not perform its own defensive + * copying. In Spark SQL, however, operators' iterators return the same mutable `Row` object. 
In + * order to properly shuffle the output of these operators, we need to perform our own copying + * prior to sending records to the shuffle. This copying is expensive, so we try to avoid it + * whenever possible. This method encapsulates the logic for choosing when to copy. + * + * In the long run, we might want to push this logic into core's shuffle APIs so that we don't + * have to rely on knowledge of core internals here in SQL. + * + * See SPARK-2967, SPARK-4479, and SPARK-7375 for more discussion of this issue. + * + * @param partitioner the partitioner for the shuffle + * @param serializer the serializer that will be used to write rows + * @return true if rows should be copied before being shuffled, false otherwise + */ + private def needToCopyObjectsBeforeShuffle( + partitioner: Partitioner, + serializer: Serializer): Boolean = { + // Note: even though we only use the partitioner's `numPartitions` field, we require it to be + // passed instead of directly passing the number of partitions in order to guard against + // corner-cases where a partitioner constructed with `numPartitions` partitions may output + // fewer partitions (like RangePartitioner, for example). + val conf = child.sqlContext.sparkContext.conf + val shuffleManager = SparkEnv.get.shuffleManager + val sortBasedShuffleOn = shuffleManager.isInstanceOf[SortShuffleManager] || + shuffleManager.isInstanceOf[UnsafeShuffleManager] + val bypassMergeThreshold = conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + val serializeMapOutputs = conf.getBoolean("spark.shuffle.sort.serializeMapOutputs", true) + if (newOrdering.nonEmpty) { + // If a new ordering is required, then records will be sorted with Spark's `ExternalSorter`, + // which requires a defensive copy. + true + } else if (sortBasedShuffleOn) { + val bypassIsSupported = SparkEnv.get.shuffleManager.isInstanceOf[SortShuffleManager] + if (bypassIsSupported && partitioner.numPartitions <= bypassMergeThreshold) { + // If we're using the original SortShuffleManager and the number of output partitions is + // sufficiently small, then Spark will fall back to the hash-based shuffle write path, which + // doesn't buffer deserialized records. + // Note that we'll have to remove this case if we fix SPARK-6026 and remove this bypass. + false + } else if (serializeMapOutputs && serializer.supportsRelocationOfSerializedObjects) { + // SPARK-4550 extended sort-based shuffle to serialize individual records prior to sorting + // them. This optimization is guarded by a feature-flag and is only applied in cases where + // shuffle dependency does not specify an ordering and the record serializer has certain + // properties. If this optimization is enabled, we can safely avoid the copy. + // + // This optimization also applies to UnsafeShuffleManager (added in SPARK-7081). + false + } else { + // Spark's SortShuffleManager uses `ExternalSorter` to buffer records in memory. This code + // path is used both when SortShuffleManager is used and when UnsafeShuffleManager falls + // back to SortShuffleManager to perform a shuffle that the new fast path can't handle. In + // both cases, we must copy. + true + } + } else { + // We're using hash-based shuffle, so we don't need to copy. 
+ false + } + } private val keyOrdering = { if (newOrdering.nonEmpty) { @@ -81,21 +140,11 @@ case class Exchange( @transient private lazy val sparkConf = child.sqlContext.sparkContext.getConf - def serializer( + private def getSerializer( keySchema: Array[DataType], valueSchema: Array[DataType], + hasKeyOrdering: Boolean, numPartitions: Int): Serializer = { - // In ExternalSorter's spillToMergeableFile function, key-value pairs are written out - // through write(key) and then write(value) instead of write((key, value)). Because - // SparkSqlSerializer2 assumes that objects passed in are Product2, we cannot safely use - // it when spillToMergeableFile in ExternalSorter will be used. - // So, we will not use SparkSqlSerializer2 when - // - Sort-based shuffle is enabled and the number of reducers (numPartitions) is greater - // then the bypassMergeThreshold; or - // - newOrdering is defined. - val cannotUseSqlSerializer2 = - (sortBasedShuffleOn && numPartitions > bypassMergeThreshold) || newOrdering.nonEmpty - // It is true when there is no field that needs to be write out. // For now, we will not use SparkSqlSerializer2 when noField is true. val noField = @@ -104,14 +153,13 @@ case class Exchange( val useSqlSerializer2 = child.sqlContext.conf.useSqlSerializer2 && // SparkSqlSerializer2 is enabled. - !cannotUseSqlSerializer2 && // Safe to use Serializer2. SparkSqlSerializer2.support(keySchema) && // The schema of key is supported. SparkSqlSerializer2.support(valueSchema) && // The schema of value is supported. !noField val serializer = if (useSqlSerializer2) { logInfo("Using SparkSqlSerializer2.") - new SparkSqlSerializer2(keySchema, valueSchema) + new SparkSqlSerializer2(keySchema, valueSchema, hasKeyOrdering) } else { logInfo("Using SparkSqlSerializer.") new SparkSqlSerializer(sparkConf) @@ -120,20 +168,15 @@ case class Exchange( serializer } - override def execute(): RDD[Row] = attachTree(this , "execute") { + protected override def doExecute(): RDD[Row] = attachTree(this , "execute") { newPartitioning match { case HashPartitioning(expressions, numPartitions) => - // TODO: Eliminate redundant expressions in grouping key and value. - // This is a workaround for SPARK-4479. When: - // 1. sort based shuffle is on, and - // 2. the partition number is under the merge threshold, and - // 3. no ordering is required - // we can avoid the defensive copies to improve performance. In the long run, we probably - // want to include information in shuffle dependencies to indicate whether elements in the - // source RDD should be copied. 
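A simplified, dependency-free restatement of the copy decision implemented above; this is a sketch only, and the boolean parameters stand in for the values the operator reads from SparkEnv and SparkConf:

def needsDefensiveCopy(
    hasKeyOrdering: Boolean,
    sortBasedShuffle: Boolean,
    bypassSupported: Boolean,
    numPartitions: Int,
    bypassMergeThreshold: Int,
    serializeMapOutputs: Boolean,
    serializerSupportsRelocation: Boolean): Boolean = {
  if (hasKeyOrdering) {
    true  // rows are buffered by ExternalSorter for sorting, so they must be copied
  } else if (sortBasedShuffle) {
    if (bypassSupported && numPartitions <= bypassMergeThreshold) {
      false // hash-style bypass path streams records without buffering them
    } else if (serializeMapOutputs && serializerSupportsRelocation) {
      false // records are serialized before sorting, so mutable rows can be reused
    } else {
      true  // the sort-based path buffers deserialized rows, so they must be copied
    }
  } else {
    false   // plain hash-based shuffle does not buffer deserialized records
  }
}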
- val willMergeSort = sortBasedShuffleOn && numPartitions > bypassMergeThreshold - - val rdd = if (willMergeSort || newOrdering.nonEmpty) { + val keySchema = expressions.map(_.dataType).toArray + val valueSchema = child.output.map(_.dataType).toArray + val serializer = getSerializer(keySchema, valueSchema, newOrdering.nonEmpty, numPartitions) + val part = new HashPartitioner(numPartitions) + + val rdd = if (needToCopyObjectsBeforeShuffle(part, serializer)) { child.execute().mapPartitions { iter => val hashExpressions = newMutableProjection(expressions, child.output)() iter.map(r => (hashExpressions(r).copy(), r.copy())) @@ -145,50 +188,52 @@ case class Exchange( iter.map(r => mutablePair.update(hashExpressions(r), r)) } } - val part = new HashPartitioner(numPartitions) - val shuffled = - if (newOrdering.nonEmpty) { - new ShuffledRDD[Row, Row, Row](rdd, part).setKeyOrdering(keyOrdering) - } else { - new ShuffledRDD[Row, Row, Row](rdd, part) - } - val keySchema = expressions.map(_.dataType).toArray - val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(keySchema, valueSchema, numPartitions)) - + val shuffled = new ShuffledRDD[Row, Row, Row](rdd, part) + if (newOrdering.nonEmpty) { + shuffled.setKeyOrdering(keyOrdering) + } + shuffled.setSerializer(serializer) shuffled.map(_._2) case RangePartitioning(sortingExpressions, numPartitions) => - val rdd = if (sortBasedShuffleOn || newOrdering.nonEmpty) { - child.execute().mapPartitions { iter => iter.map(row => (row.copy(), null))} - } else { - child.execute().mapPartitions { iter => - val mutablePair = new MutablePair[Row, Null](null, null) - iter.map(row => mutablePair.update(row, null)) + val keySchema = child.output.map(_.dataType).toArray + val serializer = getSerializer(keySchema, null, newOrdering.nonEmpty, numPartitions) + + val childRdd = child.execute() + val part: Partitioner = { + // Internally, RangePartitioner runs a job on the RDD that samples keys to compute + // partition bounds. To get accurate samples, we need to copy the mutable keys. + val rddForSampling = childRdd.mapPartitions { iter => + val mutablePair = new MutablePair[Row, Null]() + iter.map(row => mutablePair.update(row.copy(), null)) } + // TODO: RangePartitioner should take an Ordering. + implicit val ordering = new RowOrdering(sortingExpressions, child.output) + new RangePartitioner(numPartitions, rddForSampling, ascending = true) } - // TODO: RangePartitioner should take an Ordering. 
- implicit val ordering = new RowOrdering(sortingExpressions, child.output) - - val part = new RangePartitioner(numPartitions, rdd, ascending = true) - val shuffled = - if (newOrdering.nonEmpty) { - new ShuffledRDD[Row, Null, Null](rdd, part).setKeyOrdering(keyOrdering) - } else { - new ShuffledRDD[Row, Null, Null](rdd, part) + val rdd = if (needToCopyObjectsBeforeShuffle(part, serializer)) { + childRdd.mapPartitions { iter => iter.map(row => (row.copy(), null))} + } else { + childRdd.mapPartitions { iter => + val mutablePair = new MutablePair[Row, Null]() + iter.map(row => mutablePair.update(row, null)) } - val keySchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(keySchema, null, numPartitions)) + } + val shuffled = new ShuffledRDD[Row, Null, Null](rdd, part) + if (newOrdering.nonEmpty) { + shuffled.setKeyOrdering(keyOrdering) + } + shuffled.setSerializer(serializer) shuffled.map(_._1) case SinglePartition => - // SPARK-4479: Can't turn off defensive copy as what we do for `HashPartitioning`, since - // operators like `TakeOrdered` may require an ordering within the partition, and currently - // `SinglePartition` doesn't include ordering information. - // TODO Add `SingleOrderedPartition` for operators like `TakeOrdered` - val rdd = if (sortBasedShuffleOn) { + val valueSchema = child.output.map(_.dataType).toArray + val serializer = getSerializer(null, valueSchema, hasKeyOrdering = false, 1) + val partitioner = new HashPartitioner(1) + + val rdd = if (needToCopyObjectsBeforeShuffle(partitioner, serializer)) { child.execute().mapPartitions { iter => iter.map(r => (null, r.copy())) } } else { child.execute().mapPartitions { iter => @@ -196,10 +241,8 @@ case class Exchange( iter.map(r => mutablePair.update(null, r)) } } - val partitioner = new HashPartitioner(1) val shuffled = new ShuffledRDD[Null, Row, Row](rdd, partitioner) - val valueSchema = child.output.map(_.dataType).toArray - shuffled.setSerializer(serializer(null, valueSchema, 1)) + shuffled.setSerializer(serializer) shuffled.map(_._2) case _ => sys.error(s"Exchange not implemented for $newPartitioning") @@ -253,7 +296,7 @@ private[sql] case class EnsureRequirements(sqlContext: SQLContext) extends Rule[ .sliding(2) .map { case Seq(a) => true - case Seq(a,b) => a compatibleWith b + case Seq(a, b) => a.compatibleWith(b) }.exists(!_) // Adds Exchange or Sort operators as required diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 57effbf7ec501..f931dc95ef575 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -21,9 +21,9 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericMutableRow, SpecificMutableRow} +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericMutableRow} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Row, SQLContext} /** @@ -31,26 +31,19 @@ import org.apache.spark.sql.{Row, SQLContext} */ @DeveloperApi object RDDConversions { - def productToRowRdd[A <: Product](data: RDD[A], 
schema: StructType): RDD[Row] = { + def productToRowRdd[A <: Product](data: RDD[A], outputTypes: Seq[DataType]): RDD[Row] = { data.mapPartitions { iterator => - if (iterator.isEmpty) { - Iterator.empty - } else { - val bufferedIterator = iterator.buffered - val mutableRow = new SpecificMutableRow(schema.fields.map(_.dataType)) - val schemaFields = schema.fields.toArray - val converters = schemaFields.map { - f => CatalystTypeConverters.createToCatalystConverter(f.dataType) - } - bufferedIterator.map { r => - var i = 0 - while (i < mutableRow.length) { - mutableRow(i) = converters(i)(r.productElement(i)) - i += 1 - } - - mutableRow + val numColumns = outputTypes.length + val mutableRow = new GenericMutableRow(numColumns) + val converters = outputTypes.map(CatalystTypeConverters.createToCatalystConverter) + iterator.map { r => + var i = 0 + while (i < numColumns) { + mutableRow(i) = converters(i)(r.productElement(i)) + i += 1 } + + mutableRow } } } @@ -58,26 +51,19 @@ object RDDConversions { /** * Convert the objects inside Row into the types Catalyst expected. */ - def rowToRowRdd(data: RDD[Row], schema: StructType): RDD[Row] = { + def rowToRowRdd(data: RDD[Row], outputTypes: Seq[DataType]): RDD[Row] = { data.mapPartitions { iterator => - if (iterator.isEmpty) { - Iterator.empty - } else { - val bufferedIterator = iterator.buffered - val mutableRow = new GenericMutableRow(bufferedIterator.head.toSeq.toArray) - val schemaFields = schema.fields.toArray - val converters = schemaFields.map { - f => CatalystTypeConverters.createToCatalystConverter(f.dataType) - } - bufferedIterator.map { r => - var i = 0 - while (i < mutableRow.length) { - mutableRow(i) = converters(i)(r(i)) - i += 1 - } - - mutableRow + val numColumns = outputTypes.length + val mutableRow = new GenericMutableRow(numColumns) + val converters = outputTypes.map(CatalystTypeConverters.createToCatalystConverter) + iterator.map { r => + var i = 0 + while (i < numColumns) { + mutableRow(i) = converters(i)(r(i)) + i += 1 } + + mutableRow } } } @@ -106,7 +92,7 @@ private[sql] case class LogicalRDD(output: Seq[Attribute], rdd: RDD[Row])(sqlCon /** Physical plan node for scanning data from an RDD. */ private[sql] case class PhysicalRDD(output: Seq[Attribute], rdd: RDD[Row]) extends LeafNode { - override def execute(): RDD[Row] = rdd + protected override def doExecute(): RDD[Row] = rdd } /** Logical plan node for scanning data from a local collection. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala index 575849481faad..f16ca36909fab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Expand.scala @@ -43,7 +43,7 @@ case class Expand( // as UNKNOWN partitioning override def outputPartitioning: Partitioning = UnknownPartitioning(0) - override def execute(): RDD[Row] = attachTree(this, "execute") { + protected override def doExecute(): RDD[Row] = attachTree(this, "execute") { child.execute().mapPartitions { iter => // TODO Move out projection objects creation and transfer to // workers via closure. 
However we can't assume the Projection diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala index 5201e20a10565..dd02c1f4573bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala @@ -21,6 +21,18 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ +/** + * For lazy computing, be sure the generator.terminate() called in the very last + * TODO reusing the CompletionIterator? + */ +private[execution] sealed case class LazyIterator(func: () => TraversableOnce[Row]) + extends Iterator[Row] { + + lazy val results = func().toIterator + override def hasNext: Boolean = results.hasNext + override def next(): Row = results.next() +} + /** * :: DeveloperApi :: * Applies a [[catalyst.expressions.Generator Generator]] to a stream of input rows, combining the @@ -46,28 +58,34 @@ case class Generate( val boundGenerator = BindReferences.bindReference(generator, child.output) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { + // boundGenerator.terminate() should be triggered after all of the rows in the partition if (join) { child.execute().mapPartitions { iter => - val nullValues = Seq.fill(generator.elementTypes.size)(Literal(null)) - // Used to produce rows with no matches when outer = true. - val outerProjection = - newProjection(child.output ++ nullValues, child.output) - - val joinProjection = newProjection(output, output) + val generatorNullRow = Row.fromSeq(Seq.fill[Any](generator.elementTypes.size)(null)) val joinedRow = new JoinedRow - iter.flatMap {row => + iter.flatMap { row => + // we should always set the left (child output) + joinedRow.withLeft(row) val outputRows = boundGenerator.eval(row) if (outer && outputRows.isEmpty) { - outerProjection(row) :: Nil + joinedRow.withRight(generatorNullRow) :: Nil } else { - outputRows.map(or => joinProjection(joinedRow(row, or))) + outputRows.map(or => joinedRow.withRight(or)) } + } ++ LazyIterator(() => boundGenerator.terminate()).map { row => + // we leave the left side as the last element of its child output + // keep it the same as Hive does + joinedRow.withRight(row) } } } else { - child.execute().mapPartitions(iter => iter.flatMap(row => boundGenerator.eval(row))) + child.execute().mapPartitions { iter => + iter.flatMap(row => boundGenerator.eval(row)) ++ + LazyIterator(() => boundGenerator.terminate()) + } } } } + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala index 5d9f202681045..3e27c1bde2dfd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GeneratedAggregate.scala @@ -66,7 +66,7 @@ case class GeneratedAggregate( override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val aggregatesToCompute = aggregateExpressions.flatMap { a => a.collect { case agg: AggregateExpression => agg} } @@ -138,15 +138,15 @@ case class GeneratedAggregate( case UnscaledValue(e) => e case _ => expr } - // partial sum result can be null only when no input rows present + // partial sum result can be null only 
when no input rows present val updateFunction = If( IsNotNull(actualExpr), Coalesce( Add( - Coalesce(currentSum :: zero :: Nil), + Coalesce(currentSum :: zero :: Nil), Cast(expr, calcType)) :: currentSum :: zero :: Nil), currentSum) - + val result = expr.dataType match { case DecimalType.Fixed(_, _) => @@ -155,7 +155,7 @@ case class GeneratedAggregate( } AggregateEvaluation(currentSum :: Nil, initialValue :: Nil, updateFunction :: Nil, result) - + case m @ Max(expr) => val currentMax = AttributeReference("currentMax", expr.dataType, nullable = true)() val initialValue = Literal.create(null, expr.dataType) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala index ace9af5f384c9..03bee80ad7f38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScan.scala @@ -30,7 +30,7 @@ private[sql] case class LocalTableScan(output: Seq[Attribute], rows: Seq[Row]) e private lazy val rdd = sqlContext.sparkContext.parallelize(rows) - override def execute(): RDD[Row] = rdd + protected override def doExecute(): RDD[Row] = rdd override def executeCollect(): Array[Row] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index 59c89800da00f..435ac011178de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.Logging -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.{CatalystTypeConverters, trees} import org.apache.spark.sql.catalyst.expressions._ @@ -79,14 +79,25 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ def requiredChildOrdering: Seq[Seq[SortOrder]] = Seq.fill(children.size)(Nil) /** - * Runs this query returning the result as an RDD. + * Returns the result of this query as an RDD[Row] by delegating to doExecute + * after adding query plan information to created RDDs for visualization. + * Concrete implementations of SparkPlan should override doExecute instead. */ - def execute(): RDD[Row] + final def execute(): RDD[Row] = { + RDDOperationScope.withScope(sparkContext, nodeName, false, true) { + doExecute() + } + } /** - * Runs this query returning the result as an array. + * Overridden by concrete implementations of SparkPlan. + * Produces the result of the query as an RDD[Row] */ + protected def doExecute(): RDD[Row] + /** + * Runs this query returning the result as an array. 
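The Generate change a little above appends a lazily evaluated iterator so that boundGenerator.terminate() runs only after every row of the partition has been produced. A standalone sketch of the same idea, with no Spark types (the printed strings stand in for generated rows):

final case class LazyIterator[A](func: () => TraversableOnce[A]) extends Iterator[A] {
  private lazy val results = func().toIterator   // func() is not run until first use
  override def hasNext: Boolean = results.hasNext
  override def next(): A = results.next()
}

object LazyIteratorDemo {
  def main(args: Array[String]): Unit = {
    val rows = Iterator("row1", "row2")
    // Appending with ++ defers the terminate-style work until the main rows are exhausted.
    val all = rows ++ LazyIterator(() => { println("terminate() called"); Seq("terminal row") })
    all.foreach(println)   // row1, row2, then "terminate() called", then terminal row
  }
}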
+ */ def executeCollect(): Array[Row] = { execute().mapPartitions { iter => val converter = CatalystTypeConverters.createToScalaConverter(schema) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala index 9552f41115866..256d527d7b636 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlSerializer2.scala @@ -27,7 +27,7 @@ import scala.reflect.ClassTag import org.apache.spark.serializer._ import org.apache.spark.Logging import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.expressions.SpecificMutableRow +import org.apache.spark.sql.catalyst.expressions.{SpecificMutableRow, MutableRow, GenericMutableRow} import org.apache.spark.sql.types._ /** @@ -49,9 +49,9 @@ private[sql] class Serializer2SerializationStream( out: OutputStream) extends SerializationStream with Logging { - val rowOut = new DataOutputStream(out) - val writeKeyFunc = SparkSqlSerializer2.createSerializationFunction(keySchema, rowOut) - val writeValueFunc = SparkSqlSerializer2.createSerializationFunction(valueSchema, rowOut) + private val rowOut = new DataOutputStream(new BufferedOutputStream(out)) + private val writeKeyFunc = SparkSqlSerializer2.createSerializationFunction(keySchema, rowOut) + private val writeValueFunc = SparkSqlSerializer2.createSerializationFunction(valueSchema, rowOut) override def writeObject[T: ClassTag](t: T): SerializationStream = { val kv = t.asInstanceOf[Product2[Row, Row]] @@ -86,31 +86,44 @@ private[sql] class Serializer2SerializationStream( private[sql] class Serializer2DeserializationStream( keySchema: Array[DataType], valueSchema: Array[DataType], + hasKeyOrdering: Boolean, in: InputStream) extends DeserializationStream with Logging { - val rowIn = new DataInputStream(new BufferedInputStream(in)) + private val rowIn = new DataInputStream(new BufferedInputStream(in)) + + private def rowGenerator(schema: Array[DataType]): () => (MutableRow) = { + if (schema == null) { + () => null + } else { + if (hasKeyOrdering) { + // We have key ordering specified in a ShuffledRDD, it is not safe to reuse a mutable row. + () => new GenericMutableRow(schema.length) + } else { + // It is safe to reuse the mutable row. + val mutableRow = new SpecificMutableRow(schema) + () => mutableRow + } + } + } - val key = if (keySchema != null) new SpecificMutableRow(keySchema) else null - val value = if (valueSchema != null) new SpecificMutableRow(valueSchema) else null - val readKeyFunc = SparkSqlSerializer2.createDeserializationFunction(keySchema, rowIn, key) - val readValueFunc = SparkSqlSerializer2.createDeserializationFunction(valueSchema, rowIn, value) + // Functions used to return rows for key and value. + private val getKey = rowGenerator(keySchema) + private val getValue = rowGenerator(valueSchema) + // Functions used to read a serialized row from the InputStream and deserialize it. 
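The SparkPlan change above is a template-method refactor: execute() becomes final and wraps the new doExecute() hook, so attaching RDD scope information for the DAG visualization happens in exactly one place. A minimal framework-free sketch of that shape, with a println standing in for RDDOperationScope.withScope:

abstract class PlanNode {
  def nodeName: String = getClass.getSimpleName

  // Callers keep using execute(); the shared bookkeeping lives here once.
  final def execute(): Seq[String] = {
    println(s"entering scope: $nodeName")   // stand-in for RDDOperationScope.withScope
    doExecute()
  }

  // Concrete plans override this instead of execute().
  protected def doExecute(): Seq[String]
}

class LeafPlan(rows: Seq[String]) extends PlanNode {
  protected override def doExecute(): Seq[String] = rows
}

object TemplateMethodDemo {
  def main(args: Array[String]): Unit = {
    println(new LeafPlan(Seq("a", "b")).execute())   // "entering scope: LeafPlan", then List(a, b)
  }
}

Making execute() final is what forces every operator in the patch to switch from overriding execute() to overriding doExecute().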
+ private val readKeyFunc = SparkSqlSerializer2.createDeserializationFunction(keySchema, rowIn) + private val readValueFunc = SparkSqlSerializer2.createDeserializationFunction(valueSchema, rowIn) override def readObject[T: ClassTag](): T = { - readKeyFunc() - readValueFunc() - - (key, value).asInstanceOf[T] + (readKeyFunc(getKey()), readValueFunc(getValue())).asInstanceOf[T] } override def readKey[T: ClassTag](): T = { - readKeyFunc() - key.asInstanceOf[T] + readKeyFunc(getKey()).asInstanceOf[T] } override def readValue[T: ClassTag](): T = { - readValueFunc() - value.asInstanceOf[T] + readValueFunc(getValue()).asInstanceOf[T] } override def close(): Unit = { @@ -118,9 +131,10 @@ private[sql] class Serializer2DeserializationStream( } } -private[sql] class ShuffleSerializerInstance( +private[sql] class SparkSqlSerializer2Instance( keySchema: Array[DataType], - valueSchema: Array[DataType]) + valueSchema: Array[DataType], + hasKeyOrdering: Boolean) extends SerializerInstance { def serialize[T: ClassTag](t: T): ByteBuffer = @@ -137,7 +151,7 @@ private[sql] class ShuffleSerializerInstance( } def deserializeStream(s: InputStream): DeserializationStream = { - new Serializer2DeserializationStream(keySchema, valueSchema, s) + new Serializer2DeserializationStream(keySchema, valueSchema, hasKeyOrdering, s) } } @@ -148,12 +162,21 @@ private[sql] class ShuffleSerializerInstance( * The schema of keys is represented by `keySchema` and that of values is represented by * `valueSchema`. */ -private[sql] class SparkSqlSerializer2(keySchema: Array[DataType], valueSchema: Array[DataType]) +private[sql] class SparkSqlSerializer2( + keySchema: Array[DataType], + valueSchema: Array[DataType], + hasKeyOrdering: Boolean) extends Serializer with Logging with Serializable{ - def newInstance(): SerializerInstance = new ShuffleSerializerInstance(keySchema, valueSchema) + def newInstance(): SerializerInstance = + new SparkSqlSerializer2Instance(keySchema, valueSchema, hasKeyOrdering) + + override def supportsRelocationOfSerializedObjects: Boolean = { + // SparkSqlSerializer2 is stateless and writes no stream headers + true + } } private[sql] object SparkSqlSerializer2 { @@ -318,11 +341,11 @@ private[sql] object SparkSqlSerializer2 { */ def createDeserializationFunction( schema: Array[DataType], - in: DataInputStream, - mutableRow: SpecificMutableRow): () => Unit = { - () => { - // If the schema is null, the returned function does nothing when it get called. 
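The deserialization-stream change above decides, per stream, whether one mutable row can be reused or a fresh row must be allocated for every record: with a key ordering in the shuffle, downstream code may buffer rows, so reuse is unsafe. A simplified sketch of that decision; the safeToReuse flag here is the inverse of hasKeyOrdering, and plain arrays stand in for mutable rows:

object RowReuseSketch {
  def rowGenerator(numFields: Int, safeToReuse: Boolean): () => Array[Any] =
    if (safeToReuse) {
      val reused = new Array[Any](numFields)   // one buffer, reused for every record
      () => reused
    } else {
      () => new Array[Any](numFields)          // a new buffer per record
    }

  def main(args: Array[String]): Unit = {
    val reuse = rowGenerator(2, safeToReuse = true)
    val fresh = rowGenerator(2, safeToReuse = false)
    println(reuse() eq reuse())   // true: the same instance every time
    println(fresh() eq fresh())   // false: a distinct instance per call
  }
}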
- if (schema != null) { + in: DataInputStream): (MutableRow) => Row = { + if (schema == null) { + (mutableRow: MutableRow) => null + } else { + (mutableRow: MutableRow) => { var i = 0 while (i < schema.length) { schema(i) match { @@ -435,6 +458,8 @@ private[sql] object SparkSqlSerializer2 { } i += 1 } + + mutableRow } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 326e8ce4ca524..d0a1ad00560d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -243,8 +243,9 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case (predicate, None) => predicate // Filter needs to be applied above when it contains partitioning // columns - case (predicate, _) if(!predicate.references.map(_.name).toSet - .intersect (partitionColNames).isEmpty) => predicate + case (predicate, _) + if !predicate.references.map(_.name).toSet.intersect(partitionColNames).isEmpty => + predicate } } } else { @@ -270,7 +271,7 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { projectList, filters, identity[Seq[Expression]], // All filters still need to be evaluated. - InMemoryColumnarTableScan(_, filters, mem)) :: Nil + InMemoryColumnarTableScan(_, filters, mem)) :: Nil case _ => Nil } } @@ -303,6 +304,8 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { execution.Expand(projections, output, planLater(child)) :: Nil case logical.Aggregate(group, agg, child) => execution.Aggregate(partial = false, group, agg, planLater(child)) :: Nil + case logical.Window(projectList, windowExpressions, spec, child) => + execution.Window(projectList, windowExpressions, spec, planLater(child)) :: Nil case logical.Sample(lb, ub, withReplacement, seed, child) => execution.Sample(lb, ub, withReplacement, seed, planLater(child)) :: Nil case logical.LocalRelation(output, data) => @@ -341,17 +344,21 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case c: CreateTableUsing if c.temporary && c.allowExisting => sys.error("allowExisting should be set to false when creating a temporary table.") - case CreateTableUsingAsSelect(tableName, provider, true, mode, opts, query) => - val cmd = - CreateTempTableUsingAsSelect(tableName, provider, mode, opts, query) + case CreateTableUsingAsSelect(tableName, provider, true, partitionsCols, mode, opts, query) + if partitionsCols.nonEmpty => + sys.error("Cannot create temporary partitioned table.") + + case CreateTableUsingAsSelect(tableName, provider, true, _, mode, opts, query) => + val cmd = CreateTempTableUsingAsSelect( + tableName, provider, Array.empty[String], mode, opts, query) ExecutedCommand(cmd) :: Nil case c: CreateTableUsingAsSelect if !c.temporary => sys.error("Tables created with SQLContext must be TEMPORARY. 
Use a HiveContext instead.") - case LogicalDescribeCommand(table, isExtended) => + case describe @ LogicalDescribeCommand(table, isExtended) => val resultPlan = self.sqlContext.executePlan(table).executedPlan ExecutedCommand( - RunnableDescribeCommand(resultPlan, resultPlan.output, isExtended)) :: Nil + RunnableDescribeCommand(resultPlan, describe.output, isExtended)) :: Nil case _ => Nil } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala new file mode 100644 index 0000000000000..c4327ce262ac5 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Window.scala @@ -0,0 +1,480 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import java.util + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, ClusteredDistribution, Partitioning} +import org.apache.spark.util.collection.CompactBuffer + +/** + * :: DeveloperApi :: + * For every row, evaluates `windowExpression` containing Window Functions and attaches + * the results with other regular expressions (presented by `projectList`). + * Evert operator handles a single Window Specification, `windowSpec`. + */ +case class Window( + projectList: Seq[Attribute], + windowExpression: Seq[NamedExpression], + windowSpec: WindowSpecDefinition, + child: SparkPlan) + extends UnaryNode { + + override def output: Seq[Attribute] = + (projectList ++ windowExpression).map(_.toAttribute) + + override def requiredChildDistribution: Seq[Distribution] = + if (windowSpec.partitionSpec.isEmpty) { + // This operator will be very expensive. + AllTuples :: Nil + } else { + ClusteredDistribution(windowSpec.partitionSpec) :: Nil + } + + // Since window functions are adding columns to the input rows, the child's outputPartitioning + // is preserved. + override def outputPartitioning: Partitioning = child.outputPartitioning + + override def requiredChildOrdering: Seq[Seq[SortOrder]] = { + // The required child ordering has two parts. + // The first part is the expressions in the partition specification. + // We add these expressions to the required ordering to make sure input rows are grouped + // based on the partition specification. So, we only need to process a single partition + // at a time. + // The second part is the expressions specified in the ORDER BY cluase. + // Basically, we first use sort to group rows based on partition specifications and then sort + // Rows in a group based on the order specification. 
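The requiredChildOrdering described above is built from two parts: the partition expressions (ascending) so each partition arrives as one contiguous run, followed by the ORDER BY keys for ordering within the partition. A small standalone sketch of that derivation, with illustrative types in place of Catalyst expressions:

object RequiredOrderingSketch {
  sealed trait Direction
  case object Ascending extends Direction
  case object Descending extends Direction
  final case class SortKey(column: String, direction: Direction)

  // Partition columns first (ascending), then the ORDER BY keys: rows of a
  // partition arrive contiguously and already sorted within the partition.
  def requiredChildOrdering(partitionSpec: Seq[String], orderSpec: Seq[SortKey]): Seq[SortKey] =
    partitionSpec.map(SortKey(_, Ascending)) ++ orderSpec

  def main(args: Array[String]): Unit = {
    println(requiredChildOrdering(Seq("country"), Seq(SortKey("date", Ascending))))
    // List(SortKey(country,Ascending), SortKey(date,Ascending))
  }
}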
+ (windowSpec.partitionSpec.map(SortOrder(_, Ascending)) ++ windowSpec.orderSpec) :: Nil + } + + // Since window functions basically add columns to input rows, this operator + // will not change the ordering of input rows. + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + case class ComputedWindow( + unbound: WindowExpression, + windowFunction: WindowFunction, + resultAttribute: AttributeReference) + + // A list of window functions that need to be computed for each group. + private[this] val computedWindowExpressions = windowExpression.flatMap { window => + window.collect { + case w: WindowExpression => + ComputedWindow( + w, + BindReferences.bindReference(w.windowFunction, child.output), + AttributeReference(s"windowResult:$w", w.dataType, w.nullable)()) + } + }.toArray + + private[this] val windowFrame = + windowSpec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] + + // Create window functions. + private[this] def windowFunctions(): Array[WindowFunction] = { + val functions = new Array[WindowFunction](computedWindowExpressions.length) + var i = 0 + while (i < computedWindowExpressions.length) { + functions(i) = computedWindowExpressions(i).windowFunction.newInstance() + functions(i).init() + i += 1 + } + functions + } + + // The schema of the result of all window function evaluations + private[this] val computedSchema = computedWindowExpressions.map(_.resultAttribute) + + private[this] val computedResultMap = + computedWindowExpressions.map { w => w.unbound -> w.resultAttribute }.toMap + + private[this] val windowExpressionResult = windowExpression.map { window => + window.transform { + case w: WindowExpression if computedResultMap.contains(w) => computedResultMap(w) + } + } + + protected override def doExecute(): RDD[Row] = { + child.execute().mapPartitions { iter => + new Iterator[Row] { + + // Although input rows are grouped based on windowSpec.partitionSpec, we need to + // know when we have a new partition. + // This is to manually construct an ordering that can be used to compare rows. + // TODO: We may want to have a newOrdering that takes BoundReferences. + // So, we can take advantave of code gen. + private val partitionOrdering: Ordering[Row] = + RowOrdering.forSchema(windowSpec.partitionSpec.map(_.dataType)) + + // This is used to project expressions for the partition specification. + protected val partitionGenerator = + newMutableProjection(windowSpec.partitionSpec, child.output)() + + // This is ued to project expressions for the order specification. + protected val rowOrderGenerator = + newMutableProjection(windowSpec.orderSpec.map(_.child), child.output)() + + // The position of next output row in the inputRowBuffer. + var rowPosition: Int = 0 + // The number of buffered rows in the inputRowBuffer (the size of the current partition). + var partitionSize: Int = 0 + // The buffer used to buffer rows in a partition. + var inputRowBuffer: CompactBuffer[Row] = _ + // The partition key of the current partition. + var currentPartitionKey: Row = _ + // The partition key of next partition. + var nextPartitionKey: Row = _ + // The first row of next partition. + var firstRowInNextPartition: Row = _ + // Indicates if this partition is the last one in the iter. 
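The windowExpressionResult step above rewrites each window expression so that every WindowExpression node is replaced by the attribute that will hold its precomputed result. A tiny sketch of that substitution over a toy expression tree; the node names here are illustrative, not Catalyst classes:

object WindowSubstitutionSketch {
  sealed trait Expr
  final case class WindowExpr(name: String) extends Expr
  final case class ResultAttr(name: String) extends Expr
  final case class Add(left: Expr, right: Expr) extends Expr

  // Replace every window expression that has a computed result attribute.
  def substitute(e: Expr, results: Map[WindowExpr, ResultAttr]): Expr = e match {
    case w: WindowExpr if results.contains(w) => results(w)
    case Add(l, r) => Add(substitute(l, results), substitute(r, results))
    case other => other
  }

  def main(args: Array[String]): Unit = {
    val w = WindowExpr("sum(x) OVER (...)")
    println(substitute(Add(w, ResultAttr("bonus")), Map(w -> ResultAttr("windowResult:sum"))))
    // Add(ResultAttr(windowResult:sum),ResultAttr(bonus))
  }
}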
+ var lastPartition: Boolean = false + + def createBoundaryEvaluator(): () => Unit = { + def findPhysicalBoundary( + boundary: FrameBoundary): () => Int = boundary match { + case UnboundedPreceding => () => 0 + case UnboundedFollowing => () => partitionSize - 1 + case CurrentRow => () => rowPosition + case ValuePreceding(value) => + () => + val newPosition = rowPosition - value + if (newPosition > 0) newPosition else 0 + case ValueFollowing(value) => + () => + val newPosition = rowPosition + value + if (newPosition < partitionSize) newPosition else partitionSize - 1 + } + + def findLogicalBoundary( + boundary: FrameBoundary, + searchDirection: Int, + evaluator: Expression, + joinedRow: JoinedRow): () => Int = boundary match { + case UnboundedPreceding => () => 0 + case UnboundedFollowing => () => partitionSize - 1 + case other => + () => { + // CurrentRow, ValuePreceding, or ValueFollowing. + var newPosition = rowPosition + searchDirection + var stopSearch = false + // rowOrderGenerator is a mutable projection. + // We need to make a copy of the returned by rowOrderGenerator since we will + // compare searched row with this currentOrderByValue. + val currentOrderByValue = rowOrderGenerator(inputRowBuffer(rowPosition)).copy() + while (newPosition >= 0 && newPosition < partitionSize && !stopSearch) { + val r = rowOrderGenerator(inputRowBuffer(newPosition)) + stopSearch = + !(evaluator.eval(joinedRow(currentOrderByValue, r)).asInstanceOf[Boolean]) + if (!stopSearch) { + newPosition += searchDirection + } + } + newPosition -= searchDirection + + if (newPosition < 0) { + 0 + } else if (newPosition >= partitionSize) { + partitionSize - 1 + } else { + newPosition + } + } + } + + windowFrame.frameType match { + case RowFrame => + val findStart = findPhysicalBoundary(windowFrame.frameStart) + val findEnd = findPhysicalBoundary(windowFrame.frameEnd) + () => { + frameStart = findStart() + frameEnd = findEnd() + } + case RangeFrame => + val joinedRowForBoundaryEvaluation: JoinedRow = new JoinedRow() + val orderByExpr = windowSpec.orderSpec.head + val currentRowExpr = + BoundReference(0, orderByExpr.dataType, orderByExpr.nullable) + val examedRowExpr = + BoundReference(1, orderByExpr.dataType, orderByExpr.nullable) + val differenceExpr = Abs(Subtract(currentRowExpr, examedRowExpr)) + + val frameStartEvaluator = windowFrame.frameStart match { + case CurrentRow => EqualTo(currentRowExpr, examedRowExpr) + case ValuePreceding(value) => + LessThanOrEqual(differenceExpr, Cast(Literal(value), orderByExpr.dataType)) + case ValueFollowing(value) => + GreaterThanOrEqual(differenceExpr, Cast(Literal(value), orderByExpr.dataType)) + case o => Literal(true) // This is just a dummy expression, we will not use it. + } + + val frameEndEvaluator = windowFrame.frameEnd match { + case CurrentRow => EqualTo(currentRowExpr, examedRowExpr) + case ValuePreceding(value) => + GreaterThanOrEqual(differenceExpr, Cast(Literal(value), orderByExpr.dataType)) + case ValueFollowing(value) => + LessThanOrEqual(differenceExpr, Cast(Literal(value), orderByExpr.dataType)) + case o => Literal(true) // This is just a dummy expression, we will not use it. 
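For ROWS frames, the physical-boundary computation above turns a frame edge into an index: the current position shifted by the offset and clamped to the partition, with UNBOUNDED mapping to the first or last row. A standalone sketch of the same computation:

object RowFrameBoundarySketch {
  sealed trait Boundary
  case object UnboundedPreceding extends Boundary
  case object UnboundedFollowing extends Boundary
  case object CurrentRow extends Boundary
  final case class ValuePreceding(n: Int) extends Boundary
  final case class ValueFollowing(n: Int) extends Boundary

  def physicalBoundary(b: Boundary, rowPosition: Int, partitionSize: Int): Int = b match {
    case UnboundedPreceding => 0
    case UnboundedFollowing => partitionSize - 1
    case CurrentRow         => rowPosition
    case ValuePreceding(n)  => math.max(rowPosition - n, 0)   // clamp at the first row
    case ValueFollowing(n)  => math.min(rowPosition + n, partitionSize - 1) // clamp at the last row
  }

  def main(args: Array[String]): Unit = {
    // ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING at position 1 in a 5-row partition
    println((physicalBoundary(ValuePreceding(2), 1, 5), physicalBoundary(ValueFollowing(1), 1, 5))) // (0,2)
  }
}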
+ } + + val findStart = + findLogicalBoundary( + boundary = windowFrame.frameStart, + searchDirection = -1, + evaluator = frameStartEvaluator, + joinedRow = joinedRowForBoundaryEvaluation) + val findEnd = + findLogicalBoundary( + boundary = windowFrame.frameEnd, + searchDirection = 1, + evaluator = frameEndEvaluator, + joinedRow = joinedRowForBoundaryEvaluation) + () => { + frameStart = findStart() + frameEnd = findEnd() + } + } + } + + val boundaryEvaluator = createBoundaryEvaluator() + // Indicates if we the specified window frame requires us to maintain a sliding frame + // (e.g. RANGES BETWEEN 1 PRECEDING AND CURRENT ROW) or the window frame + // is the entire partition (e.g. ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING). + val requireUpdateFrame: Boolean = { + def requireUpdateBoundary(boundary: FrameBoundary): Boolean = boundary match { + case UnboundedPreceding => false + case UnboundedFollowing => false + case _ => true + } + + requireUpdateBoundary(windowFrame.frameStart) || + requireUpdateBoundary(windowFrame.frameEnd) + } + // The start position of the current frame in the partition. + var frameStart: Int = 0 + // The end position of the current frame in the partition. + var frameEnd: Int = -1 + // Window functions. + val functions: Array[WindowFunction] = windowFunctions() + // Buffers used to store input parameters for window functions. Because we may need to + // maintain a sliding frame, we use this buffer to avoid evaluate the parameters from + // the same row multiple times. + val windowFunctionParameterBuffers: Array[util.LinkedList[AnyRef]] = + functions.map(_ => new util.LinkedList[AnyRef]()) + + // The projection used to generate the final result rows of this operator. + private[this] val resultProjection = + newMutableProjection( + projectList ++ windowExpressionResult, + projectList ++ computedSchema)() + + // The row used to hold results of window functions. + private[this] val windowExpressionResultRow = + new GenericMutableRow(computedSchema.length) + + private[this] val joinedRow = new JoinedRow6 + + // Initialize this iterator. + initialize() + + private def initialize(): Unit = { + if (iter.hasNext) { + val currentRow = iter.next().copy() + // partitionGenerator is a mutable projection. Since we need to track nextPartitionKey, + // we are making a copy of the returned partitionKey at here. + nextPartitionKey = partitionGenerator(currentRow).copy() + firstRowInNextPartition = currentRow + fetchNextPartition() + } else { + // The iter is an empty one. So, we set all of the following variables + // to make sure hasNext will return false. + lastPartition = true + rowPosition = 0 + partitionSize = 0 + } + } + + // Indicates if we will have new output row. + override final def hasNext: Boolean = { + !lastPartition || (rowPosition < partitionSize) + } + + override final def next(): Row = { + if (hasNext) { + if (rowPosition == partitionSize) { + // All rows of this buffer have been consumed. + // We will move to next partition. + fetchNextPartition() + } + // Get the input row for the current output row. + val inputRow = inputRowBuffer(rowPosition) + // Get all results of the window functions for this output row. + var i = 0 + while (i < functions.length) { + windowExpressionResultRow.update(i, functions(i).get(rowPosition)) + i += 1 + } + + // Construct the output row. + val outputRow = resultProjection(joinedRow(inputRow, windowExpressionResultRow)) + // We will move to the next one. 
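The requireUpdateFrame test above is a simple one: the frame has to be maintained row by row only if at least one edge is bounded; UNBOUNDED PRECEDING to UNBOUNDED FOLLOWING covers the whole partition and is built once. A minimal sketch:

object RequireUpdateFrameSketch {
  sealed trait Boundary
  case object UnboundedPreceding extends Boundary
  case object UnboundedFollowing extends Boundary
  final case class Bounded(offset: Int) extends Boundary   // CurrentRow is Bounded(0)

  def requireUpdateFrame(start: Boundary, end: Boundary): Boolean = {
    def bounded(b: Boundary): Boolean = b match {
      case UnboundedPreceding | UnboundedFollowing => false
      case _ => true
    }
    bounded(start) || bounded(end)
  }

  def main(args: Array[String]): Unit = {
    println(requireUpdateFrame(UnboundedPreceding, UnboundedFollowing)) // false: whole partition, built once
    println(requireUpdateFrame(Bounded(-3), Bounded(0)))                // true: sliding frame
  }
}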
+ rowPosition += 1 + if (requireUpdateFrame && rowPosition < partitionSize) { + // If we need to maintain a sliding frame and + // we will still work on this partition when next is called next time, do the update. + updateFrame() + } + + // Return the output row. + outputRow + } else { + // no more result + throw new NoSuchElementException + } + } + + // Fetch the next partition. + private def fetchNextPartition(): Unit = { + // Create a new buffer for input rows. + inputRowBuffer = new CompactBuffer[Row]() + // We already have the first row for this partition + // (recorded in firstRowInNextPartition). Add it back. + inputRowBuffer += firstRowInNextPartition + // Set the current partition key. + currentPartitionKey = nextPartitionKey + // Now, we will start to find all rows belonging to this partition. + // Create a variable to track if we see the next partition. + var findNextPartition = false + // The search will stop when we see the next partition or there is no + // input row left in the iter. + while (iter.hasNext && !findNextPartition) { + // Make a copy of the input row since we will put it in the buffer. + val currentRow = iter.next().copy() + // Get the partition key based on the partition specification. + // For the below compare method, we do not need to make a copy of partitionKey. + val partitionKey = partitionGenerator(currentRow) + // Check if the current row belongs the current input row. + val comparing = partitionOrdering.compare(currentPartitionKey, partitionKey) + if (comparing == 0) { + // This row is still in the current partition. + inputRowBuffer += currentRow + } else { + // The current input row is in a different partition. + findNextPartition = true + // partitionGenerator is a mutable projection. + // Since we need to track nextPartitionKey and we determine that it should be set + // as partitionKey, we are making a copy of the partitionKey at here. + nextPartitionKey = partitionKey.copy() + firstRowInNextPartition = currentRow + } + } + + // We have not seen a new partition. It means that there is no new row in the + // iter. The current partition is the last partition of the iter. + if (!findNextPartition) { + lastPartition = true + } + + // We have got all rows for the current partition. + // Set rowPosition to 0 (the next output row will be based on the first + // input row of this partition). + rowPosition = 0 + // The size of this partition. + partitionSize = inputRowBuffer.size + // Reset all parameter buffers of window functions. + var i = 0 + while (i < windowFunctionParameterBuffers.length) { + windowFunctionParameterBuffers(i).clear() + i += 1 + } + frameStart = 0 + frameEnd = -1 + // Create the first window frame for this partition. + // If we do not need to maintain a sliding frame, this frame will + // have the entire partition. + updateFrame() + } + + /** The function used to maintain the sliding frame. */ + private def updateFrame(): Unit = { + // Based on the difference between the new frame and old frame, + // updates the buffers holding input parameters of window functions. + // We will start to prepare input parameters starting from the row + // indicated by offset in the input row buffer. + def updateWindowFunctionParameterBuffers( + numToRemove: Int, + numToAdd: Int, + offset: Int): Unit = { + // First, remove unneeded entries from the head of every buffer. 
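fetchNextPartition above relies on the sort-based grouping guaranteed by requiredChildOrdering: a partition is the maximal run of consecutive rows with an equal partition key, and the first row of the next run has to be remembered across calls. A standalone sketch of that grouping as a plain iterator:

object PartitionRunSketch {
  def splitIntoPartitions[A, K](rows: Iterator[A])(key: A => K): Iterator[Vector[A]] =
    new Iterator[Vector[A]] {
      // The first row of the next partition, carried over between calls.
      private var pending: Option[A] = if (rows.hasNext) Some(rows.next()) else None
      override def hasNext: Boolean = pending.isDefined
      override def next(): Vector[A] = {
        val first = pending.get
        val currentKey = key(first)
        val builder = Vector.newBuilder[A]
        builder += first
        pending = None
        var foundNext = false
        while (!foundNext && rows.hasNext) {
          val row = rows.next()
          if (key(row) == currentKey) builder += row
          else { pending = Some(row); foundNext = true }   // first row of the next partition
        }
        builder.result()
      }
    }

  def main(args: Array[String]): Unit = {
    val rows = Iterator(("us", 1), ("us", 2), ("de", 3), ("fr", 4))
    splitIntoPartitions(rows)(_._1).foreach(println)
    // Vector((us,1), (us,2)), then Vector((de,3)), then Vector((fr,4))
  }
}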
+ var i = 0 + while (i < numToRemove) { + var j = 0 + while (j < windowFunctionParameterBuffers.length) { + windowFunctionParameterBuffers(j).remove() + j += 1 + } + i += 1 + } + // Then, add needed entries to the tail of every buffer. + i = 0 + while (i < numToAdd) { + var j = 0 + while (j < windowFunctionParameterBuffers.length) { + // Ask the function to prepare the input parameters. + val parameters = functions(j).prepareInputParameters(inputRowBuffer(i + offset)) + windowFunctionParameterBuffers(j).add(parameters) + j += 1 + } + i += 1 + } + } + + // Record the current frame start point and end point before + // we update them. + val previousFrameStart = frameStart + val previousFrameEnd = frameEnd + boundaryEvaluator() + updateWindowFunctionParameterBuffers( + frameStart - previousFrameStart, + frameEnd - previousFrameEnd, + previousFrameEnd + 1) + // Evaluate the current frame. + evaluateCurrentFrame() + } + + /** Evaluate the current window frame. */ + private def evaluateCurrentFrame(): Unit = { + var i = 0 + while (i < functions.length) { + // Reset the state of the window function. + functions(i).reset() + // Get all buffered input parameters based on rows of this window frame. + val inputParameters = windowFunctionParameterBuffers(i).toArray() + // Send these input parameters to the window function. + functions(i).batchUpdate(inputParameters) + // Ask the function to evaluate based on this window frame. + functions(i).evaluate() + i += 1 + } + } + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index 5ca11e67a9434..a30ade86441ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -37,7 +37,7 @@ case class Project(projectList: Seq[NamedExpression], child: SparkPlan) extends @transient lazy val buildProjection = newMutableProjection(projectList, child.output) - override def execute(): RDD[Row] = child.execute().mapPartitions { iter => + protected override def doExecute(): RDD[Row] = child.execute().mapPartitions { iter => val resuableProjection = buildProjection() iter.map(resuableProjection) } @@ -54,7 +54,7 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { @transient lazy val conditionEvaluator: (Row) => Boolean = newPredicate(condition, child.output) - override def execute(): RDD[Row] = child.execute().mapPartitions { iter => + protected override def doExecute(): RDD[Row] = child.execute().mapPartitions { iter => iter.filter(conditionEvaluator) } @@ -65,7 +65,7 @@ case class Filter(condition: Expression, child: SparkPlan) extends UnaryNode { * :: DeveloperApi :: * Sample the dataset. * @param lowerBound Lower-bound of the sampling probability (usually 0.0) - * @param upperBound Upper-bound of the sampling probability. The expected fraction sampled + * @param upperBound Upper-bound of the sampling probability. The expected fraction sampled * will be ub - lb. * @param withReplacement Whether to sample with replacement. * @param seed the random seed @@ -83,7 +83,7 @@ case class Sample( override def output: Seq[Attribute] = child.output // TODO: How to pick seed? 
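The sliding-frame maintenance above is incremental: entries that left the frame are dropped from the head of each parameter buffer and entries that entered it are appended at the tail, instead of rebuilding the frame for every row. A simplified sketch with plain integers standing in for prepared parameters:

import scala.collection.mutable.ArrayBuffer

object SlidingFrameSketch {
  def updateFrame(buffer: ArrayBuffer[Int], rows: IndexedSeq[Int],
                  oldStart: Int, oldEnd: Int, newStart: Int, newEnd: Int): Unit = {
    buffer.remove(0, newStart - oldStart)                    // entries that left the frame
    ((oldEnd + 1) to newEnd).foreach(i => buffer += rows(i)) // entries that entered the frame
  }

  def main(args: Array[String]): Unit = {
    val rows = IndexedSeq(10, 20, 30, 40, 50)
    val frame = ArrayBuffer(10, 20, 30)      // frame covering positions [0, 2]
    updateFrame(frame, rows, 0, 2, 1, 3)     // slide to positions [1, 3]
    println(frame)                           // ArrayBuffer(20, 30, 40)
  }
}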
- override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { if (withReplacement) { child.execute().map(_.copy()).sample(withReplacement, upperBound - lowerBound, seed) } else { @@ -99,7 +99,7 @@ case class Sample( case class Union(children: Seq[SparkPlan]) extends SparkPlan { // TODO: attributes output by union should be distinct for nullability purposes override def output: Seq[Attribute] = children.head.output - override def execute(): RDD[Row] = sparkContext.union(children.map(_.execute())) + protected override def doExecute(): RDD[Row] = sparkContext.union(children.map(_.execute())) } /** @@ -124,7 +124,7 @@ case class Limit(limit: Int, child: SparkPlan) override def executeCollect(): Array[Row] = child.executeTake(limit) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val rdd: RDD[_ <: Product2[Boolean, Row]] = if (sortBasedShuffleOn) { child.execute().mapPartitions { iter => iter.take(limit).map(row => (false, row.copy())) @@ -166,7 +166,7 @@ case class TakeOrdered(limit: Int, sortOrder: Seq[SortOrder], child: SparkPlan) // TODO: Terminal split should be implemented differently from non-terminal split. // TODO: Pick num splits based on |limit|. - override def execute(): RDD[Row] = sparkContext.makeRDD(collectData(), 1) + protected override def doExecute(): RDD[Row] = sparkContext.makeRDD(collectData(), 1) override def outputOrdering: Seq[SortOrder] = sortOrder } @@ -186,7 +186,7 @@ case class Sort( override def requiredChildDistribution: Seq[Distribution] = if (global) OrderedDistribution(sortOrder) :: Nil else UnspecifiedDistribution :: Nil - override def execute(): RDD[Row] = attachTree(this, "sort") { + protected override def doExecute(): RDD[Row] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => val ordering = newOrdering(sortOrder, child.output) iterator.map(_.copy()).toArray.sorted(ordering).iterator @@ -214,7 +214,7 @@ case class ExternalSort( override def requiredChildDistribution: Seq[Distribution] = if (global) OrderedDistribution(sortOrder) :: Nil else UnspecifiedDistribution :: Nil - override def execute(): RDD[Row] = attachTree(this, "sort") { + protected override def doExecute(): RDD[Row] = attachTree(this, "sort") { child.execute().mapPartitions( { iterator => val ordering = newOrdering(sortOrder, child.output) val sorter = new ExternalSorter[Row, Null, Row](ordering = Some(ordering)) @@ -244,7 +244,7 @@ case class Distinct(partial: Boolean, child: SparkPlan) extends UnaryNode { override def requiredChildDistribution: Seq[Distribution] = if (partial) UnspecifiedDistribution :: Nil else ClusteredDistribution(child.output) :: Nil - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => val hashSet = new scala.collection.mutable.HashSet[Row]() @@ -270,7 +270,7 @@ case class Repartition(numPartitions: Int, shuffle: Boolean, child: SparkPlan) extends UnaryNode { override def output: Seq[Attribute] = child.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().map(_.copy()).coalesce(numPartitions, shuffle) } } @@ -285,7 +285,7 @@ case class Repartition(numPartitions: Int, shuffle: Boolean, child: SparkPlan) case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode { override def output: Seq[Attribute] = left.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { 
left.execute().map(_.copy()).subtract(right.execute().map(_.copy())) } } @@ -299,7 +299,7 @@ case class Except(left: SparkPlan, right: SparkPlan) extends BinaryNode { case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode { override def output: Seq[Attribute] = children.head.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { left.execute().map(_.copy()).intersection(right.execute().map(_.copy())) } } @@ -314,5 +314,5 @@ case class Intersect(left: SparkPlan, right: SparkPlan) extends BinaryNode { case class OutputFaker(output: Seq[Attribute], child: SparkPlan) extends SparkPlan { def children: Seq[SparkPlan] = child :: Nil - def execute(): RDD[Row] = child.execute() + protected override def doExecute(): RDD[Row] = child.execute() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 98df5bef34efa..49b361e96b2d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -32,9 +32,11 @@ import org.apache.spark.sql.{DataFrame, SQLConf, SQLContext} * A logical command that is executed for its side-effects. `RunnableCommand`s are * wrapped in `ExecutedCommand` during execution. */ -trait RunnableCommand extends logical.Command { +private[sql] trait RunnableCommand extends LogicalPlan with logical.Command { self: Product => + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty def run(sqlContext: SQLContext): Seq[Row] } @@ -62,7 +64,7 @@ private[sql] case class ExecutedCommand(cmd: RunnableCommand) extends SparkPlan override def executeTake(limit: Int): Array[Row] = sideEffectResult.take(limit).toArray - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val converted = sideEffectResult.map(r => CatalystTypeConverters.convertToCatalyst(r, schema).asInstanceOf[Row]) sqlContext.sparkContext.parallelize(converted, 1) @@ -84,8 +86,14 @@ case class SetCommand( logWarning( s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " + s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS} instead.") - sqlContext.setConf(SQLConf.SHUFFLE_PARTITIONS, value) - Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value")) + if (value.toInt < 1) { + val msg = s"Setting negative ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} for automatically " + + "determining the number of reducers is not supported." + throw new IllegalArgumentException(msg) + } else { + sqlContext.setConf(SQLConf.SHUFFLE_PARTITIONS, value) + Seq(Row(s"${SQLConf.SHUFFLE_PARTITIONS}=$value")) + } // Configures a single property. 
case Some((key, Some(value))) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 710787096e6cb..dffb265601bdb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -125,7 +125,7 @@ package object debug { } } - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => new Iterator[Row] { def hasNext: Boolean = iter.hasNext @@ -193,7 +193,7 @@ package object debug { def children: List[SparkPlan] = child :: Nil - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().map { row => try typeCheck(row, child.schema) catch { case e: Exception => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala index 926f5e6c137ee..b8b12be8756f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoin.scala @@ -18,10 +18,10 @@ package org.apache.spark.sql.execution.joins import org.apache.spark.rdd.RDD +import org.apache.spark.util.ThreadUtils import scala.concurrent._ import scala.concurrent.duration._ -import scala.concurrent.ExecutionContext.Implicits.global import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.expressions.{Row, Expression} @@ -64,9 +64,9 @@ case class BroadcastHashJoin( val input: Array[Row] = buildPlan.execute().map(_.copy()).collect() val hashed = HashedRelation(input.iterator, buildSideKeyGenerator, input.length) sparkContext.broadcast(hashed) - } + }(BroadcastHashJoin.broadcastHashJoinExecutionContext) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val broadcastRelation = Await.result(broadcastFuture, timeout) streamedPlan.execute().mapPartitions { streamedIter => @@ -74,3 +74,9 @@ case class BroadcastHashJoin( } } } + +object BroadcastHashJoin { + + private val broadcastHashJoinExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("broadcast-hash-join", 128)) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala index 3ef1e0d7fbdd4..a32e5fc4f7ea4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastLeftSemiJoinHash.scala @@ -38,8 +38,8 @@ case class BroadcastLeftSemiJoinHash( override def output: Seq[Attribute] = left.output - override def execute(): RDD[Row] = { - val buildIter= buildPlan.execute().map(_.copy()).collect().toIterator + protected override def doExecute(): RDD[Row] = { + val buildIter = buildPlan.execute().map(_.copy()).collect().toIterator val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala index 6aaf35fb429e2..caad3dfbe1c5e 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastNestedLoopJoin.scala @@ -61,7 +61,7 @@ case class BroadcastNestedLoopJoin( @transient private lazy val boundCondition = newPredicate(condition.getOrElse(Literal(true)), left.output ++ right.output) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val broadcastedRelation = sparkContext.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala index 1cbc98354d673..191c00cb55da2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.{BinaryNode, SparkPlan} case class CartesianProduct(left: SparkPlan, right: SparkPlan) extends BinaryNode { override def output: Seq[Attribute] = left.output ++ right.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val leftResults = left.execute().map(_.copy()) val rightResults = right.execute().map(_.copy()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala index a396c0f5d56ee..45574392996ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashOuterJoin.scala @@ -183,7 +183,7 @@ case class HashOuterJoin( hashTable } - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val joinedRow = new JoinedRow() left.execute().zipPartitions(right.execute()) { (leftIter, rightIter) => // TODO this probably can be replaced by external sort (sort merged join?) 
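The BroadcastHashJoin change a little above moves the asynchronous build-side broadcast off the global ExecutionContext onto a dedicated pool of daemon threads, so slow broadcasts cannot starve unrelated futures. A standalone sketch using plain java.util.concurrent in place of Spark's ThreadUtils; the pool size, thread name, and data are illustrative:

import java.util.concurrent.{Executors, ThreadFactory}
import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, Future}

object DedicatedPoolSketch {
  // A bounded pool of daemon threads, standing in for
  // ThreadUtils.newDaemonCachedThreadPool("broadcast-hash-join", 128).
  private val broadcastPool = ExecutionContext.fromExecutorService(
    Executors.newFixedThreadPool(4, new ThreadFactory {
      override def newThread(r: Runnable): Thread = {
        val t = new Thread(r, "broadcast-hash-join")
        t.setDaemon(true)
        t
      }
    }))

  def main(args: Array[String]): Unit = {
    // The build side is prepared asynchronously on the dedicated pool.
    val buildSide = Future { Thread.sleep(50); Map(1 -> "a", 2 -> "b") }(broadcastPool)
    println(Await.result(buildSide, 5.seconds)(2))   // prints "b"
    broadcastPool.shutdown()
  }
}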
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala index b03af410dca08..036423e6faea4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinBNL.scala @@ -47,7 +47,7 @@ case class LeftSemiJoinBNL( @transient private lazy val boundCondition = newPredicate(condition.getOrElse(Literal(true)), left.output ++ right.output) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val broadcastedRelation = sparkContext.broadcast(broadcast.execute().map(_.copy()).collect().toIndexedSeq) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala index a04f2a63b5a55..8ad27eae80ffb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/LeftSemiJoinHash.scala @@ -42,7 +42,7 @@ case class LeftSemiJoinHash( override def output: Seq[Attribute] = left.output - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => val hashSet = new java.util.HashSet[Row]() var currentRow: Row = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala index a6cd8337c1c3e..219525d9d85f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledHashJoin.scala @@ -43,7 +43,7 @@ case class ShuffledHashJoin( override def requiredChildDistribution: Seq[ClusteredDistribution] = ClusteredDistribution(leftKeys) :: ClusteredDistribution(rightKeys) :: Nil - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { buildPlan.execute().zipPartitions(streamedPlan.execute()) { (buildIter, streamIter) => val hashed = HashedRelation(buildIter, buildSideKeyGenerator) hashJoin(streamIter, hashed) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala index b5123668ba11e..1a39fb4b96608 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoin.scala @@ -60,7 +60,7 @@ case class SortMergeJoin( private def requiredOrders(keys: Seq[Expression]): Seq[SortOrder] = keys.map(SortOrder(_, Ascending)) - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { val leftResults = left.execute().map(_.copy()) val rightResults = right.execute().map(_.copy()) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala index 7a43bfd8bc8d9..11b2897f76786 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/pythonUdfs.scala @@ -19,20 +19,21 @@ package org.apache.spark.sql.execution import java.util.{List => JList, 
Map => JMap} -import org.apache.spark.rdd.RDD - import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import net.razorvine.pickle.{Pickler, Unpickler} + import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.python.{PythonBroadcast, PythonRDD} import org.apache.spark.broadcast.Broadcast +import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ import org.apache.spark.{Accumulator, Logging => SparkLogging} @@ -45,6 +46,7 @@ private[spark] case class PythonUDF( envVars: JMap[String, String], pythonIncludes: JList[String], pythonExec: String, + pythonVer: String, broadcastVars: JList[Broadcast[PythonBroadcast]], accumulator: Accumulator[JList[Array[Byte]]], dataType: DataType, @@ -219,8 +221,8 @@ case class EvaluatePython( /** * :: DeveloperApi :: - * Uses PythonRDD to evaluate a [[PythonUDF]], one partition of tuples at a time. The input - * data is cached and zipped with the result of the udf evaluation. + * Uses PythonRDD to evaluate a [[PythonUDF]], one partition of tuples at a time. + * The input data is zipped with the result of the udf evaluation. */ @DeveloperApi case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: SparkPlan) @@ -228,9 +230,8 @@ case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: def children: Seq[SparkPlan] = child :: Nil - def execute(): RDD[Row] = { - // TODO: Clean up after ourselves? - val childResults = child.execute().map(_.copy()).cache() + protected override def doExecute(): RDD[Row] = { + val childResults = child.execute().map(_.copy()) val parent = childResults.mapPartitions { iter => val pickle = new Pickler @@ -251,6 +252,7 @@ case class BatchPythonEvaluation(udf: PythonUDF, output: Seq[Attribute], child: udf.pythonIncludes, false, udf.pythonExec, + udf.pythonVer, udf.broadcastVars, udf.accumulator ).mapPartitions { iter => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala index 5ae7e107544f8..c41c21c0eeb50 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/FrequentItems.scala @@ -62,7 +62,7 @@ private[sql] object FrequentItems extends Logging { } /** - * Finding frequent items for columns, possibly with false positives. Using the + * Finding frequent items for columns, possibly with false positives. Using the * frequent element count algorithm described in * [[http://dx.doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou]]. * The `support` should be greater than 1e-4. @@ -75,7 +75,7 @@ private[sql] object FrequentItems extends Logging { * @return A Local DataFrame with the Array of frequent items for each column. 
*/ private[sql] def singlePassFreqItems( - df: DataFrame, + df: DataFrame, cols: Seq[String], support: Double): DataFrame = { require(support >= 1e-4, s"support ($support) must be greater than 1e-4.") @@ -88,8 +88,8 @@ private[sql] object FrequentItems extends Logging { val index = originalSchema.fieldIndex(name) (name, originalSchema.fields(index).dataType) } - - val freqItems = df.select(cols.map(Column(_)):_*).rdd.aggregate(countMaps)( + + val freqItems = df.select(cols.map(Column(_)) : _*).rdd.aggregate(countMaps)( seqOp = (counts, row) => { var i = 0 while (i < numCols) { @@ -110,7 +110,7 @@ private[sql] object FrequentItems extends Logging { } ) val justItems = freqItems.map(m => m.baseMap.keys.toSeq) - val resultRow = Row(justItems:_*) + val resultRow = Row(justItems : _*) // append frequent Items to the column name for easy debugging val outputCols = colInfo.map { v => StructField(v._1 + "_freqItems", ArrayType(v._2, false)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 67b48e58b17ab..93383e5a62f11 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.execution.stat -import org.apache.spark.sql.catalyst.expressions.Cast -import org.apache.spark.sql.{Column, DataFrame} -import org.apache.spark.sql.types.{DoubleType, NumericType} +import org.apache.spark.Logging +import org.apache.spark.sql.{Row, Column, DataFrame} +import org.apache.spark.sql.catalyst.expressions.{GenericMutableRow, Cast} +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ + +private[sql] object StatFunctions extends Logging { -private[sql] object StatFunctions { - /** Calculate the Pearson Correlation Coefficient for the given columns */ private[sql] def pearsonCorrelation(df: DataFrame, cols: Seq[String]): Double = { val counts = collectStatisticalData(df, cols) @@ -35,7 +38,7 @@ private[sql] object StatFunctions { var yAvg = 0.0 // the mean of all examples seen so far in col2 var Ck = 0.0 // the co-moment after k examples var MkX = 0.0 // sum of squares of differences from the (current) mean for col1 - var MkY = 0.0 // sum of squares of differences from the (current) mean for col1 + var MkY = 0.0 // sum of squares of differences from the (current) mean for col2 var count = 0L // count of observed examples // add an example to the calculation def add(x: Double, y: Double): this.type = { @@ -52,15 +55,17 @@ private[sql] object StatFunctions { // merge counters from other partitions. 
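singlePassFreqItems above follows the one-pass frequent-element count the scaladoc cites (Karp, Schenker, and Papadimitriou): keep at most k counters, and when a new item arrives while all counters are taken, decrement every counter and drop the zeros. A small standalone sketch of that counting scheme, where the counter budget k plays the role of 1/support:

import scala.collection.mutable

object FreqItemsSketch {
  def frequentItems[A](items: Iterator[A], k: Int): Set[A] = {
    val counts = mutable.Map.empty[A, Long]
    items.foreach { item =>
      if (counts.contains(item)) counts(item) += 1
      else if (counts.size < k) counts(item) = 1
      else {
        // Map is full and the item is new: decrement every counter, drop zeros.
        counts.keys.toList.foreach { key =>
          counts(key) -= 1
          if (counts(key) == 0) counts.remove(key)
        }
      }
    }
    counts.keySet.toSet
  }

  def main(args: Array[String]): Unit = {
    val data = Iterator.fill(60)("a") ++ Iterator.fill(25)("b") ++ (1 to 15).iterator.map(_.toString)
    println(frequentItems(data, 2))   // expected to contain "a" and "b"
  }
}

The items kept at the end are a superset of those with frequency above n / (k + 1), which is why the result may contain false positives, as the scaladoc notes.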
Formula can be found at: // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance def merge(other: CovarianceCounter): this.type = { - val totalCount = count + other.count - val deltaX = xAvg - other.xAvg - val deltaY = yAvg - other.yAvg - Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count - xAvg = (xAvg * count + other.xAvg * other.count) / totalCount - yAvg = (yAvg * count + other.yAvg * other.count) / totalCount - MkX += other.MkX + deltaX * deltaX * count / totalCount * other.count - MkY += other.MkY + deltaY * deltaY * count / totalCount * other.count - count = totalCount + if (other.count > 0) { + val totalCount = count + other.count + val deltaX = xAvg - other.xAvg + val deltaY = yAvg - other.yAvg + Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count + xAvg = (xAvg * count + other.xAvg * other.count) / totalCount + yAvg = (yAvg * count + other.yAvg * other.count) / totalCount + MkX += other.MkX + deltaX * deltaX * count / totalCount * other.count + MkY += other.MkY + deltaY * deltaY * count / totalCount * other.count + count = totalCount + } this } // return the sample covariance for the observed examples @@ -95,4 +100,35 @@ private[sql] object StatFunctions { val counts = collectStatisticalData(df, cols) counts.cov } + + /** Generate a table of frequencies for the elements of two columns. */ + private[sql] def crossTabulate(df: DataFrame, col1: String, col2: String): DataFrame = { + val tableName = s"${col1}_$col2" + val counts = df.groupBy(col1, col2).agg(count("*")).take(1e6.toInt) + if (counts.length == 1e6.toInt) { + logWarning("The maximum limit of 1e6 pairs have been collected, which may not be all of " + + "the pairs. Please try reducing the amount of distinct items in your columns.") + } + // get the distinct values of column 2, so that we can make them the column names + val distinctCol2 = counts.map(_.get(1)).distinct.zipWithIndex.toMap + val columnSize = distinctCol2.size + require(columnSize < 1e4, s"The number of distinct values for $col2, can't " + + s"exceed 1e4. Currently $columnSize") + val table = counts.groupBy(_.get(0)).map { case (col1Item, rows) => + val countsRow = new GenericMutableRow(columnSize + 1) + rows.foreach { (row: Row) => + // row.get(0) is column 1 + // row.get(1) is column 2 + // row.get(3) is the frequency + countsRow.setLong(distinctCol2.get(row.get(1)).get + 1, row.getLong(2)) + } + // the value of col1 is the first value, the rest are the counts + countsRow.setString(0, col1Item.toString) + countsRow + }.toSeq + val headerNames = distinctCol2.map(r => StructField(r._1.toString, LongType)).toSeq + val schema = StructType(StructField(tableName, StringType) +: headerNames) + + new DataFrame(df.sqlContext, LocalRelation(schema.toAttributes, table)).na.fill(0.0) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala new file mode 100644 index 0000000000000..e9b60841fc28c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
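The merge change above guards against empty counters: without the check, merging two zero-count counters (for example, from empty partitions) divides by zero and turns the running statistics into NaN. A standalone sketch of the counter with that guard; the add() update shown here is the standard one-pass formula and is an assumption, since only merge() appears in the hunk above:

final class CovCounter {
  var xAvg = 0.0; var yAvg = 0.0; var Ck = 0.0; var count = 0L

  // One-pass update with a single observation (assumed standard formula).
  def add(x: Double, y: Double): this.type = {
    val deltaX = x - xAvg
    count += 1
    xAvg += deltaX / count
    yAvg += (y - yAvg) / count
    Ck += deltaX * (y - yAvg)
    this
  }

  // Merge another counter; a no-op when the other counter saw no data.
  def merge(other: CovCounter): this.type = {
    if (other.count > 0) {                       // the guard added by the patch
      val totalCount = count + other.count
      val deltaX = xAvg - other.xAvg
      val deltaY = yAvg - other.yAvg
      Ck += other.Ck + deltaX * deltaY * count / totalCount * other.count
      xAvg = (xAvg * count + other.xAvg * other.count) / totalCount
      yAvg = (yAvg * count + other.yAvg * other.count) / totalCount
      count = totalCount
    }
    this
  }

  def sampleCov: Double = Ck / (count - 1)
}

object CovCounterDemo {
  def main(args: Array[String]): Unit = {
    val left = new CovCounter
    Seq((1.0, 2.0), (2.0, 4.0)).foreach { case (x, y) => left.add(x, y) }
    println(left.merge(new CovCounter).sampleCov)   // 1.0: merging an empty counter changes nothing
  }
}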
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.expressions + +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.expressions._ + +/** + * :: Experimental :: + * Utility functions for defining window in DataFrames. + * + * {{{ + * // PARTITION BY country ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + * Window.partitionBy("country").orderBy("date").rowsBetween(Long.MinValue, 0) + * + * // PARTITION BY country ORDER BY date ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING + * Window.partitionBy("country").orderBy("date").rowsBetween(-3, 3) + * }}} + * + * @since 1.4.0 + */ +@Experimental +object Window { + + /** + * Creates a [[WindowSpec]] with the partitioning defined. + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(colName: String, colNames: String*): WindowSpec = { + spec.partitionBy(colName, colNames : _*) + } + + /** + * Creates a [[WindowSpec]] with the partitioning defined. + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(cols: Column*): WindowSpec = { + spec.partitionBy(cols : _*) + } + + /** + * Creates a [[WindowSpec]] with the ordering defined. + * @since 1.4.0 + */ + @scala.annotation.varargs + def orderBy(colName: String, colNames: String*): WindowSpec = { + spec.orderBy(colName, colNames : _*) + } + + /** + * Creates a [[WindowSpec]] with the ordering defined. + * @since 1.4.0 + */ + @scala.annotation.varargs + def orderBy(cols: Column*): WindowSpec = { + spec.orderBy(cols : _*) + } + + private def spec: WindowSpec = { + new WindowSpec(Seq.empty, Seq.empty, UnspecifiedFrame) + } + +} + +/** + * :: Experimental :: + * Utility functions for defining window in DataFrames. + * + * {{{ + * // PARTITION BY country ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + * Window.partitionBy("country").orderBy("date").rowsBetween(Long.MinValue, 0) + * + * // PARTITION BY country ORDER BY date ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING + * Window.partitionBy("country").orderBy("date").rowsBetween(-3, 3) + * }}} + * + * @since 1.4.0 + */ +@Experimental +class Window private() // So we can see Window in JavaDoc. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala new file mode 100644 index 0000000000000..c3d2246297021 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.expressions + +import org.apache.spark.annotation.Experimental +import org.apache.spark.sql.{Column, catalyst} +import org.apache.spark.sql.catalyst.expressions._ + + +/** + * :: Experimental :: + * A window specification that defines the partitioning, ordering, and frame boundaries. + * + * Use the static methods in [[Window]] to create a [[WindowSpec]]. + * + * @since 1.4.0 + */ +@Experimental +class WindowSpec private[sql]( + partitionSpec: Seq[Expression], + orderSpec: Seq[SortOrder], + frame: catalyst.expressions.WindowFrame) { + + /** + * Defines the partitioning columns in a [[WindowSpec]]. + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(colName: String, colNames: String*): WindowSpec = { + partitionBy((colName +: colNames).map(Column(_)): _*) + } + + /** + * Defines the partitioning columns in a [[WindowSpec]]. + * @since 1.4.0 + */ + @scala.annotation.varargs + def partitionBy(cols: Column*): WindowSpec = { + new WindowSpec(cols.map(_.expr), orderSpec, frame) + } + + /** + * Defines the ordering columns in a [[WindowSpec]]. + * @since 1.4.0 + */ + @scala.annotation.varargs + def orderBy(colName: String, colNames: String*): WindowSpec = { + orderBy((colName +: colNames).map(Column(_)): _*) + } + + /** + * Defines the ordering columns in a [[WindowSpec]]. + * @since 1.4.0 + */ + @scala.annotation.varargs + def orderBy(cols: Column*): WindowSpec = { + val sortOrder: Seq[SortOrder] = cols.map { col => + col.expr match { + case expr: SortOrder => + expr + case expr: Expression => + SortOrder(expr, Ascending) + } + } + new WindowSpec(partitionSpec, sortOrder, frame) + } + + /** + * Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). + * + * Both `start` and `end` are relative positions from the current row. For example, "0" means + * "current row", while "-1" means the row before the current row, and "5" means the fifth row + * after the current row. + * + * @param start boundary start, inclusive. + * The frame is unbounded if this is the minimum long value. + * @param end boundary end, inclusive. + * The frame is unbounded if this is the maximum long value. + * @since 1.4.0 + */ + def rowsBetween(start: Long, end: Long): WindowSpec = { + between(RowFrame, start, end) + } + + /** + * Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). + * + * Both `start` and `end` are relative from the current row. For example, "0" means "current row", + * while "-1" means one off before the current row, and "5" means the five off after the + * current row. + * + * @param start boundary start, inclusive. + * The frame is unbounded if this is the minimum long value. + * @param end boundary end, inclusive. + * The frame is unbounded if this is the maximum long value. 
+ * @since 1.4.0 + */ + def rangeBetween(start: Long, end: Long): WindowSpec = { + between(RangeFrame, start, end) + } + + private def between(typ: FrameType, start: Long, end: Long): WindowSpec = { + val boundaryStart = start match { + case 0 => CurrentRow + case Long.MinValue => UnboundedPreceding + case x if x < 0 => ValuePreceding(-start.toInt) + case x if x > 0 => ValueFollowing(start.toInt) + } + + val boundaryEnd = end match { + case 0 => CurrentRow + case Long.MaxValue => UnboundedFollowing + case x if x < 0 => ValuePreceding(-end.toInt) + case x if x > 0 => ValueFollowing(end.toInt) + } + + new WindowSpec( + partitionSpec, + orderSpec, + SpecifiedWindowFrame(typ, boundaryStart, boundaryEnd)) + } + + /** + * Converts this [[WindowSpec]] into a [[Column]] with an aggregate expression. + */ + private[sql] def withAggregate(aggregate: Column): Column = { + val windowExpr = aggregate.expr match { + case Average(child) => WindowExpression( + UnresolvedWindowFunction("avg", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case Sum(child) => WindowExpression( + UnresolvedWindowFunction("sum", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case Count(child) => WindowExpression( + UnresolvedWindowFunction("count", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case First(child) => WindowExpression( + // TODO this is a hack for Hive UDAF first_value + UnresolvedWindowFunction("first_value", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case Last(child) => WindowExpression( + // TODO this is a hack for Hive UDAF last_value + UnresolvedWindowFunction("last_value", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case Min(child) => WindowExpression( + UnresolvedWindowFunction("min", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case Max(child) => WindowExpression( + UnresolvedWindowFunction("max", child :: Nil), + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case wf: WindowFunction => WindowExpression( + wf, + WindowSpecDefinition(partitionSpec, orderSpec, frame)) + case x => + throw new UnsupportedOperationException(s"$x is not supported in window operation.") + } + new Column(windowExpr) + } + +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 7e283393d0563..77327f2b84eaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -22,8 +22,9 @@ import scala.reflect.runtime.universe.{TypeTag, typeTag} import org.apache.spark.annotation.Experimental import org.apache.spark.sql.catalyst.ScalaReflection -import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, Star} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, Star} import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.mathfuncs._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -35,7 +36,10 @@ import org.apache.spark.util.Utils * @groupname agg_funcs Aggregate functions * @groupname sort_funcs Sorting functions * @groupname normal_funcs Non-aggregate functions + * @groupname math_funcs Math functions + * @groupname window_funcs Window functions * @groupname Ungrouped Support functions for DataFrames. 
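To make the frame semantics concrete: rowsBetween counts physical rows, while rangeBetween compares the ordering expression's values. A sketch under the same Column.over assumption as above, with df an assumed DataFrame of account, ts, and amount columns:

{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// Running total: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW.
val running = Window.partitionBy("account").orderBy("ts").rowsBetween(Long.MinValue, 0)

// Value-based frame: every row whose amount lies within 10 of the current row's
// amount, however many rows that is (RANGE BETWEEN 10 PRECEDING AND CURRENT ROW).
val nearby = Window.partitionBy("account").orderBy("amount").rangeBetween(-10, 0)

df.select(
  col("account"), col("ts"), col("amount"),
  sum("amount").over(running).as("running_total"),
  count("amount").over(nearby).as("peers_within_10"))
}}}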
+ * @since 1.3.0 */ @Experimental // scalastyle:off @@ -48,6 +52,7 @@ object functions { * Returns a [[Column]] based on the given column name. * * @group normal_funcs + * @since 1.3.0 */ def col(colName: String): Column = Column(colName) @@ -55,6 +60,7 @@ object functions { * Returns a [[Column]] based on the given column name. Alias of [[col]]. * * @group normal_funcs + * @since 1.3.0 */ def column(colName: String): Column = Column(colName) @@ -66,6 +72,7 @@ object functions { * Otherwise, a new [[Column]] is created to represent the literal value. * * @group normal_funcs + * @since 1.3.0 */ def lit(literal: Any): Column = { literal match { @@ -84,23 +91,25 @@ object functions { /** * Returns a sort expression based on ascending order of the column. - * {{ + * {{{ * // Sort by dept in ascending order, and then age in descending order. * df.sort(asc("dept"), desc("age")) - * }} + * }}} * * @group sort_funcs + * @since 1.3.0 */ def asc(columnName: String): Column = Column(columnName).asc /** * Returns a sort expression based on the descending order of the column. - * {{ + * {{{ * // Sort by dept in ascending order, and then age in descending order. * df.sort(asc("dept"), desc("age")) - * }} + * }}} * * @group sort_funcs + * @since 1.3.0 */ def desc(columnName: String): Column = Column(columnName).desc @@ -112,6 +121,7 @@ object functions { * Aggregate function: returns the sum of all values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sum(e: Column): Column = Sum(e.expr) @@ -119,6 +129,7 @@ object functions { * Aggregate function: returns the sum of all values in the given column. * * @group agg_funcs + * @since 1.3.0 */ def sum(columnName: String): Column = sum(Column(columnName)) @@ -126,6 +137,7 @@ object functions { * Aggregate function: returns the sum of distinct values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sumDistinct(e: Column): Column = SumDistinct(e.expr) @@ -133,6 +145,7 @@ object functions { * Aggregate function: returns the sum of distinct values in the expression. * * @group agg_funcs + * @since 1.3.0 */ def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName)) @@ -140,6 +153,7 @@ object functions { * Aggregate function: returns the number of items in a group. * * @group agg_funcs + * @since 1.3.0 */ def count(e: Column): Column = e.expr match { // Turn count(*) into count(1) @@ -151,6 +165,7 @@ object functions { * Aggregate function: returns the number of items in a group. * * @group agg_funcs + * @since 1.3.0 */ def count(columnName: String): Column = count(Column(columnName)) @@ -158,6 +173,7 @@ object functions { * Aggregate function: returns the number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ @scala.annotation.varargs def countDistinct(expr: Column, exprs: Column*): Column = @@ -167,15 +183,17 @@ object functions { * Aggregate function: returns the number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ @scala.annotation.varargs def countDistinct(columnName: String, columnNames: String*): Column = - countDistinct(Column(columnName), columnNames.map(Column.apply) :_*) + countDistinct(Column(columnName), columnNames.map(Column.apply) : _*) /** * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(e: Column): Column = ApproxCountDistinct(e.expr) @@ -183,6 +201,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. 
* * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(columnName: String): Column = approxCountDistinct(column(columnName)) @@ -190,6 +209,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(e: Column, rsd: Double): Column = ApproxCountDistinct(e.expr, rsd) @@ -197,6 +217,7 @@ object functions { * Aggregate function: returns the approximate number of distinct items in a group. * * @group agg_funcs + * @since 1.3.0 */ def approxCountDistinct(columnName: String, rsd: Double): Column = { approxCountDistinct(Column(columnName), rsd) @@ -206,6 +227,7 @@ object functions { * Aggregate function: returns the average of the values in a group. * * @group agg_funcs + * @since 1.3.0 */ def avg(e: Column): Column = Average(e.expr) @@ -213,6 +235,7 @@ object functions { * Aggregate function: returns the average of the values in a group. * * @group agg_funcs + * @since 1.3.0 */ def avg(columnName: String): Column = avg(Column(columnName)) @@ -220,6 +243,7 @@ object functions { * Aggregate function: returns the first value in a group. * * @group agg_funcs + * @since 1.3.0 */ def first(e: Column): Column = First(e.expr) @@ -227,6 +251,7 @@ object functions { * Aggregate function: returns the first value of a column in a group. * * @group agg_funcs + * @since 1.3.0 */ def first(columnName: String): Column = first(Column(columnName)) @@ -234,6 +259,7 @@ object functions { * Aggregate function: returns the last value in a group. * * @group agg_funcs + * @since 1.3.0 */ def last(e: Column): Column = Last(e.expr) @@ -241,13 +267,33 @@ object functions { * Aggregate function: returns the last value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def last(columnName: String): Column = last(Column(columnName)) + /** + * Aggregate function: returns the average of the values in a group. + * Alias for avg. + * + * @group agg_funcs + * @since 1.4.0 + */ + def mean(e: Column): Column = avg(e) + + /** + * Aggregate function: returns the average of the values in a group. + * Alias for avg. + * + * @group agg_funcs + * @since 1.4.0 + */ + def mean(columnName: String): Column = avg(columnName) + /** * Aggregate function: returns the minimum value of the expression in a group. * * @group agg_funcs + * @since 1.3.0 */ def min(e: Column): Column = Min(e.expr) @@ -255,6 +301,7 @@ object functions { * Aggregate function: returns the minimum value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def min(columnName: String): Column = min(Column(columnName)) @@ -262,6 +309,7 @@ object functions { * Aggregate function: returns the maximum value of the expression in a group. * * @group agg_funcs + * @since 1.3.0 */ def max(e: Column): Column = Max(e.expr) @@ -269,9 +317,222 @@ object functions { * Aggregate function: returns the maximum value of the column in a group. * * @group agg_funcs + * @since 1.3.0 */ def max(columnName: String): Column = max(Column(columnName)) + ////////////////////////////////////////////////////////////////////////////////////////////// + // Window functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Window function: returns the value that is `offset` rows before the current row, and + * `null` if there is less than `offset` rows before the current row. For example, + * an `offset` of one will return the previous row at any given point in the window partition. 
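A grouped-aggregation sketch combining the existing aggregates with the new mean alias; df is an assumed DataFrame with dept, salary, and employee_id columns:

{{{
import org.apache.spark.sql.functions._

df.groupBy("dept").agg(
  mean("salary").as("avg_salary"),       // 1.4 alias for avg
  min("salary").as("min_salary"),
  max("salary").as("max_salary"),
  countDistinct("employee_id").as("headcount"))
}}}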
+ * + * This is equivalent to the LAG function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lag(e: Column, offset: Int): Column = { + lag(e, offset, null) + } + + /** + * Window function: returns the value that is `offset` rows before the current row, and + * `null` if there is less than `offset` rows before the current row. For example, + * an `offset` of one will return the previous row at any given point in the window partition. + * + * This is equivalent to the LAG function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lag(columnName: String, offset: Int): Column = { + lag(columnName, offset, null) + } + + /** + * Window function: returns the value that is `offset` rows before the current row, and + * `defaultValue` if there is less than `offset` rows before the current row. For example, + * an `offset` of one will return the previous row at any given point in the window partition. + * + * This is equivalent to the LAG function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lag(columnName: String, offset: Int, defaultValue: Any): Column = { + lag(Column(columnName), offset, defaultValue) + } + + /** + * Window function: returns the value that is `offset` rows before the current row, and + * `defaultValue` if there is less than `offset` rows before the current row. For example, + * an `offset` of one will return the previous row at any given point in the window partition. + * + * This is equivalent to the LAG function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lag(e: Column, offset: Int, defaultValue: Any): Column = { + UnresolvedWindowFunction("lag", e.expr :: Literal(offset) :: Literal(defaultValue) :: Nil) + } + + /** + * Window function: returns the value that is `offset` rows after the current row, and + * `null` if there is less than `offset` rows after the current row. For example, + * an `offset` of one will return the next row at any given point in the window partition. + * + * This is equivalent to the LEAD function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lead(columnName: String, offset: Int): Column = { + lead(columnName, offset, null) + } + + /** + * Window function: returns the value that is `offset` rows after the current row, and + * `null` if there is less than `offset` rows after the current row. For example, + * an `offset` of one will return the next row at any given point in the window partition. + * + * This is equivalent to the LEAD function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lead(e: Column, offset: Int): Column = { + lead(e, offset, null) + } + + /** + * Window function: returns the value that is `offset` rows after the current row, and + * `defaultValue` if there is less than `offset` rows after the current row. For example, + * an `offset` of one will return the next row at any given point in the window partition. + * + * This is equivalent to the LEAD function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def lead(columnName: String, offset: Int, defaultValue: Any): Column = { + lead(Column(columnName), offset, defaultValue) + } + + /** + * Window function: returns the value that is `offset` rows after the current row, and + * `defaultValue` if there is less than `offset` rows after the current row. For example, + * an `offset` of one will return the next row at any given point in the window partition. + * + * This is equivalent to the LEAD function in SQL. 
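A sketch of lag and lead over an ordered partition, under the same Column.over assumption; df is an assumed price table with symbol, day, and close columns:

{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

val w = Window.partitionBy("symbol").orderBy("day")

df.select(
  col("symbol"), col("day"), col("close"),
  lag("close", 1, 0.0).over(w).as("prev_close"),   // 0.0 at the start of each partition
  lead("close", 1, 0.0).over(w).as("next_close"))  // 0.0 at the end of each partition
}}}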
+ * + * @group window_funcs + * @since 1.4.0 + */ + def lead(e: Column, offset: Int, defaultValue: Any): Column = { + UnresolvedWindowFunction("lead", e.expr :: Literal(offset) :: Literal(defaultValue) :: Nil) + } + + /** + * Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window + * partition. Fow example, if `n` is 4, the first quarter of the rows will get value 1, the second + * quarter will get 2, the third quarter will get 3, and the last quarter will get 4. + * + * This is equivalent to the NTILE function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def ntile(n: Int): Column = { + UnresolvedWindowFunction("ntile", lit(n).expr :: Nil) + } + + /** + * Window function: returns a sequential number starting at 1 within a window partition. + * + * This is equivalent to the ROW_NUMBER function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def rowNumber(): Column = { + UnresolvedWindowFunction("row_number", Nil) + } + + /** + * Window function: returns the rank of rows within a window partition, without any gaps. + * + * The difference between rank and denseRank is that denseRank leaves no gaps in ranking + * sequence when there are ties. That is, if you were ranking a competition using denseRank + * and had three people tie for second place, you would say that all three were in second + * place and that the next person came in third. + * + * This is equivalent to the DENSE_RANK function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def denseRank(): Column = { + UnresolvedWindowFunction("dense_rank", Nil) + } + + /** + * Window function: returns the rank of rows within a window partition. + * + * The difference between rank and denseRank is that denseRank leaves no gaps in ranking + * sequence when there are ties. That is, if you were ranking a competition using denseRank + * and had three people tie for second place, you would say that all three were in second + * place and that the next person came in third. + * + * This is equivalent to the RANK function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def rank(): Column = { + UnresolvedWindowFunction("rank", Nil) + } + + /** + * Window function: returns the cumulative distribution of values within a window partition, + * i.e. the fraction of rows that are below the current row. + * + * {{{ + * N = total number of rows in the partition + * cumeDist(x) = number of values before (and including) x / N + * }}} + * + * + * This is equivalent to the CUME_DIST function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def cumeDist(): Column = { + UnresolvedWindowFunction("cume_dist", Nil) + } + + /** + * Window function: returns the relative rank (i.e. percentile) of rows within a window partition. + * + * This is computed by: + * {{{ + * (rank of row in its partition - 1) / (number of rows in the partition - 1) + * }}} + * + * This is equivalent to the PERCENT_RANK function in SQL. + * + * @group window_funcs + * @since 1.4.0 + */ + def percentRank(): Column = { + UnresolvedWindowFunction("percent_rank", Nil) + } + ////////////////////////////////////////////////////////////////////////////////////////////// // Non-aggregate functions ////////////////////////////////////////////////////////////////////////////////////////////// @@ -280,6 +541,7 @@ object functions { * Computes the absolute value. 
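The ranking functions side by side, which also makes the rank versus denseRank gap behavior visible; same Column.over assumption, with df an assumed DataFrame of dept and salary columns:

{{{
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

val w = Window.partitionBy("dept").orderBy(col("salary").desc)

df.select(
  col("dept"), col("salary"),
  rank().over(w).as("rank"),              // ties leave gaps: 1, 2, 2, 4
  denseRank().over(w).as("dense_rank"),   // ties leave no gaps: 1, 2, 2, 3
  rowNumber().over(w).as("row_number"),   // unique but arbitrary within ties
  ntile(4).over(w).as("quartile"),
  percentRank().over(w).as("pct_rank"),
  cumeDist().over(w).as("cume_dist"))
}}}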
* * @group normal_funcs + * @since 1.3.0 */ def abs(e: Column): Column = Abs(e.expr) @@ -287,6 +549,7 @@ object functions { * Creates a new array column. The input columns must all have the same data type. * * @group normal_funcs + * @since 1.4.0 */ @scala.annotation.varargs def array(cols: Column*): Column = CreateArray(cols.map(_.expr)) @@ -295,6 +558,7 @@ object functions { * Creates a new array column. The input columns must all have the same data type. * * @group normal_funcs + * @since 1.4.0 */ def array(colName: String, colNames: String*): Column = { array((colName +: colNames).map(col) : _*) @@ -307,14 +571,21 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ @scala.annotation.varargs def coalesce(e: Column*): Column = Coalesce(e.map(_.expr)) + /** + * Creates a new row for each element in the given array or map column. + */ + def explode(e: Column): Column = Explode(e.expr) + /** * Converts a string exprsesion to lower case. * * @group normal_funcs + * @since 1.3.0 */ def lower(e: Column): Column = Lower(e.expr) @@ -331,6 +602,7 @@ object functions { * 0, 1, 2, 8589934592 (1L << 33), 8589934593, 8589934594. * * @group normal_funcs + * @since 1.4.0 */ def monotonicallyIncreasingId(): Column = execution.expressions.MonotonicallyIncreasingID() @@ -346,27 +618,55 @@ object functions { * }}} * * @group normal_funcs + * @since 1.3.0 */ def negate(e: Column): Column = -e /** * Inversion of boolean expression, i.e. NOT. - * {{ + * {{{ * // Scala: select rows that are not active (isActive === false) * df.filter( !df("isActive") ) * * // Java: * df.filter( not(df.col("isActive")) ); - * }} + * }}} * * @group normal_funcs + * @since 1.3.0 */ def not(e: Column): Column = !e + /** + * Evaluates a list of conditions and returns one of multiple possible result expressions. + * If otherwise is not defined at the end, null is returned for unmatched conditions. + * + * {{{ + * // Example: encoding gender string column into integer. + * + * // Scala: + * people.select(when(people("gender") === "male", 0) + * .when(people("gender") === "female", 1) + * .otherwise(2)) + * + * // Java: + * people.select(when(col("gender").equalTo("male"), 0) + * .when(col("gender").equalTo("female"), 1) + * .otherwise(2)) + * }}} + * + * @group normal_funcs + * @since 1.4.0 + */ + def when(condition: Column, value: Any): Column = { + CaseWhen(Seq(condition.expr, lit(value).expr)) + } + /** * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * * @group normal_funcs + * @since 1.4.0 */ def rand(seed: Long): Column = Rand(seed) @@ -374,6 +674,7 @@ object functions { * Generate a random column with i.i.d. samples from U[0.0, 1.0]. * * @group normal_funcs + * @since 1.4.0 */ def rand(): Column = rand(Utils.random.nextLong) @@ -381,6 +682,7 @@ object functions { * Generate a column with i.i.d. samples from the standard normal distribution. * * @group normal_funcs + * @since 1.4.0 */ def randn(seed: Long): Column = Randn(seed) @@ -388,6 +690,7 @@ object functions { * Generate a column with i.i.d. samples from the standard normal distribution. * * @group normal_funcs + * @since 1.4.0 */ def randn(): Column = randn(Utils.random.nextLong) @@ -397,6 +700,7 @@ object functions { * Note that this is indeterministic because it depends on data partitioning and task scheduling. * * @group normal_funcs + * @since 1.4.0 */ def sparkPartitionId(): Column = execution.expressions.SparkPartitionID @@ -404,6 +708,7 @@ object functions { * Computes the square root of the specified float value. 
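A sketch of the new conditional, generator, and id helpers; people is an assumed DataFrame with name, gender, and emails (array) columns, and the when/otherwise chaining follows the Scaladoc example above:

{{{
import org.apache.spark.sql.functions._

// Encode a string column; rows matching neither branch fall through to otherwise.
val genderCode = when(col("gender") === "male", 0)
  .when(col("gender") === "female", 1)
  .otherwise(2)

people.select(col("name"), genderCode.as("gender_code"),
  monotonicallyIncreasingId().as("row_id"))   // unique per row, not consecutive

// explode produces one output row per element of the array column.
people.select(col("name"), explode(col("emails")).as("email"))
}}}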
* * @group normal_funcs + * @since 1.3.0 */ def sqrt(e: Column): Column = Sqrt(e.expr) @@ -412,6 +717,7 @@ object functions { * a derived column expression that is named (i.e. aliased). * * @group normal_funcs + * @since 1.4.0 */ @scala.annotation.varargs def struct(cols: Column*): Column = { @@ -424,6 +730,7 @@ object functions { * Creates a new struct column that composes multiple input columns. * * @group normal_funcs + * @since 1.4.0 */ def struct(colName: String, colNames: String*): Column = { struct((colName +: colNames).map(col) : _*) @@ -433,9 +740,567 @@ object functions { * Converts a string expression to upper case. * * @group normal_funcs + * @since 1.3.0 */ def upper(e: Column): Column = Upper(e.expr) + /** + * Computes bitwise NOT. + * + * @group normal_funcs + * @since 1.4.0 + */ + def bitwiseNOT(e: Column): Column = BitwiseNot(e.expr) + + ////////////////////////////////////////////////////////////////////////////////////////////// + // Math Functions + ////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * Computes the cosine inverse of the given value; the returned angle is in the range + * 0.0 through pi. + * + * @group math_funcs + * @since 1.4.0 + */ + def acos(e: Column): Column = Acos(e.expr) + + /** + * Computes the cosine inverse of the given column; the returned angle is in the range + * 0.0 through pi. + * + * @group math_funcs + * @since 1.4.0 + */ + def acos(columnName: String): Column = acos(Column(columnName)) + + /** + * Computes the sine inverse of the given value; the returned angle is in the range + * -pi/2 through pi/2. + * + * @group math_funcs + * @since 1.4.0 + */ + def asin(e: Column): Column = Asin(e.expr) + + /** + * Computes the sine inverse of the given column; the returned angle is in the range + * -pi/2 through pi/2. + * + * @group math_funcs + * @since 1.4.0 + */ + def asin(columnName: String): Column = asin(Column(columnName)) + + /** + * Computes the tangent inverse of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def atan(e: Column): Column = Atan(e.expr) + + /** + * Computes the tangent inverse of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def atan(columnName: String): Column = atan(Column(columnName)) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(l: Column, r: Column): Column = Atan2(l.expr, r.expr) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(l: Column, rightName: String): Column = atan2(l, Column(rightName)) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(leftName: String, r: Column): Column = atan2(Column(leftName), r) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(leftName: String, rightName: String): Column = + atan2(Column(leftName), Column(rightName)) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). 
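A quick sketch of the new column constructors and bitwiseNOT; df is an assumed DataFrame, and the array inputs must share a data type as documented above:

{{{
import org.apache.spark.sql.functions._

df.select(
  array("q1", "q2", "q3", "q4").as("quarterly"),   // four same-typed columns into one array
  struct(col("city"), col("zip")).as("address"),   // nested struct column
  bitwiseNOT(col("flags")).as("flags_inverted"))
}}}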
+ * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(l: Column, r: Double): Column = atan2(l, lit(r).expr) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(leftName: String, r: Double): Column = atan2(Column(leftName), r) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(l: Double, r: Column): Column = atan2(lit(l).expr, r) + + /** + * Returns the angle theta from the conversion of rectangular coordinates (x, y) to + * polar coordinates (r, theta). + * + * @group math_funcs + * @since 1.4.0 + */ + def atan2(l: Double, rightName: String): Column = atan2(l, Column(rightName)) + + /** + * Computes the cube-root of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def cbrt(e: Column): Column = Cbrt(e.expr) + + /** + * Computes the cube-root of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def cbrt(columnName: String): Column = cbrt(Column(columnName)) + + /** + * Computes the ceiling of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def ceil(e: Column): Column = Ceil(e.expr) + + /** + * Computes the ceiling of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def ceil(columnName: String): Column = ceil(Column(columnName)) + + /** + * Computes the cosine of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def cos(e: Column): Column = Cos(e.expr) + + /** + * Computes the cosine of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def cos(columnName: String): Column = cos(Column(columnName)) + + /** + * Computes the hyperbolic cosine of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def cosh(e: Column): Column = Cosh(e.expr) + + /** + * Computes the hyperbolic cosine of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def cosh(columnName: String): Column = cosh(Column(columnName)) + + /** + * Computes the exponential of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def exp(e: Column): Column = Exp(e.expr) + + /** + * Computes the exponential of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def exp(columnName: String): Column = exp(Column(columnName)) + + /** + * Computes the exponential of the given value minus one. + * + * @group math_funcs + * @since 1.4.0 + */ + def expm1(e: Column): Column = Expm1(e.expr) + + /** + * Computes the exponential of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def expm1(columnName: String): Column = expm1(Column(columnName)) + + /** + * Computes the floor of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def floor(e: Column): Column = Floor(e.expr) + + /** + * Computes the floor of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def floor(columnName: String): Column = floor(Column(columnName)) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(l: Column, r: Column): Column = Hypot(l.expr, r.expr) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. 
+ * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(l: Column, rightName: String): Column = hypot(l, Column(rightName)) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(leftName: String, r: Column): Column = hypot(Column(leftName), r) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(leftName: String, rightName: String): Column = + hypot(Column(leftName), Column(rightName)) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(l: Column, r: Double): Column = hypot(l, lit(r).expr) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(leftName: String, r: Double): Column = hypot(Column(leftName), r) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(l: Double, r: Column): Column = hypot(lit(l).expr, r) + + /** + * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. + * + * @group math_funcs + * @since 1.4.0 + */ + def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName)) + + /** + * Computes the natural logarithm of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def log(e: Column): Column = Log(e.expr) + + /** + * Computes the natural logarithm of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def log(columnName: String): Column = log(Column(columnName)) + + /** + * Computes the logarithm of the given value in Base 10. + * + * @group math_funcs + * @since 1.4.0 + */ + def log10(e: Column): Column = Log10(e.expr) + + /** + * Computes the logarithm of the given value in Base 10. + * + * @group math_funcs + * @since 1.4.0 + */ + def log10(columnName: String): Column = log10(Column(columnName)) + + /** + * Computes the natural logarithm of the given value plus one. + * + * @group math_funcs + * @since 1.4.0 + */ + def log1p(e: Column): Column = Log1p(e.expr) + + /** + * Computes the natural logarithm of the given column plus one. + * + * @group math_funcs + * @since 1.4.0 + */ + def log1p(columnName: String): Column = log1p(Column(columnName)) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(l: Column, r: Column): Column = Pow(l.expr, r.expr) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(l: Column, rightName: String): Column = pow(l, Column(rightName)) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(leftName: String, r: Column): Column = pow(Column(leftName), r) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(leftName: String, rightName: String): Column = pow(Column(leftName), Column(rightName)) + + /** + * Returns the value of the first argument raised to the power of the second argument. 
+ * + * @group math_funcs + * @since 1.4.0 + */ + def pow(l: Column, r: Double): Column = pow(l, lit(r).expr) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(leftName: String, r: Double): Column = pow(Column(leftName), r) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(l: Double, r: Column): Column = pow(lit(l).expr, r) + + /** + * Returns the value of the first argument raised to the power of the second argument. + * + * @group math_funcs + * @since 1.4.0 + */ + def pow(l: Double, rightName: String): Column = pow(l, Column(rightName)) + + /** + * Returns the double value that is closest in value to the argument and + * is equal to a mathematical integer. + * + * @group math_funcs + * @since 1.4.0 + */ + def rint(e: Column): Column = Rint(e.expr) + + /** + * Returns the double value that is closest in value to the argument and + * is equal to a mathematical integer. + * + * @group math_funcs + * @since 1.4.0 + */ + def rint(columnName: String): Column = rint(Column(columnName)) + + /** + * Computes the signum of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def signum(e: Column): Column = Signum(e.expr) + + /** + * Computes the signum of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def signum(columnName: String): Column = signum(Column(columnName)) + + /** + * Computes the sine of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def sin(e: Column): Column = Sin(e.expr) + + /** + * Computes the sine of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def sin(columnName: String): Column = sin(Column(columnName)) + + /** + * Computes the hyperbolic sine of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def sinh(e: Column): Column = Sinh(e.expr) + + /** + * Computes the hyperbolic sine of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def sinh(columnName: String): Column = sinh(Column(columnName)) + + /** + * Computes the tangent of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def tan(e: Column): Column = Tan(e.expr) + + /** + * Computes the tangent of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def tan(columnName: String): Column = tan(Column(columnName)) + + /** + * Computes the hyperbolic tangent of the given value. + * + * @group math_funcs + * @since 1.4.0 + */ + def tanh(e: Column): Column = Tanh(e.expr) + + /** + * Computes the hyperbolic tangent of the given column. + * + * @group math_funcs + * @since 1.4.0 + */ + def tanh(columnName: String): Column = tanh(Column(columnName)) + + /** + * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. + * + * @group math_funcs + * @since 1.4.0 + */ + def toDegrees(e: Column): Column = ToDegrees(e.expr) + + /** + * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. + * + * @group math_funcs + * @since 1.4.0 + */ + def toDegrees(columnName: String): Column = toDegrees(Column(columnName)) + + /** + * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. 
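A combined sketch of several math functions, converting Cartesian columns to polar form; df is assumed to have numeric x and y columns:

{{{
import org.apache.spark.sql.functions._

df.select(
  hypot(col("x"), col("y")).as("r"),                     // sqrt(x^2 + y^2) without overflow
  atan2(col("y"), col("x")).as("theta"),                 // angle in radians
  toDegrees(atan2(col("y"), col("x"))).as("theta_deg"),
  pow(col("x"), 2.0).as("x_squared"))
}}}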
+ * + * @group math_funcs + * @since 1.4.0 + */ + def toRadians(e: Column): Column = ToRadians(e.expr) + + /** + * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. + * + * @group math_funcs + * @since 1.4.0 + */ + def toRadians(columnName: String): Column = toRadians(Column(columnName)) + + ////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// @@ -451,6 +1316,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -467,6 +1333,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = { ScalaUdf(f, returnType, Seq($argsInUdf)) @@ -479,6 +1346,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -489,6 +1357,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag](f: Function1[A1, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -499,6 +1368,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: Function2[A1, A2, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -509,6 +1379,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](f: Function3[A1, A2, A3, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -519,6 +1390,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](f: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -529,6 +1401,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](f: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -539,6 +1412,7 @@ object functions { * The data types are automatically inferred based on the function's signature. 
* * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](f: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -549,6 +1423,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](f: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -559,6 +1434,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](f: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -569,6 +1445,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](f: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -579,6 +1456,7 @@ object functions { * The data types are automatically inferred based on the function's signature. * * @group udf_funcs + * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](f: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType) @@ -591,6 +1469,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function0[_], returnType: DataType): Column = { ScalaUdf(f, returnType, Seq()) @@ -601,6 +1480,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function1[_, _], returnType: DataType, arg1: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr)) @@ -611,6 +1491,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function2[_, _, _], returnType: DataType, arg1: Column, arg2: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr)) @@ -621,6 +1502,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function3[_, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr)) @@ -631,6 +1513,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function4[_, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr)) @@ -641,6 +1524,7 @@ object functions { * you to specify the return data type. 
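Defining and applying a UDF with the two styles shown above: inferred return type via udf, or an explicit return type via callUDF; df is an assumed DataFrame with an integer id column:

{{{
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DoubleType

// Return type inferred from the function's TypeTag.
val squared = udf((x: Int) => x * x)

// Return type supplied explicitly.
val halved = callUDF((x: Int) => x / 2.0, DoubleType, col("id"))

df.select(col("id"), squared(col("id")).as("id_squared"), halved.as("id_halved"))
}}}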
* * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function5[_, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr)) @@ -651,6 +1535,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function6[_, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr)) @@ -661,6 +1546,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function7[_, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr)) @@ -671,6 +1557,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function8[_, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr)) @@ -681,6 +1568,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function9[_, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr)) @@ -691,6 +1579,7 @@ object functions { * you to specify the return data type. * * @group udf_funcs + * @since 1.3.0 */ def callUDF(f: Function10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType, arg1: Column, arg2: Column, arg3: Column, arg4: Column, arg5: Column, arg6: Column, arg7: Column, arg8: Column, arg9: Column, arg10: Column): Column = { ScalaUdf(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr)) @@ -711,6 +1600,7 @@ object functions { * }}} * * @group udf_funcs + * @since 1.4.0 */ def callUdf(udfName: String, cols: Column*): Column = { UnresolvedFunction(udfName, cols.map(_.expr)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala deleted file mode 100644 index 0feabc4282f4a..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DriverQuirks.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.jdbc - -import org.apache.spark.sql.types._ - -import java.sql.Types - - -/** - * Encapsulates workarounds for the extensions, quirks, and bugs in various - * databases. Lots of databases define types that aren't explicitly supported - * by the JDBC spec. Some JDBC drivers also report inaccurate - * information---for instance, BIT(n>1) being reported as a BIT type is quite - * common, even though BIT in JDBC is meant for single-bit values. Also, there - * does not appear to be a standard name for an unbounded string or binary - * type; we use BLOB and CLOB by default but override with database-specific - * alternatives when these are absent or do not behave correctly. - * - * Currently, the only thing DriverQuirks does is handle type mapping. - * `getCatalystType` is used when reading from a JDBC table and `getJDBCType` - * is used when writing to a JDBC table. If `getCatalystType` returns `null`, - * the default type handling is used for the given JDBC type. Similarly, - * if `getJDBCType` returns `(null, None)`, the default type handling is used - * for the given Catalyst type. - */ -private[sql] abstract class DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType - def getJDBCType(dt: DataType): (String, Option[Int]) -} - -private[sql] object DriverQuirks { - /** - * Fetch the DriverQuirks class corresponding to a given database url. - */ - def get(url: String): DriverQuirks = { - if (url.startsWith("jdbc:mysql")) { - new MySQLQuirks() - } else if (url.startsWith("jdbc:postgresql")) { - new PostgresQuirks() - } else { - new NoQuirks() - } - } -} - -private[sql] class NoQuirks extends DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType = - null - def getJDBCType(dt: DataType): (String, Option[Int]) = (null, None) -} - -private[sql] class PostgresQuirks extends DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType = { - if (sqlType == Types.BIT && typeName.equals("bit") && size != 1) { - BinaryType - } else if (sqlType == Types.OTHER && typeName.equals("cidr")) { - StringType - } else if (sqlType == Types.OTHER && typeName.equals("inet")) { - StringType - } else null - } - - def getJDBCType(dt: DataType): (String, Option[Int]) = dt match { - case StringType => ("TEXT", Some(java.sql.Types.CHAR)) - case BinaryType => ("BYTEA", Some(java.sql.Types.BINARY)) - case BooleanType => ("BOOLEAN", Some(java.sql.Types.BOOLEAN)) - case _ => (null, None) - } -} - -private[sql] class MySQLQuirks extends DriverQuirks { - def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): DataType = { - if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) { - // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as - // byte arrays instead of longs. 
- md.putLong("binarylong", 1) - LongType - } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) { - BooleanType - } else null - } - def getJDBCType(dt: DataType): (String, Option[Int]) = (null, None) -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala index 2f6ba48dbc3d9..40b604d710dce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRDD.scala @@ -25,24 +25,38 @@ import org.apache.commons.lang3.StringUtils import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Row, SpecificMutableRow} +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ import org.apache.spark.sql.sources._ -import org.apache.spark.util.Utils + +/** + * Data corresponding to one partition of a JDBCRDD. + */ +private[sql] case class JDBCPartition(whereClause: String, idx: Int) extends Partition { + override def index: Int = idx +} + private[sql] object JDBCRDD extends Logging { + /** * Maps a JDBC type to a Catalyst type. This function is called only when - * the DriverQuirks class corresponding to your database driver returns null. + * the JdbcDialect class corresponding to your database driver returns null. * * @param sqlType - A field of java.sql.Types * @return The Catalyst type corresponding to sqlType. */ - private def getCatalystType(sqlType: Int, precision: Int, scale: Int): DataType = { + private def getCatalystType( + sqlType: Int, + precision: Int, + scale: Int, + signed: Boolean): DataType = { val answer = sqlType match { + // scalastyle:off case java.sql.Types.ARRAY => null - case java.sql.Types.BIGINT => LongType + case java.sql.Types.BIGINT => if (signed) { LongType } else { DecimalType.Unlimited } case java.sql.Types.BINARY => BinaryType - case java.sql.Types.BIT => BooleanType // Per JDBC; Quirks handles quirky drivers. + case java.sql.Types.BIT => BooleanType // @see JdbcDialect for quirks case java.sql.Types.BLOB => BinaryType case java.sql.Types.BOOLEAN => BooleanType case java.sql.Types.CHAR => StringType @@ -55,7 +69,7 @@ private[sql] object JDBCRDD extends Logging { case java.sql.Types.DISTINCT => null case java.sql.Types.DOUBLE => DoubleType case java.sql.Types.FLOAT => FloatType - case java.sql.Types.INTEGER => IntegerType + case java.sql.Types.INTEGER => if (signed) { IntegerType } else { LongType } case java.sql.Types.JAVA_OBJECT => null case java.sql.Types.LONGNVARCHAR => StringType case java.sql.Types.LONGVARBINARY => BinaryType @@ -79,7 +93,8 @@ private[sql] object JDBCRDD extends Logging { case java.sql.Types.TINYINT => IntegerType case java.sql.Types.VARBINARY => BinaryType case java.sql.Types.VARCHAR => StringType - case _ => null + case _ => null + // scalastyle:on } if (answer == null) throw new SQLException("Unsupported type " + sqlType) @@ -99,7 +114,7 @@ private[sql] object JDBCRDD extends Logging { * @throws SQLException if the table contains an unsupported type. 
*/ def resolveTable(url: String, table: String, properties: Properties): StructType = { - val quirks = DriverQuirks.get(url) + val dialect = JdbcDialects.get(url) val conn: Connection = DriverManager.getConnection(url, properties) try { val rs = conn.prepareStatement(s"SELECT * FROM $table WHERE 1=0").executeQuery() @@ -109,15 +124,17 @@ private[sql] object JDBCRDD extends Logging { val fields = new Array[StructField](ncols) var i = 0 while (i < ncols) { - val columnName = rsmd.getColumnName(i + 1) + val columnName = rsmd.getColumnLabel(i + 1) val dataType = rsmd.getColumnType(i + 1) val typeName = rsmd.getColumnTypeName(i + 1) val fieldSize = rsmd.getPrecision(i + 1) val fieldScale = rsmd.getScale(i + 1) + val isSigned = rsmd.isSigned(i + 1) val nullable = rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls val metadata = new MetadataBuilder().putString("name", columnName) - var columnType = quirks.getCatalystType(dataType, typeName, fieldSize, metadata) - if (columnType == null) columnType = getCatalystType(dataType, fieldSize, fieldScale) + val columnType = + dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( + getCatalystType(dataType, fieldSize, fieldScale, isSigned)) fields(i) = StructField(columnName, columnType, nullable, metadata.build()) i = i + 1 } @@ -168,6 +185,7 @@ private[sql] object JDBCRDD extends Logging { DriverManager.getConnection(url, properties) } } + /** * Build and return JDBCRDD from the given information. * @@ -193,18 +211,15 @@ private[sql] object JDBCRDD extends Logging { requiredColumns: Array[String], filters: Array[Filter], parts: Array[Partition]): RDD[Row] = { - - val prunedSchema = pruneSchema(schema, requiredColumns) - - return new - JDBCRDD( - sc, - getConnector(driver, url, properties), - prunedSchema, - fqTable, - requiredColumns, - filters, - parts) + new JDBCRDD( + sc, + getConnector(driver, url, properties), + pruneSchema(schema, requiredColumns), + fqTable, + requiredColumns, + filters, + parts, + properties) } } @@ -220,7 +235,8 @@ private[sql] class JDBCRDD( fqTable: String, columns: Array[String], filters: Array[Filter], - partitions: Array[Partition]) + partitions: Array[Partition], + properties: Properties) extends RDD[Row](sc, Nil) { /** @@ -246,7 +262,7 @@ private[sql] class JDBCRDD( } private def escapeSql(value: String): String = - if (value == null) null else StringUtils.replace(value, "'", "''") + if (value == null) null else StringUtils.replace(value, "'", "''") /** * Turns a single Filter into a String representing a SQL expression. @@ -288,13 +304,13 @@ private[sql] class JDBCRDD( // Each JDBC-to-Catalyst conversion corresponds to a tag defined here so that // we don't have to potentially poke around in the Metadata once for every - // row. + // row. // Is there a better way to do this? I'd rather be using a type that // contains only the tags I define. 
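The DriverQuirks lookup has been replaced by JdbcDialects.get(url) above. A rough sketch of plugging in a custom type mapping, assuming the JdbcDialect API exposes canHandle and a JdbcDialects.registerDialect hook as in later releases; only getCatalystType's shape is confirmed by this hunk, and the "mydb" dialect is hypothetical:

{{{
import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects}
import org.apache.spark.sql.types.{BinaryType, DataType, MetadataBuilder}

// Hypothetical dialect: treat BIT(n > 1) reported by a fictional "mydb" driver as binary.
object MyDbDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mydb")
  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == java.sql.Types.BIT && size > 1) Some(BinaryType) else None
  }
}

JdbcDialects.registerDialect(MyDbDialect)
}}}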
abstract class JDBCConversion case object BooleanConversion extends JDBCConversion case object DateConversion extends JDBCConversion - case object DecimalConversion extends JDBCConversion + case class DecimalConversion(precisionInfo: Option[(Int, Int)]) extends JDBCConversion case object DoubleConversion extends JDBCConversion case object FloatConversion extends JDBCConversion case object IntegerConversion extends JDBCConversion @@ -309,19 +325,19 @@ private[sql] class JDBCRDD( */ def getConversions(schema: StructType): Array[JDBCConversion] = { schema.fields.map(sf => sf.dataType match { - case BooleanType => BooleanConversion - case DateType => DateConversion - case DecimalType.Unlimited => DecimalConversion - case DecimalType.Fixed(d) => DecimalConversion - case DoubleType => DoubleConversion - case FloatType => FloatConversion - case IntegerType => IntegerConversion - case LongType => + case BooleanType => BooleanConversion + case DateType => DateConversion + case DecimalType.Unlimited => DecimalConversion(None) + case DecimalType.Fixed(d) => DecimalConversion(Some(d)) + case DoubleType => DoubleConversion + case FloatType => FloatConversion + case IntegerType => IntegerConversion + case LongType => if (sf.metadata.contains("binarylong")) BinaryLongConversion else LongConversion - case StringType => StringConversion - case TimestampType => TimestampConversion - case BinaryType => BinaryConversion - case _ => throw new IllegalArgumentException(s"Unsupported field $sf") + case StringType => StringConversion + case TimestampType => TimestampConversion + case BinaryType => BinaryConversion + case _ => throw new IllegalArgumentException(s"Unsupported field $sf") }).toArray } @@ -349,6 +365,8 @@ private[sql] class JDBCRDD( val sqlText = s"SELECT $columnList FROM $fqTable $myWhereClause" val stmt = conn.prepareStatement(sqlText, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY) + val fetchSize = properties.getProperty("fetchSize", "0").toInt + stmt.setFetchSize(fetchSize) val rs = stmt.executeQuery() val conversions = getConversions(schema) @@ -360,18 +378,45 @@ private[sql] class JDBCRDD( while (i < conversions.length) { val pos = i + 1 conversions(i) match { - case BooleanConversion => mutableRow.setBoolean(i, rs.getBoolean(pos)) - case DateConversion => - mutableRow.update(i, DateUtils.fromJavaDate(rs.getDate(pos))) - case DecimalConversion => mutableRow.update(i, rs.getBigDecimal(pos)) - case DoubleConversion => mutableRow.setDouble(i, rs.getDouble(pos)) - case FloatConversion => mutableRow.setFloat(i, rs.getFloat(pos)) - case IntegerConversion => mutableRow.setInt(i, rs.getInt(pos)) - case LongConversion => mutableRow.setLong(i, rs.getLong(pos)) + case BooleanConversion => mutableRow.setBoolean(i, rs.getBoolean(pos)) + case DateConversion => + // DateUtils.fromJavaDate does not handle null value, so we need to check it. + val dateVal = rs.getDate(pos) + if (dateVal != null) { + mutableRow.update(i, DateUtils.fromJavaDate(dateVal)) + } else { + mutableRow.update(i, null) + } + // When connecting with Oracle DB through JDBC, the precision and scale of BigDecimal + // object returned by ResultSet.getBigDecimal is not correctly matched to the table + // schema reported by ResultSetMetaData.getPrecision and ResultSetMetaData.getScale. + // If inserting values like 19999 into a column with NUMBER(12, 2) type, you get through + // a BigDecimal object with scale as 0. But the dataframe schema has correct type as + // DecimalType(12, 2). 
Thus, after saving the dataframe into parquet file and then + // retrieve it, you will get wrong result 199.99. + // So it is needed to set precision and scale for Decimal based on JDBC metadata. + case DecimalConversion(Some((p, s))) => + val decimalVal = rs.getBigDecimal(pos) + if (decimalVal == null) { + mutableRow.update(i, null) + } else { + mutableRow.update(i, Decimal(decimalVal, p, s)) + } + case DecimalConversion(None) => + val decimalVal = rs.getBigDecimal(pos) + if (decimalVal == null) { + mutableRow.update(i, null) + } else { + mutableRow.update(i, Decimal(decimalVal)) + } + case DoubleConversion => mutableRow.setDouble(i, rs.getDouble(pos)) + case FloatConversion => mutableRow.setFloat(i, rs.getFloat(pos)) + case IntegerConversion => mutableRow.setInt(i, rs.getInt(pos)) + case LongConversion => mutableRow.setLong(i, rs.getLong(pos)) // TODO(davies): use getBytes for better performance, if the encoding is UTF-8 - case StringConversion => mutableRow.setString(i, rs.getString(pos)) - case TimestampConversion => mutableRow.update(i, rs.getTimestamp(pos)) - case BinaryConversion => mutableRow.update(i, rs.getBytes(pos)) + case StringConversion => mutableRow.setString(i, rs.getString(pos)) + case TimestampConversion => mutableRow.update(i, rs.getTimestamp(pos)) + case BinaryConversion => mutableRow.update(i, rs.getBytes(pos)) case BinaryLongConversion => { val bytes = rs.getBytes(pos) var ans = 0L diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala index d6b3fb3291a2e..30f9190d45bf8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala @@ -17,25 +17,16 @@ package org.apache.spark.sql.jdbc -import java.sql.DriverManager import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{SaveMode, DataFrame, SQLContext} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType -import org.apache.spark.util.Utils - -/** - * Data corresponding to one partition of a JDBCRDD. - */ -private[sql] case class JDBCPartition(whereClause: String, idx: Int) extends Partition { - override def index: Int = idx -} /** * Instructions on how to partition the table among workers. @@ -63,7 +54,7 @@ private[sql] object JDBCRelation { if (numPartitions == 1) return Array[Partition](JDBCPartition(null, 0)) // Overflow and silliness can happen if you subtract then divide. // Here we get a little roundoff, but that's (hopefully) OK. 
- val stride: Long = (partitioning.upperBound / numPartitions + val stride: Long = (partitioning.upperBound / numPartitions - partitioning.lowerBound / numPartitions) var i: Int = 0 var currentValue: Long = partitioning.lowerBound @@ -129,7 +120,8 @@ private[sql] case class JDBCRelation( parts: Array[Partition], properties: Properties = new Properties())(@transient val sqlContext: SQLContext) extends BaseRelation - with PrunedFilteredScan { + with PrunedFilteredScan + with InsertableRelation { override val needConversion: Boolean = false @@ -148,4 +140,10 @@ private[sql] case class JDBCRelation( filters, parts) } + + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + data.write + .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append) + .jdbc(url, table, properties) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala new file mode 100644 index 0000000000000..6a169e106b968 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import org.apache.spark.sql.types._ +import org.apache.spark.annotation.DeveloperApi + +import java.sql.Types + +/** + * :: DeveloperApi :: + * A database type definition coupled with the jdbc type needed to send null + * values to the database. + * @param databaseTypeDefinition The database type definition + * @param jdbcNullType The jdbc type (as defined in java.sql.Types) used to + * send a null value to the database. + */ +@DeveloperApi +case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int) + +/** + * :: DeveloperApi :: + * Encapsulates everything (extensions, workarounds, quirks) to handle the + * SQL dialect of a certain database or jdbc driver. + * Lots of databases define types that aren't explicitly supported + * by the JDBC spec. Some JDBC drivers also report inaccurate + * information---for instance, BIT(n>1) being reported as a BIT type is quite + * common, even though BIT in JDBC is meant for single-bit values. Also, there + * does not appear to be a standard name for an unbounded string or binary + * type; we use BLOB and CLOB by default but override with database-specific + * alternatives when these are absent or do not behave correctly. + * + * Currently, the only thing done by the dialect is type mapping. + * `getCatalystType` is used when reading from a JDBC table and `getJDBCType` + * is used when writing to a JDBC table. If `getCatalystType` returns `null`, + * the default type handling is used for the given JDBC type. 
Similarly, + * if `getJDBCType` returns `None`, the default type handling is used + * for the given Catalyst type. + */ +@DeveloperApi +abstract class JdbcDialect { + /** + * Check if this dialect instance can handle a certain jdbc url. + * @param url the jdbc url. + * @return True if the dialect can be applied on the given jdbc url. + * @throws NullPointerException if the url is null. + */ + def canHandle(url : String): Boolean + + /** + * Get the custom datatype mapping for the given jdbc meta information. + * @param sqlType The sql type (see java.sql.Types) + * @param typeName The sql type name (e.g. "BIGINT UNSIGNED") + * @param size The size of the type. + * @param md Result metadata associated with this type. + * @return The actual DataType (subclasses of [[org.apache.spark.sql.types.DataType]]) + * or None if the default type mapping should be used. + */ + def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = None + + /** + * Retrieve the jdbc / sql type for a given datatype. + * @param dt The datatype (e.g. [[org.apache.spark.sql.types.StringType]]) + * @return The new JdbcType if there is an override for this DataType + */ + def getJDBCType(dt: DataType): Option[JdbcType] = None +} + +/** + * :: DeveloperApi :: + * Registry of dialects that apply to every new jdbc [[org.apache.spark.sql.DataFrame]]. + * + * If multiple matching dialects are registered then all matching ones will be + * tried in reverse order. A user-added dialect will thus be applied first, + * overwriting the defaults. + * + * Note that all new dialects are applied to new jdbc DataFrames only. Make + * sure to register your dialects first. + */ +@DeveloperApi +object JdbcDialects { + + private var dialects = List[JdbcDialect]() + + /** + * Register a dialect for use on all new matching jdbc [[org.apache.spark.sql.DataFrame]]. + * Re-adding an existing dialect will cause a move-to-front. + * @param dialect The new dialect. + */ + def registerDialect(dialect: JdbcDialect) : Unit = { + dialects = dialect :: dialects.filterNot(_ == dialect) + } + + /** + * Unregister a dialect. Does nothing if the dialect is not registered. + * @param dialect The jdbc dialect. + */ + def unregisterDialect(dialect : JdbcDialect) : Unit = { + dialects = dialects.filterNot(_ == dialect) + } + + registerDialect(MySQLDialect) + registerDialect(PostgresDialect) + + /** + * Fetch the JdbcDialect class corresponding to a given database url. + */ + private[sql] def get(url: String): JdbcDialect = { + val matchingDialects = dialects.filter(_.canHandle(url)) + matchingDialects.length match { + case 0 => NoopDialect + case 1 => matchingDialects.head + case _ => new AggregatedDialect(matchingDialects) + } + } +} + +/** + * :: DeveloperApi :: + * AggregatedDialect can unify multiple dialects into one virtual Dialect. + * Dialects are tried in order, and the first dialect that does not return a + * neutral element will win. + * @param dialects List of dialects.
+ */ +@DeveloperApi +class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect { + + require(!dialects.isEmpty) + + def canHandle(url : String): Boolean = + dialects.map(_.canHandle(url)).reduce(_ && _) + + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = + dialects.map(_.getCatalystType(sqlType, typeName, size, md)).flatten.headOption + + override def getJDBCType(dt: DataType): Option[JdbcType] = + dialects.map(_.getJDBCType(dt)).flatten.headOption + +} + +/** + * :: DeveloperApi :: + * NOOP dialect object, always returning the neutral element. + */ +@DeveloperApi +case object NoopDialect extends JdbcDialect { + def canHandle(url : String): Boolean = true +} + +/** + * :: DeveloperApi :: + * Default postgres dialect, mapping bit/cidr/inet on read and string/binary/boolean on write. + */ +@DeveloperApi +case object PostgresDialect extends JdbcDialect { + def canHandle(url: String): Boolean = url.startsWith("jdbc:postgresql") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { + if (sqlType == Types.BIT && typeName.equals("bit") && size != 1) { + Some(BinaryType) + } else if (sqlType == Types.OTHER && typeName.equals("cidr")) { + Some(StringType) + } else if (sqlType == Types.OTHER && typeName.equals("inet")) { + Some(StringType) + } else None + } + + override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { + case StringType => Some(JdbcType("TEXT", java.sql.Types.CHAR)) + case BinaryType => Some(JdbcType("BYTEA", java.sql.Types.BINARY)) + case BooleanType => Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) + case _ => None + } +} + +/** + * :: DeveloperApi :: + * Default mysql dialect to read bit/bitsets correctly. + */ +@DeveloperApi +case object MySQLDialect extends JdbcDialect { + def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { + if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) { + // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as + // byte arrays instead of longs. + md.putLong("binarylong", 1) + Some(LongType) + } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) { + Some(BooleanType) + } else None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala new file mode 100644 index 0000000000000..cc918c237192b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcUtils.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc + +import java.sql.{Connection, DriverManager} +import java.util.Properties + +import scala.util.Try + +/** + * Util functions for JDBC tables. + */ +private[sql] object JdbcUtils { + + /** + * Establishes a JDBC connection. + */ + def createConnection(url: String, connectionProperties: Properties): Connection = { + DriverManager.getConnection(url, connectionProperties) + } + + /** + * Returns true if the table already exists in the JDBC database. + */ + def tableExists(conn: Connection, table: String): Boolean = { + // Somewhat hacky, but there isn't a good way to identify whether a table exists for all + // SQL database systems, considering "table" could also include the database name. + Try(conn.prepareStatement(s"SELECT 1 FROM $table LIMIT 1").executeQuery().next()).isSuccess + } + + /** + * Drops a table from the JDBC database. + */ + def dropTable(conn: Connection, table: String): Unit = { + conn.prepareStatement(s"DROP TABLE $table").executeUpdate() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala index 3a6c2c1e9101f..dd8aaf6474895 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala @@ -57,9 +57,14 @@ package object jdbc { * non-Serializable. Instead, we explicitly close over all variables that * are used. */ - def savePartition(url: String, table: String, iterator: Iterator[Row], - rddSchema: StructType, nullTypes: Array[Int]): Iterator[Byte] = { - val conn = DriverManager.getConnection(url) + def savePartition( + url: String, + table: String, + iterator: Iterator[Row], + rddSchema: StructType, + nullTypes: Array[Int], + properties: Properties): Iterator[Byte] = { + val conn = DriverManager.getConnection(url, properties) var committed = false try { conn.setAutoCommit(false) // Everything in the same db transaction. 
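// ---------------------------------------------------------------------------------------
// Editor's sketch, not part of this patch: how the pieces above fit together from the user
// side. It assumes a live SQLContext named `sqlContext` and a reachable PostgreSQL server;
// the vendor prefix, URL, table names and credentials below are made up for illustration.
// ---------------------------------------------------------------------------------------
import java.util.Properties

import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType}
import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType}

// A hypothetical dialect for an imaginary "myvendor" driver that reports its JSON column
// type as java.sql.Types.OTHER: read such columns as Catalyst StringType and write
// StringType columns back as the vendor's JSON type.
object MyVendorDialect extends JdbcDialect {
  def canHandle(url: String): Boolean = url.startsWith("jdbc:myvendor")
  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] =
    if (sqlType == java.sql.Types.OTHER && typeName == "JSON") Some(StringType) else None
  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    case StringType => Some(JdbcType("JSON", java.sql.Types.OTHER))
    case _ => None
  }
}
JdbcDialects.registerDialect(MyVendorDialect)   // user dialects are consulted before the built-ins

// The same Properties object is handed to DriverManager.getConnection and carries the
// optional "fetchSize" hint that JDBCRDD.compute now forwards to Statement.setFetchSize.
val props = new Properties()
props.setProperty("user", "spark")              // hypothetical credentials
props.setProperty("password", "secret")
props.setProperty("fetchSize", "1000")

val url = "jdbc:postgresql://dbhost:5432/testdb"          // hypothetical URL
val people = sqlContext.read.jdbc(url, "people", props)   // read through JDBCRelation

// Writes go back through DataFrameWriter.jdbc, which the new InsertableRelation support
// also uses when an INSERT targets a JDBC table.
people.write.mode(SaveMode.Append).jdbc(url, "people_backup", props)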
@@ -124,25 +129,26 @@ package object jdbc { */ def schemaString(df: DataFrame, url: String): String = { val sb = new StringBuilder() - val quirks = DriverQuirks.get(url) + val dialect = JdbcDialects.get(url) df.schema.fields foreach { field => { val name = field.name - var typ: String = quirks.getJDBCType(field.dataType)._1 - if (typ == null) typ = field.dataType match { - case IntegerType => "INTEGER" - case LongType => "BIGINT" - case DoubleType => "DOUBLE PRECISION" - case FloatType => "REAL" - case ShortType => "INTEGER" - case ByteType => "BYTE" - case BooleanType => "BIT(1)" - case StringType => "TEXT" - case BinaryType => "BLOB" - case TimestampType => "TIMESTAMP" - case DateType => "DATE" - case DecimalType.Unlimited => "DECIMAL(40,20)" - case _ => throw new IllegalArgumentException(s"Don't know how to save $field to JDBC") - } + val typ: String = + dialect.getJDBCType(field.dataType).map(_.databaseTypeDefinition).getOrElse( + field.dataType match { + case IntegerType => "INTEGER" + case LongType => "BIGINT" + case DoubleType => "DOUBLE PRECISION" + case FloatType => "REAL" + case ShortType => "INTEGER" + case ByteType => "BYTE" + case BooleanType => "BIT(1)" + case StringType => "TEXT" + case BinaryType => "BLOB" + case TimestampType => "TIMESTAMP" + case DateType => "DATE" + case DecimalType.Unlimited => "DECIMAL(40,20)" + case _ => throw new IllegalArgumentException(s"Don't know how to save $field to JDBC") + }) val nullable = if (field.nullable) "" else "NOT NULL" sb.append(s", $name $typ $nullable") }} @@ -152,11 +158,14 @@ package object jdbc { /** * Saves the RDD to the database in a single transaction. */ - def saveTable(df: DataFrame, url: String, table: String) { - val quirks = DriverQuirks.get(url) - var nullTypes: Array[Int] = df.schema.fields.map(field => { - var nullType: Option[Int] = quirks.getJDBCType(field.dataType)._2 - if (nullType.isEmpty) { + def saveTable( + df: DataFrame, + url: String, + table: String, + properties: Properties = new Properties()) { + val dialect = JdbcDialects.get(url) + val nullTypes: Array[Int] = df.schema.fields.map { field => + dialect.getJDBCType(field.dataType).map(_.jdbcNullType).getOrElse( field.dataType match { case IntegerType => java.sql.Types.INTEGER case LongType => java.sql.Types.BIGINT @@ -172,13 +181,12 @@ package object jdbc { case DecimalType.Unlimited => java.sql.Types.DECIMAL case _ => throw new IllegalArgumentException( s"Can't translate null value for field $field") - } - } else nullType.get - }).toArray + }) + } val rddSchema = df.schema df.foreachPartition { iterator => - JDBCWriteDetails.savePartition(url, table, iterator, rddSchema, nullTypes) + JDBCWriteDetails.savePartition(url, table, iterator, rddSchema, nullTypes, properties) } } @@ -232,10 +240,10 @@ package object jdbc { } } } - + def getDriverClassName(url: String): String = DriverManager.getDriver(url) match { case wrapper: DriverWrapper => wrapper.wrapped.getClass.getCanonicalName - case driver => driver.getClass.getCanonicalName + case driver => driver.getClass.getCanonicalName } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/InferSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/InferSchema.scala new file mode 100644 index 0000000000000..06aa19ef09bd2 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/InferSchema.scala @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.json + +import com.fasterxml.jackson.core._ + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion +import org.apache.spark.sql.json.JacksonUtils.nextUntil +import org.apache.spark.sql.types._ + +private[sql] object InferSchema { + /** + * Infer the type of a collection of json records in three stages: + * 1. Infer the type of each record + * 2. Merge types by choosing the lowest type necessary to cover equal keys + * 3. Replace any remaining null fields with string, the top type + */ + def apply( + json: RDD[String], + samplingRatio: Double = 1.0, + columnNameOfCorruptRecords: String): StructType = { + require(samplingRatio > 0, s"samplingRatio ($samplingRatio) should be greater than 0") + val schemaData = if (samplingRatio > 0.99) { + json + } else { + json.sample(withReplacement = false, samplingRatio, 1) + } + + // perform schema inference on each row and merge afterwards + schemaData.mapPartitions { iter => + val factory = new JsonFactory() + iter.map { row => + try { + val parser = factory.createParser(row) + parser.nextToken() + inferField(parser) + } catch { + case _: JsonParseException => + StructType(Seq(StructField(columnNameOfCorruptRecords, StringType))) + } + } + }.treeAggregate[DataType](StructType(Seq()))(compatibleRootType, compatibleRootType) match { + case st: StructType => nullTypeToStringType(st) + } + } + + /** + * Infer the type of a json document from the parser's token stream + */ + private def inferField(parser: JsonParser): DataType = { + import com.fasterxml.jackson.core.JsonToken._ + parser.getCurrentToken match { + case null | VALUE_NULL => NullType + + case FIELD_NAME => + parser.nextToken() + inferField(parser) + + case VALUE_STRING if parser.getTextLength < 1 => + // Zero length strings and nulls have special handling to deal + // with JSON generators that do not distinguish between the two. + // To accurately infer types for empty strings that are really + // meant to represent nulls we assume that the two are isomorphic + // but will defer treating null fields as strings until all the + // record fields' types have been combined. + NullType + + case VALUE_STRING => StringType + case START_OBJECT => + val builder = Seq.newBuilder[StructField] + while (nextUntil(parser, END_OBJECT)) { + builder += StructField(parser.getCurrentName, inferField(parser), nullable = true) + } + + StructType(builder.result().sortBy(_.name)) + + case START_ARRAY => + // If this JSON array is empty, we use NullType as a placeholder. + // If this array is not empty in other JSON objects, we can resolve + // the type as we pass through all JSON objects. 
+ var elementType: DataType = NullType + while (nextUntil(parser, END_ARRAY)) { + elementType = compatibleType(elementType, inferField(parser)) + } + + ArrayType(elementType) + + case VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT => + import JsonParser.NumberType._ + parser.getNumberType match { + // For Integer values, use LongType by default. + case INT | LONG => LongType + // Since we do not have a data type backed by BigInteger, + // when we see a Java BigInteger, we use DecimalType. + case BIG_INTEGER | BIG_DECIMAL => DecimalType.Unlimited + case FLOAT | DOUBLE => DoubleType + } + + case VALUE_TRUE | VALUE_FALSE => BooleanType + } + } + + private def nullTypeToStringType(struct: StructType): StructType = { + val fields = struct.fields.map { + case StructField(fieldName, dataType, nullable, _) => + val newType = dataType match { + case NullType => StringType + case ArrayType(NullType, containsNull) => ArrayType(StringType, containsNull) + case ArrayType(struct: StructType, containsNull) => + ArrayType(nullTypeToStringType(struct), containsNull) + case struct: StructType => nullTypeToStringType(struct) + case other: DataType => other + } + + StructField(fieldName, newType, nullable) + } + + StructType(fields) + } + + /** + * Remove top-level ArrayType wrappers and merge the remaining schemas + */ + private def compatibleRootType: (DataType, DataType) => DataType = { + case (ArrayType(ty1, _), ty2) => compatibleRootType(ty1, ty2) + case (ty1, ArrayType(ty2, _)) => compatibleRootType(ty1, ty2) + case (ty1, ty2) => compatibleType(ty1, ty2) + } + + /** + * Returns the most general data type for two given data types. + */ + private[json] def compatibleType(t1: DataType, t2: DataType): DataType = { + HiveTypeCoercion.findTightestCommonType(t1, t2).getOrElse { + // t1 or t2 is a StructType, ArrayType, or an unexpected type. + (t1, t2) match { + case (other: DataType, NullType) => other + case (NullType, other: DataType) => other + case (StructType(fields1), StructType(fields2)) => + val newFields = (fields1 ++ fields2).groupBy(field => field.name).map { + case (name, fieldTypes) => + val dataType = fieldTypes.view.map(_.dataType).reduce(compatibleType) + StructField(name, dataType, nullable = true) + } + StructType(newFields.toSeq.sortBy(_.name)) + + case (ArrayType(elementType1, containsNull1), ArrayType(elementType2, containsNull2)) => + ArrayType(compatibleType(elementType1, elementType2), containsNull1 || containsNull2) + + // strings and every string is a Json object. 
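// Editor's note, not part of this patch: the catch-all case below is what makes
// conflicting scalar types collapse to strings. For example, inferring over the records
// {"a": 1} and {"a": true} yields StructField("a", StringType), since LongType and
// BooleanType have no tightest common type and neither is a struct or an array.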
+ case (_, _) => StringType + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala index e3352d02787fd..c772cd1f53e53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala @@ -22,14 +22,16 @@ import java.io.IOException import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.catalyst.expressions.{Expression, Attribute, Row} import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} private[sql] class DefaultSource - extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider { + extends RelationProvider + with SchemaRelationProvider + with CreatableRelationProvider { private def checkPath(parameters: Map[String, String]): String = { parameters.getOrElse("path", sys.error("'path' must be specified for json data.")) @@ -42,7 +44,7 @@ private[sql] class DefaultSource val path = checkPath(parameters) val samplingRatio = parameters.get("samplingRatio").map(_.toDouble).getOrElse(1.0) - JSONRelation(path, samplingRatio, None)(sqlContext) + new JSONRelation(path, samplingRatio, None, sqlContext) } /** Returns a new base relation with the given schema and parameters. */ @@ -53,7 +55,7 @@ private[sql] class DefaultSource val path = checkPath(parameters) val samplingRatio = parameters.get("samplingRatio").map(_.toDouble).getOrElse(1.0) - JSONRelation(path, samplingRatio, Some(schema))(sqlContext) + new JSONRelation(path, samplingRatio, Some(schema), sqlContext) } override def createRelation( @@ -101,32 +103,87 @@ private[sql] class DefaultSource } } -private[sql] case class JSONRelation( - path: String, - samplingRatio: Double, +private[sql] class JSONRelation( + // baseRDD is not immutable with respect to INSERT OVERWRITE + // and so it must be recreated at least as often as the + // underlying inputs are modified. To be safe, a function is + // used instead of a regular RDD value to ensure a fresh RDD is + // recreated for each and every operation. + baseRDD: () => RDD[String], + val path: Option[String], + val samplingRatio: Double, userSpecifiedSchema: Option[StructType])( @transient val sqlContext: SQLContext) extends BaseRelation with TableScan - with InsertableRelation { - - // TODO: Support partitioned JSON relation. 
- private def baseRDD = sqlContext.sparkContext.textFile(path) + with InsertableRelation + with CatalystScan { + + def this( + path: String, + samplingRatio: Double, + userSpecifiedSchema: Option[StructType], + sqlContext: SQLContext) = + this( + () => sqlContext.sparkContext.textFile(path), + Some(path), + samplingRatio, + userSpecifiedSchema)(sqlContext) + + private val useJacksonStreamingAPI: Boolean = sqlContext.conf.useJacksonStreamingAPI override val needConversion: Boolean = false - override val schema = userSpecifiedSchema.getOrElse( - JsonRDD.nullTypeToStringType( - JsonRDD.inferSchema( - baseRDD, + override lazy val schema = userSpecifiedSchema.getOrElse { + if (useJacksonStreamingAPI) { + InferSchema( + baseRDD(), samplingRatio, - sqlContext.conf.columnNameOfCorruptRecord))) + sqlContext.conf.columnNameOfCorruptRecord) + } else { + JsonRDD.nullTypeToStringType( + JsonRDD.inferSchema( + baseRDD(), + samplingRatio, + sqlContext.conf.columnNameOfCorruptRecord)) + } + } - override def buildScan(): RDD[Row] = - JsonRDD.jsonStringToRow(baseRDD, schema, sqlContext.conf.columnNameOfCorruptRecord) + override def buildScan(): RDD[Row] = { + if (useJacksonStreamingAPI) { + JacksonParser( + baseRDD(), + schema, + sqlContext.conf.columnNameOfCorruptRecord) + } else { + JsonRDD.jsonStringToRow( + baseRDD(), + schema, + sqlContext.conf.columnNameOfCorruptRecord) + } + } + + override def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row] = { + if (useJacksonStreamingAPI) { + JacksonParser( + baseRDD(), + StructType.fromAttributes(requiredColumns), + sqlContext.conf.columnNameOfCorruptRecord) + } else { + JsonRDD.jsonStringToRow( + baseRDD(), + StructType.fromAttributes(requiredColumns), + sqlContext.conf.columnNameOfCorruptRecord) + } + } override def insert(data: DataFrame, overwrite: Boolean): Unit = { - val filesystemPath = new Path(path) + val filesystemPath = path match { + case Some(p) => new Path(p) + case None => + throw new IOException(s"Cannot INSERT into table with no path defined") + } + val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) if (overwrite) { @@ -147,7 +204,7 @@ private[sql] case class JSONRelation( } } // Write the data. - data.toJSON.saveAsTextFile(path) + data.toJSON.saveAsTextFile(filesystemPath.toString) // Right now, we assume that the schema is not changed. We will not update the schema. // schema = data.schema } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonGenerator.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonGenerator.scala new file mode 100644 index 0000000000000..325f54b6808a8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonGenerator.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.json + +import scala.collection.Map + +import com.fasterxml.jackson.core._ + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types._ + +private[sql] object JacksonGenerator { + /** Transforms a single Row to JSON using Jackson + * + * @param rowSchema the schema object used for conversion + * @param gen a JsonGenerator object + * @param row The row to convert + */ + def apply(rowSchema: StructType, gen: JsonGenerator)(row: Row): Unit = { + def valWriter: (DataType, Any) => Unit = { + case (_, null) | (NullType, _) => gen.writeNull() + case (StringType, v: String) => gen.writeString(v) + case (TimestampType, v: java.sql.Timestamp) => gen.writeString(v.toString) + case (IntegerType, v: Int) => gen.writeNumber(v) + case (ShortType, v: Short) => gen.writeNumber(v) + case (FloatType, v: Float) => gen.writeNumber(v) + case (DoubleType, v: Double) => gen.writeNumber(v) + case (LongType, v: Long) => gen.writeNumber(v) + case (DecimalType(), v: java.math.BigDecimal) => gen.writeNumber(v) + case (ByteType, v: Byte) => gen.writeNumber(v.toInt) + case (BinaryType, v: Array[Byte]) => gen.writeBinary(v) + case (BooleanType, v: Boolean) => gen.writeBoolean(v) + case (DateType, v) => gen.writeString(v.toString) + case (udt: UserDefinedType[_], v) => valWriter(udt.sqlType, udt.serialize(v)) + + case (ArrayType(ty, _), v: Seq[_]) => + gen.writeStartArray() + v.foreach(valWriter(ty, _)) + gen.writeEndArray() + + case (MapType(kv, vv, _), v: Map[_, _]) => + gen.writeStartObject() + v.foreach { p => + gen.writeFieldName(p._1.toString) + valWriter(vv, p._2) + } + gen.writeEndObject() + + case (StructType(ty), v: Row) => + gen.writeStartObject() + ty.zip(v.toSeq).foreach { + case (_, null) => + case (field, v) => + gen.writeFieldName(field.name) + valWriter(field.dataType, v) + } + gen.writeEndObject() + } + + valWriter(rowSchema, row) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala new file mode 100644 index 0000000000000..0e223758051a6 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonParser.scala @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.json + +import java.io.ByteArrayOutputStream +import java.sql.Timestamp + +import scala.collection.Map + +import com.fasterxml.jackson.core._ + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.DateUtils +import org.apache.spark.sql.json.JacksonUtils.nextUntil +import org.apache.spark.sql.types._ + +private[sql] object JacksonParser { + def apply( + json: RDD[String], + schema: StructType, + columnNameOfCorruptRecords: String): RDD[Row] = { + parseJson(json, schema, columnNameOfCorruptRecords) + } + + /** + * Parse the current token (and related children) according to a desired schema + */ + private[sql] def convertField( + factory: JsonFactory, + parser: JsonParser, + schema: DataType): Any = { + import com.fasterxml.jackson.core.JsonToken._ + (parser.getCurrentToken, schema) match { + case (null | VALUE_NULL, _) => + null + + case (FIELD_NAME, _) => + parser.nextToken() + convertField(factory, parser, schema) + + case (VALUE_STRING, StringType) => + UTF8String(parser.getText) + + case (VALUE_STRING, _) if parser.getTextLength < 1 => + // guard the non string type + null + + case (VALUE_STRING, DateType) => + DateUtils.millisToDays(DateUtils.stringToTime(parser.getText).getTime) + + case (VALUE_STRING, TimestampType) => + new Timestamp(DateUtils.stringToTime(parser.getText).getTime) + + case (VALUE_NUMBER_INT, TimestampType) => + new Timestamp(parser.getLongValue) + + case (_, StringType) => + val writer = new ByteArrayOutputStream() + val generator = factory.createGenerator(writer, JsonEncoding.UTF8) + generator.copyCurrentStructure(parser) + generator.close() + UTF8String(writer.toByteArray) + + case (VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT, FloatType) => + parser.getFloatValue + + case (VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT, DoubleType) => + parser.getDoubleValue + + case (VALUE_NUMBER_INT | VALUE_NUMBER_FLOAT, DecimalType()) => + // TODO: add fixed precision and scale handling + Decimal(parser.getDecimalValue) + + case (VALUE_NUMBER_INT, ByteType) => + parser.getByteValue + + case (VALUE_NUMBER_INT, ShortType) => + parser.getShortValue + + case (VALUE_NUMBER_INT, IntegerType) => + parser.getIntValue + + case (VALUE_NUMBER_INT, LongType) => + parser.getLongValue + + case (VALUE_TRUE, BooleanType) => + true + + case (VALUE_FALSE, BooleanType) => + false + + case (START_OBJECT, st: StructType) => + convertObject(factory, parser, st) + + case (START_ARRAY, ArrayType(st, _)) => + convertList(factory, parser, st) + + case (START_OBJECT, ArrayType(st, _)) => + // the business end of SPARK-3308: + // when an object is found but an array is requested just wrap it in a list + convertField(factory, parser, st) :: Nil + + case (START_OBJECT, MapType(StringType, kt, _)) => + convertMap(factory, parser, kt) + + case (_, udt: UserDefinedType[_]) => + udt.deserialize(convertField(factory, parser, udt.sqlType)) + } + } + + /** + * Parse an object from the token stream into a new Row representing the schema. + * + * Fields in the json that are not defined in the requested schema will be dropped. 
+ */ + private def convertObject(factory: JsonFactory, parser: JsonParser, schema: StructType): Row = { + val row = new GenericMutableRow(schema.length) + while (nextUntil(parser, JsonToken.END_OBJECT)) { + schema.getFieldIndex(parser.getCurrentName) match { + case Some(index) => + row.update(index, convertField(factory, parser, schema(index).dataType)) + + case None => + parser.skipChildren() + } + } + + row + } + + /** + * Parse an object as a Map, preserving all fields + */ + private def convertMap( + factory: JsonFactory, + parser: JsonParser, + valueType: DataType): Map[UTF8String, Any] = { + val builder = Map.newBuilder[UTF8String, Any] + while (nextUntil(parser, JsonToken.END_OBJECT)) { + builder += UTF8String(parser.getCurrentName) -> convertField(factory, parser, valueType) + } + + builder.result() + } + + private def convertList( + factory: JsonFactory, + parser: JsonParser, + schema: DataType): Seq[Any] = { + val builder = Seq.newBuilder[Any] + while (nextUntil(parser, JsonToken.END_ARRAY)) { + builder += convertField(factory, parser, schema) + } + + builder.result() + } + + private def parseJson( + json: RDD[String], + schema: StructType, + columnNameOfCorruptRecords: String): RDD[Row] = { + + def failedRecord(record: String): Seq[Row] = { + // create a row even if no corrupt record column is present + val row = new GenericMutableRow(schema.length) + for (corruptIndex <- schema.getFieldIndex(columnNameOfCorruptRecords)) { + require(schema(corruptIndex).dataType == StringType) + row.update(corruptIndex, UTF8String(record)) + } + + Seq(row) + } + + json.mapPartitions { iter => + val factory = new JsonFactory() + + iter.flatMap { record => + try { + val parser = factory.createParser(record) + parser.nextToken() + + // to support both object and arrays (see SPARK-3308) we'll start + // by converting the StructType schema to an ArrayType and let + // convertField wrap an object into a single value array when necessary. + convertField(factory, parser, ArrayType(schema)) match { + case null => failedRecord(record) + case list: Seq[Row @unchecked] => list + case _ => + sys.error( + s"Failed to parse record $record. Please make sure that each line of the file " + + "(or each string in the RDD) is a valid JSON object or an array of JSON objects.") + } + } catch { + case _: JsonProcessingException => + failedRecord(record) + } + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonUtils.scala new file mode 100644 index 0000000000000..fde96852ce68e --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JacksonUtils.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.json + +import com.fasterxml.jackson.core.{JsonParser, JsonToken} + +private object JacksonUtils { + /** + * Advance the parser until a null or a specific token is found + */ + def nextUntil(parser: JsonParser, stopOn: JsonToken): Boolean = { + parser.nextToken() match { + case null => false + case x => x != stopOn + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala index 6e94e7056eb0b..95eb1174b1dd6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala @@ -20,17 +20,18 @@ package org.apache.spark.sql.json import java.sql.Timestamp import scala.collection.Map -import scala.collection.convert.Wrappers.{JMapWrapper, JListWrapper} +import scala.collection.convert.Wrappers.{JListWrapper, JMapWrapper} -import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException} +import com.fasterxml.jackson.core.JsonProcessingException import com.fasterxml.jackson.databind.ObjectMapper +import org.apache.spark.Logging import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.analysis.HiveTypeCoercion import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.ScalaReflection +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types._ -import org.apache.spark.Logging private[sql] object JsonRDD extends Logging { @@ -140,7 +141,7 @@ private[sql] object JsonRDD extends Logging { case ArrayType(NullType, containsNull) => ArrayType(StringType, containsNull) case ArrayType(struct: StructType, containsNull) => ArrayType(nullTypeToStringType(struct), containsNull) - case struct: StructType =>nullTypeToStringType(struct) + case struct: StructType => nullTypeToStringType(struct) case other: DataType => other } StructField(fieldName, newType, nullable) @@ -215,7 +216,7 @@ private[sql] object JsonRDD extends Logging { case map: Map[_, _] => StructType(Nil) // We have an array of arrays. If those element arrays do not have the same // element types, we will return ArrayType[StringType]. 
- case seq: Seq[_] => typeOfArray(seq) + case seq: Seq[_] => typeOfArray(seq) case value => typeOfPrimitiveValue(value) } }.reduce((type1: DataType, type2: DataType) => compatibleType(type1, type2)) @@ -317,7 +318,8 @@ private[sql] object JsonRDD extends Logging { parsed } catch { - case e: JsonProcessingException => Map(columnNameOfCorruptRecords -> record) :: Nil + case e: JsonProcessingException => + Map(columnNameOfCorruptRecords -> UTF8String(record)) :: Nil } } }) @@ -404,7 +406,7 @@ private[sql] object JsonRDD extends Logging { } } - private[json] def enforceCorrectType(value: Any, desiredType: DataType): Any ={ + private[json] def enforceCorrectType(value: Any, desiredType: DataType): Any = { if (value == null) { null } else { @@ -421,7 +423,10 @@ private[sql] object JsonRDD extends Logging { value.asInstanceOf[Seq[Any]].map(enforceCorrectType(_, elementType)) case MapType(StringType, valueType, _) => val map = value.asInstanceOf[Map[String, Any]] - map.mapValues(enforceCorrectType(_, valueType)).map(identity) + map.map { + case (k, v) => + (UTF8String(k), enforceCorrectType(v, valueType)) + }.map(identity) case struct: StructType => asRow(value.asInstanceOf[Map[String, Any]], struct) case DateType => toDate(value) case TimestampType => toTimestamp(value) @@ -429,7 +434,7 @@ private[sql] object JsonRDD extends Logging { } } - private def asRow(json: Map[String,Any], schema: StructType): Row = { + private def asRow(json: Map[String, Any], schema: StructType): Row = { // TODO: Reuse the row instead of creating a new one for every record. val row = new GenericMutableRow(schema.fields.length) schema.fields.zipWithIndex.foreach { @@ -440,54 +445,4 @@ private[sql] object JsonRDD extends Logging { row } - - /** Transforms a single Row to JSON using Jackson - * - * @param rowSchema the schema object used for conversion - * @param gen a JsonGenerator object - * @param row The row to convert - */ - private[sql] def rowToJSON(rowSchema: StructType, gen: JsonGenerator)(row: Row) = { - def valWriter: (DataType, Any) => Unit = { - case (_, null) | (NullType, _) => gen.writeNull() - case (StringType, v: String) => gen.writeString(v) - case (TimestampType, v: java.sql.Timestamp) => gen.writeString(v.toString) - case (IntegerType, v: Int) => gen.writeNumber(v) - case (ShortType, v: Short) => gen.writeNumber(v) - case (FloatType, v: Float) => gen.writeNumber(v) - case (DoubleType, v: Double) => gen.writeNumber(v) - case (LongType, v: Long) => gen.writeNumber(v) - case (DecimalType(), v: java.math.BigDecimal) => gen.writeNumber(v) - case (ByteType, v: Byte) => gen.writeNumber(v.toInt) - case (BinaryType, v: Array[Byte]) => gen.writeBinary(v) - case (BooleanType, v: Boolean) => gen.writeBoolean(v) - case (DateType, v) => gen.writeString(v.toString) - case (udt: UserDefinedType[_], v) => valWriter(udt.sqlType, v) - - case (ArrayType(ty, _), v: Seq[_] ) => - gen.writeStartArray() - v.foreach(valWriter(ty,_)) - gen.writeEndArray() - - case (MapType(kv,vv, _), v: Map[_,_]) => - gen.writeStartObject() - v.foreach { p => - gen.writeFieldName(p._1.toString) - valWriter(vv,p._2) - } - gen.writeEndObject() - - case (StructType(ty), v: Row) => - gen.writeStartObject() - ty.zip(v.toSeq).foreach { - case (_, null) => - case (field, v) => - gen.writeFieldName(field.name) - valWriter(field.dataType, v) - } - gen.writeEndObject() - } - - valWriter(rowSchema, row) - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/mathfunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/mathfunctions.scala 
deleted file mode 100644 index db47480c3864d..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/mathfunctions.scala +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql - -import scala.language.implicitConversions - -import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.mathfuncs._ -import org.apache.spark.sql.functions.lit - -/** - * :: Experimental :: - * Mathematical Functions available for [[DataFrame]]. - */ -@Experimental -// scalastyle:off -object mathfunctions { -// scalastyle:on - - private[this] implicit def toColumn(expr: Expression): Column = Column(expr) - - /** - * Computes the cosine inverse of the given value; the returned angle is in the range - * 0.0 through pi. - */ - def acos(e: Column): Column = Acos(e.expr) - - /** - * Computes the cosine inverse of the given column; the returned angle is in the range - * 0.0 through pi. - */ - def acos(columnName: String): Column = acos(Column(columnName)) - - /** - * Computes the sine inverse of the given value; the returned angle is in the range - * -pi/2 through pi/2. - */ - def asin(e: Column): Column = Asin(e.expr) - - /** - * Computes the sine inverse of the given column; the returned angle is in the range - * -pi/2 through pi/2. - */ - def asin(columnName: String): Column = asin(Column(columnName)) - - /** - * Computes the tangent inverse of the given value. - */ - def atan(e: Column): Column = Atan(e.expr) - - /** - * Computes the tangent inverse of the given column. - */ - def atan(columnName: String): Column = atan(Column(columnName)) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). - */ - def atan2(l: Column, r: Column): Column = Atan2(l.expr, r.expr) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). - */ - def atan2(l: Column, rightName: String): Column = atan2(l, Column(rightName)) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). - */ - def atan2(leftName: String, r: Column): Column = atan2(Column(leftName), r) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). - */ - def atan2(leftName: String, rightName: String): Column = - atan2(Column(leftName), Column(rightName)) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). 
- */ - def atan2(l: Column, r: Double): Column = atan2(l, lit(r).expr) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta).= - */ - def atan2(leftName: String, r: Double): Column = atan2(Column(leftName), r) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). - */ - def atan2(l: Double, r: Column): Column = atan2(lit(l).expr, r) - - /** - * Returns the angle theta from the conversion of rectangular coordinates (x, y) to - * polar coordinates (r, theta). - */ - def atan2(l: Double, rightName: String): Column = atan2(l, Column(rightName)) - - /** - * Computes the cube-root of the given value. - */ - def cbrt(e: Column): Column = Cbrt(e.expr) - - /** - * Computes the cube-root of the given column. - */ - def cbrt(columnName: String): Column = cbrt(Column(columnName)) - - /** - * Computes the ceiling of the given value. - */ - def ceil(e: Column): Column = Ceil(e.expr) - - /** - * Computes the ceiling of the given column. - */ - def ceil(columnName: String): Column = ceil(Column(columnName)) - - /** - * Computes the cosine of the given value. - */ - def cos(e: Column): Column = Cos(e.expr) - - /** - * Computes the cosine of the given column. - */ - def cos(columnName: String): Column = cos(Column(columnName)) - - /** - * Computes the hyperbolic cosine of the given value. - */ - def cosh(e: Column): Column = Cosh(e.expr) - - /** - * Computes the hyperbolic cosine of the given column. - */ - def cosh(columnName: String): Column = cosh(Column(columnName)) - - /** - * Computes the exponential of the given value. - */ - def exp(e: Column): Column = Exp(e.expr) - - /** - * Computes the exponential of the given column. - */ - def exp(columnName: String): Column = exp(Column(columnName)) - - /** - * Computes the exponential of the given value minus one. - */ - def expm1(e: Column): Column = Expm1(e.expr) - - /** - * Computes the exponential of the given column. - */ - def expm1(columnName: String): Column = expm1(Column(columnName)) - - /** - * Computes the floor of the given value. - */ - def floor(e: Column): Column = Floor(e.expr) - - /** - * Computes the floor of the given column. - */ - def floor(columnName: String): Column = floor(Column(columnName)) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(l: Column, r: Column): Column = Hypot(l.expr, r.expr) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(l: Column, rightName: String): Column = hypot(l, Column(rightName)) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(leftName: String, r: Column): Column = hypot(Column(leftName), r) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(leftName: String, rightName: String): Column = - hypot(Column(leftName), Column(rightName)) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(l: Column, r: Double): Column = hypot(l, lit(r).expr) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(leftName: String, r: Double): Column = hypot(Column(leftName), r) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. 
- */ - def hypot(l: Double, r: Column): Column = hypot(lit(l).expr, r) - - /** - * Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow. - */ - def hypot(l: Double, rightName: String): Column = hypot(l, Column(rightName)) - - /** - * Computes the natural logarithm of the given value. - */ - def log(e: Column): Column = Log(e.expr) - - /** - * Computes the natural logarithm of the given column. - */ - def log(columnName: String): Column = log(Column(columnName)) - - /** - * Computes the logarithm of the given value in Base 10. - */ - def log10(e: Column): Column = Log10(e.expr) - - /** - * Computes the logarithm of the given value in Base 10. - */ - def log10(columnName: String): Column = log10(Column(columnName)) - - /** - * Computes the natural logarithm of the given value plus one. - */ - def log1p(e: Column): Column = Log1p(e.expr) - - /** - * Computes the natural logarithm of the given column plus one. - */ - def log1p(columnName: String): Column = log1p(Column(columnName)) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(l: Column, r: Column): Column = Pow(l.expr, r.expr) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(l: Column, rightName: String): Column = pow(l, Column(rightName)) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(leftName: String, r: Column): Column = pow(Column(leftName), r) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(leftName: String, rightName: String): Column = pow(Column(leftName), Column(rightName)) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(l: Column, r: Double): Column = pow(l, lit(r).expr) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(leftName: String, r: Double): Column = pow(Column(leftName), r) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(l: Double, r: Column): Column = pow(lit(l).expr, r) - - /** - * Returns the value of the first argument raised to the power of the second argument. - */ - def pow(l: Double, rightName: String): Column = pow(l, Column(rightName)) - - /** - * Returns the double value that is closest in value to the argument and - * is equal to a mathematical integer. - */ - def rint(e: Column): Column = Rint(e.expr) - - /** - * Returns the double value that is closest in value to the argument and - * is equal to a mathematical integer. - */ - def rint(columnName: String): Column = rint(Column(columnName)) - - /** - * Computes the signum of the given value. - */ - def signum(e: Column): Column = Signum(e.expr) - - /** - * Computes the signum of the given column. - */ - def signum(columnName: String): Column = signum(Column(columnName)) - - /** - * Computes the sine of the given value. - */ - def sin(e: Column): Column = Sin(e.expr) - - /** - * Computes the sine of the given column. - */ - def sin(columnName: String): Column = sin(Column(columnName)) - - /** - * Computes the hyperbolic sine of the given value. - */ - def sinh(e: Column): Column = Sinh(e.expr) - - /** - * Computes the hyperbolic sine of the given column. - */ - def sinh(columnName: String): Column = sinh(Column(columnName)) - - /** - * Computes the tangent of the given value. 
- */ - def tan(e: Column): Column = Tan(e.expr) - - /** - * Computes the tangent of the given column. - */ - def tan(columnName: String): Column = tan(Column(columnName)) - - /** - * Computes the hyperbolic tangent of the given value. - */ - def tanh(e: Column): Column = Tanh(e.expr) - - /** - * Computes the hyperbolic tangent of the given column. - */ - def tanh(columnName: String): Column = tanh(Column(columnName)) - - /** - * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. - */ - def toDeg(e: Column): Column = ToDegrees(e.expr) - - /** - * Converts an angle measured in radians to an approximately equivalent angle measured in degrees. - */ - def toDeg(columnName: String): Column = toDeg(Column(columnName)) - - /** - * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. - */ - def toRad(e: Column): Column = ToRadians(e.expr) - - /** - * Converts an angle measured in degrees to an approximately equivalent angle measured in radians. - */ - def toRad(columnName: String): Column = toRad(Column(columnName)) -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 3f97a11ceb97d..4e94fd07a8771 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -44,6 +44,7 @@ package object sql { /** * Type alias for [[DataFrame]]. Kept here for backward source compatibility for Scala. + * @deprecated As of 1.3.0, replaced by `DataFrame`. */ @deprecated("1.3.0", "use DataFrame") type SchemaRDD = DataFrame diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala index 36cb5e03bbca7..caa9f045537d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetConverter.scala @@ -243,8 +243,10 @@ private[parquet] abstract class CatalystConverter extends GroupConverter { /** * Read a decimal value from a Parquet Binary into "dest". Only supports decimals that fit in * a long (i.e. precision <= 18) + * + * Returned value is needed by CatalystConverter, which doesn't reuse the Decimal object. 
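The Column-based math helpers removed above (atan2, hypot, pow, cbrt, and friends) remain available to user code through org.apache.spark.sql.functions in 1.4. A minimal usage sketch, assuming a SQLContext named sqlContext (SQLContext.range is the 1.4 addition from SPARK-7150; column names are illustrative):

  import org.apache.spark.sql.functions._
  import sqlContext.implicits._

  // Hypothetical DataFrame with two Double columns.
  val df = sqlContext.range(1, 4)
    .selectExpr("CAST(id AS DOUBLE) AS x", "CAST(id * 2 AS DOUBLE) AS y")

  df.select(
    atan2($"y", $"x").as("theta"),  // angle of (x, y) in polar coordinates
    hypot($"x", $"y").as("r"),      // sqrt(x^2 + y^2) without intermediate overflow
    pow($"x", 2.0).as("xSquared")
  ).show()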
*/ - protected[parquet] def readDecimal(dest: Decimal, value: Binary, ctype: DecimalType): Unit = { + protected[parquet] def readDecimal(dest: Decimal, value: Binary, ctype: DecimalType): Decimal = { val precision = ctype.precisionInfo.get.precision val scale = ctype.precisionInfo.get.scale val bytes = value.getBytes @@ -480,7 +482,7 @@ private[parquet] class CatalystPrimitiveStringConverter(parent: CatalystConverte override def hasDictionarySupport: Boolean = true - override def setDictionary(dictionary: Dictionary):Unit = + override def setDictionary(dictionary: Dictionary): Unit = dict = Array.tabulate(dictionary.getMaxId + 1) { dictionary.decodeToBinary(_).getBytes } override def addValueFromDictionary(dictionaryId: Int): Unit = @@ -591,8 +593,8 @@ private[parquet] class CatalystArrayConverter( CatalystConverter.ARRAY_ELEMENTS_SCHEMA_NAME, elementType, false), - fieldIndex=0, - parent=this) + fieldIndex = 0, + parent = this) override def getConverter(fieldIndex: Int): Converter = converter @@ -601,7 +603,7 @@ private[parquet] class CatalystArrayConverter( override protected[parquet] def updateField(fieldIndex: Int, value: Any): Unit = { // fieldIndex is ignored (assumed to be zero but not checked) - if(value == null) { + if (value == null) { throw new IllegalArgumentException("Null values inside Parquet arrays are not supported!") } buffer += value @@ -654,8 +656,8 @@ private[parquet] class CatalystNativeArrayConverter( CatalystConverter.ARRAY_ELEMENTS_SCHEMA_NAME, elementType, false), - fieldIndex=0, - parent=this) + fieldIndex = 0, + parent = this) override def getConverter(fieldIndex: Int): Converter = converter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala index 5eb1c6abc2432..f0f4e7d147e75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala @@ -29,128 +29,184 @@ import parquet.io.api.Binary import org.apache.spark.SparkEnv import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.sources import org.apache.spark.sql.types._ private[sql] object ParquetFilters { val PARQUET_FILTER_DATA = "org.apache.spark.sql.parquet.row.filter" def createRecordFilter(filterExpressions: Seq[Expression]): Option[Filter] = { - filterExpressions.flatMap(createFilter).reduceOption(FilterApi.and).map(FilterCompat.get) + filterExpressions.flatMap { filter => + createFilter(filter) + }.reduceOption(FilterApi.and).map(FilterCompat.get) } - def createFilter(predicate: Expression): Option[FilterPredicate] = { - val makeEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case BooleanType => - (n: String, v: Any) => FilterApi.eq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) - case IntegerType => - (n: String, v: Any) => FilterApi.eq(intColumn(n), v.asInstanceOf[Integer]) - case LongType => - (n: String, v: Any) => FilterApi.eq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.eq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.eq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - - // Binary.fromString and Binary.fromByteArray don't accept null values - case StringType => - (n: String, v: Any) => FilterApi.eq( - binaryColumn(n), - Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) - case 
BinaryType => - (n: String, v: Any) => FilterApi.eq( - binaryColumn(n), - Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) - } + private val makeEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case BooleanType => + (n: String, v: Any) => FilterApi.eq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) + case IntegerType => + (n: String, v: Any) => FilterApi.eq(intColumn(n), v.asInstanceOf[Integer]) + case LongType => + (n: String, v: Any) => FilterApi.eq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.eq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.eq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - val makeNotEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case BooleanType => - (n: String, v: Any) => FilterApi.notEq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) - case IntegerType => - (n: String, v: Any) => FilterApi.notEq(intColumn(n), v.asInstanceOf[Integer]) - case LongType => - (n: String, v: Any) => FilterApi.notEq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.notEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.notEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => FilterApi.notEq( - binaryColumn(n), - Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) - case BinaryType => - (n: String, v: Any) => FilterApi.notEq( - binaryColumn(n), - Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) - } + // Binary.fromString and Binary.fromByteArray don't accept null values + case StringType => + (n: String, v: Any) => FilterApi.eq( + binaryColumn(n), + Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) + case BinaryType => + (n: String, v: Any) => FilterApi.eq( + binaryColumn(n), + Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) + } - val makeLt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.lt(intColumn(n), v.asInstanceOf[Integer]) - case LongType => - (n: String, v: Any) => FilterApi.lt(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.lt(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.lt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) - } + private val makeNotEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case BooleanType => + (n: String, v: Any) => FilterApi.notEq(booleanColumn(n), v.asInstanceOf[java.lang.Boolean]) + case IntegerType => + (n: String, v: Any) => FilterApi.notEq(intColumn(n), v.asInstanceOf[Integer]) + case LongType => + (n: String, v: Any) => FilterApi.notEq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.notEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.notEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case 
StringType => + (n: String, v: Any) => FilterApi.notEq( + binaryColumn(n), + Option(v).map(s => Binary.fromByteArray(s.asInstanceOf[UTF8String].getBytes)).orNull) + case BinaryType => + (n: String, v: Any) => FilterApi.notEq( + binaryColumn(n), + Option(v).map(b => Binary.fromByteArray(v.asInstanceOf[Array[Byte]])).orNull) + } - val makeLtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.ltEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) - case LongType => - (n: String, v: Any) => FilterApi.ltEq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.ltEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.ltEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) - } + private val makeLt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.lt(intColumn(n), v.asInstanceOf[Integer]) + case LongType => + (n: String, v: Any) => FilterApi.lt(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.lt(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.lt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.lt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } - val makeGt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { - case IntegerType => - (n: String, v: Any) => FilterApi.gt(intColumn(n), v.asInstanceOf[java.lang.Integer]) - case LongType => - (n: String, v: Any) => FilterApi.gt(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.gt(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.gt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) - } + private val makeLtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.ltEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) + case LongType => + (n: String, v: Any) => FilterApi.ltEq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.ltEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.ltEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.ltEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } - val makeGtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { 
- case IntegerType => - (n: String, v: Any) => FilterApi.gtEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) - case LongType => - (n: String, v: Any) => FilterApi.gtEq(longColumn(n), v.asInstanceOf[java.lang.Long]) - case FloatType => - (n: String, v: Any) => FilterApi.gtEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) - case DoubleType => - (n: String, v: Any) => FilterApi.gtEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) - case StringType => - (n: String, v: Any) => - FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) - case BinaryType => - (n: String, v: Any) => - FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + private val makeGt: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.gt(intColumn(n), v.asInstanceOf[java.lang.Integer]) + case LongType => + (n: String, v: Any) => FilterApi.gt(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.gt(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.gt(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.gt(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } + + private val makeGtEq: PartialFunction[DataType, (String, Any) => FilterPredicate] = { + case IntegerType => + (n: String, v: Any) => FilterApi.gtEq(intColumn(n), v.asInstanceOf[java.lang.Integer]) + case LongType => + (n: String, v: Any) => FilterApi.gtEq(longColumn(n), v.asInstanceOf[java.lang.Long]) + case FloatType => + (n: String, v: Any) => FilterApi.gtEq(floatColumn(n), v.asInstanceOf[java.lang.Float]) + case DoubleType => + (n: String, v: Any) => FilterApi.gtEq(doubleColumn(n), v.asInstanceOf[java.lang.Double]) + case StringType => + (n: String, v: Any) => + FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[UTF8String].getBytes)) + case BinaryType => + (n: String, v: Any) => + FilterApi.gtEq(binaryColumn(n), Binary.fromByteArray(v.asInstanceOf[Array[Byte]])) + } + + /** + * Converts data sources filters to Parquet filter predicates. + */ + def createFilter(schema: StructType, predicate: sources.Filter): Option[FilterPredicate] = { + val dataTypeOf = schema.map(f => f.name -> f.dataType).toMap + + // NOTE: + // + // For any comparison operator `cmp`, both `a cmp NULL` and `NULL cmp a` evaluate to `NULL`, + // which can be casted to `false` implicitly. Please refer to the `eval` method of these + // operators and the `SimplifyFilters` rule for details. 
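As a point of reference, a driver-side sketch of what the new source-filter entry point produces. ParquetFilters is private[sql], so this illustrates what the planner does rather than a public API, and the rendered predicate string is indicative only:

  // (from within org.apache.spark.sql.parquet)
  import org.apache.spark.sql.sources
  import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

  val schema = StructType(Seq(StructField("age", IntegerType)))
  val filter = sources.And(sources.GreaterThan("age", 18), sources.IsNotNull("age"))

  // Roughly Some(and(gt(age, 18), noteq(age, null))); filters over unsupported types fall
  // through to None and are simply not pushed down.
  val parquetPredicate = ParquetFilters.createFilter(schema, filter)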
+ predicate match { + case sources.IsNull(name) => + makeEq.lift(dataTypeOf(name)).map(_(name, null)) + case sources.IsNotNull(name) => + makeNotEq.lift(dataTypeOf(name)).map(_(name, null)) + + case sources.EqualTo(name, value) => + makeEq.lift(dataTypeOf(name)).map(_(name, value)) + case sources.Not(sources.EqualTo(name, value)) => + makeNotEq.lift(dataTypeOf(name)).map(_(name, value)) + + case sources.LessThan(name, value) => + makeLt.lift(dataTypeOf(name)).map(_(name, value)) + case sources.LessThanOrEqual(name, value) => + makeLtEq.lift(dataTypeOf(name)).map(_(name, value)) + + case sources.GreaterThan(name, value) => + makeGt.lift(dataTypeOf(name)).map(_(name, value)) + case sources.GreaterThanOrEqual(name, value) => + makeGtEq.lift(dataTypeOf(name)).map(_(name, value)) + + case sources.And(lhs, rhs) => + (createFilter(schema, lhs) ++ createFilter(schema, rhs)).reduceOption(FilterApi.and) + + case sources.Or(lhs, rhs) => + for { + lhsFilter <- createFilter(schema, lhs) + rhsFilter <- createFilter(schema, rhs) + } yield FilterApi.or(lhsFilter, rhsFilter) + + case sources.Not(pred) => + createFilter(schema, pred).map(FilterApi.not) + + case _ => None } + } + /** + * Converts Catalyst predicate expressions to Parquet filter predicates. + * + * @todo This can be removed once we get rid of the old Parquet support. + */ + def createFilter(predicate: Expression): Option[FilterPredicate] = { // NOTE: // // For any comparison operator `cmp`, both `a cmp NULL` and `NULL cmp a` evaluate to `NULL`, @@ -170,7 +226,7 @@ private[sql] object ParquetFilters { makeEq.lift(dataType).map(_(name, value)) case EqualTo(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => makeEq.lift(dataType).map(_(name, value)) - + case Not(EqualTo(NamedExpression(name, _), NonNullLiteral(value, dataType))) => makeNotEq.lift(dataType).map(_(name, value)) case Not(EqualTo(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _))) => @@ -192,7 +248,7 @@ private[sql] object ParquetFilters { case LessThanOrEqual(NamedExpression(name, _), NonNullLiteral(value, dataType)) => makeLtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _)) => - makeLtEq.lift(dataType).map(_(name, value)) + makeLtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(NonNullLiteral(value, dataType), NamedExpression(name, _)) => makeGtEq.lift(dataType).map(_(name, value)) case LessThanOrEqual(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => @@ -201,7 +257,7 @@ private[sql] object ParquetFilters { case GreaterThan(NamedExpression(name, _), NonNullLiteral(value, dataType)) => makeGt.lift(dataType).map(_(name, value)) case GreaterThan(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _)) => - makeGt.lift(dataType).map(_(name, value)) + makeGt.lift(dataType).map(_(name, value)) case GreaterThan(NonNullLiteral(value, dataType), NamedExpression(name, _)) => makeLt.lift(dataType).map(_(name, value)) case GreaterThan(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => @@ -210,7 +266,7 @@ private[sql] object ParquetFilters { case GreaterThanOrEqual(NamedExpression(name, _), NonNullLiteral(value, dataType)) => makeGtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(Cast(NamedExpression(name, _), dataType), NonNullLiteral(value, _)) => - makeGtEq.lift(dataType).map(_(name, value)) + makeGtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(NonNullLiteral(value, dataType), 
NamedExpression(name, _)) => makeLtEq.lift(dataType).map(_(name, value)) case GreaterThanOrEqual(NonNullLiteral(value, _), Cast(NamedExpression(name, _), dataType)) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala index aded126ea0615..cb7ae246d0d75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala @@ -77,7 +77,7 @@ private[sql] case class ParquetTableScan( } }.toArray - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { import parquet.filter2.compat.FilterCompat.FilterPredicateCompat val sc = sqlContext.sparkContext @@ -255,7 +255,7 @@ private[sql] case class InsertIntoParquetTable( /** * Inserts all rows into the Parquet file. */ - override def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { // TODO: currently we do not check whether the "schema"s are compatible // That means if one first creates a table and then INSERTs data with // and incompatible schema the execution will fail. It would be nice @@ -541,7 +541,7 @@ private[parquet] class FilteringParquetRowInputFormat val splits = mutable.ArrayBuffer.empty[ParquetInputSplit] val filter: Filter = ParquetInputFormat.getFilter(configuration) var rowGroupsDropped: Long = 0 - var totalRowGroups: Long = 0 + var totalRowGroups: Long = 0 // Ugly hack, stuck with it until PR: // https://github.com/apache/incubator-parquet-mr/pull/17 @@ -664,7 +664,7 @@ private[parquet] object FileSystemHelper { s"ParquetTableOperations: path $path does not exist or is not a directory") } fs.globStatus(path) - .flatMap { status => if(status.isDir) fs.listStatus(status.getPath) else List(status) } + .flatMap { status => if (status.isDir) fs.listStatus(status.getPath) else List(status) } .map(_.getPath) } @@ -674,7 +674,7 @@ private[parquet] object FileSystemHelper { def findMaxTaskId(pathStr: String, conf: Configuration): Int = { val files = FileSystemHelper.listFiles(pathStr, conf) // filename pattern is part-r-.parquet - val nameP = new scala.util.matching.Regex("""part-r-(\d{1,}).parquet""", "taskid") + val nameP = new scala.util.matching.Regex("""part-.-(\d{1,}).*""", "taskid") val hiddenFileP = new scala.util.matching.Regex("_.*") files.map(_.getName).map { case nameP(taskid) => taskid.toInt diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala index c45c431438efc..70a220cc43ab9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableSupport.scala @@ -129,7 +129,7 @@ private[parquet] object RowReadSupport { } /** - * A `parquet.hadoop.api.WriteSupport` for Row ojects. + * A `parquet.hadoop.api.WriteSupport` for Row objects. 
*/ private[parquet] class RowWriteSupport extends WriteSupport[Row] with Logging { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index 1dc819b5d7b9b..6698b19c7477d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -489,7 +489,7 @@ private[parquet] object ParquetTypesConverter extends Logging { val children = fs .globStatus(path) - .flatMap { status => if(status.isDir) fs.listStatus(status.getPath) else List(status) } + .flatMap { status => if (status.isDir) fs.listStatus(status.getPath) else List(status) } .filterNot { status => val name = status.getPath.getName (name(0) == '.' || name(0) == '_') && name != ParquetFileWriter.PARQUET_METADATA_FILE diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala index 85e60733bc57a..824ae36968c32 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/newParquet.scala @@ -14,209 +14,318 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.spark.sql.parquet -import java.io.IOException -import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} -import java.math.{BigDecimal => JBigDecimal} import java.net.URI -import java.text.SimpleDateFormat -import java.util.{Date, List => JList} +import java.util.{List => JList} import scala.collection.JavaConversions._ -import scala.collection.mutable.ArrayBuffer import scala.util.Try +import com.google.common.base.Objects import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -import org.apache.hadoop.mapreduce.{InputSplit, Job, JobContext} import parquet.filter2.predicate.FilterApi -import parquet.format.converter.ParquetMetadataConverter +import parquet.hadoop._ import parquet.hadoop.metadata.CompressionCodecName import parquet.hadoop.util.ContextUtil -import parquet.hadoop.{ParquetInputFormat, _} -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.{Partition => SparkPartition, SerializableWritable, Logging, SparkException} +import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.mapred.SparkHadoopMapRedUtil -import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil -import org.apache.spark.rdd.{NewHadoopPartition, NewHadoopRDD, RDD} -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, expressions} -import org.apache.spark.sql.parquet.ParquetTypesConverter._ +import org.apache.spark.rdd.RDD._ +import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types.{IntegerType, StructField, StructType, _} -import org.apache.spark.sql.{DataFrame, Row, SQLConf, SQLContext, SaveMode} -import org.apache.spark.{Logging, SerializableWritable, SparkException, TaskContext, Partition => SparkPartition} - -/** - * Allows creation of Parquet based tables using the 
syntax: - * {{{ - * CREATE TEMPORARY TABLE ... USING org.apache.spark.sql.parquet OPTIONS (...) - * }}} - * - * Supported options include: - * - * - `path`: Required. When reading Parquet files, `path` should point to the location of the - * Parquet file(s). It can be either a single raw Parquet file, or a directory of Parquet files. - * In the latter case, this data source tries to discover partitioning information if the the - * directory is structured in the same style of Hive partitioned tables. When writing Parquet - * file, `path` should point to the destination folder. - * - * - `mergeSchema`: Optional. Indicates whether we should merge potentially different (but - * compatible) schemas stored in all Parquet part-files. - * - * - `partition.defaultName`: Optional. Partition name used when a value of a partition column is - * null or empty string. This is similar to the `hive.exec.default.partition.name` configuration - * in Hive. - */ -private[sql] class DefaultSource - extends RelationProvider - with SchemaRelationProvider - with CreatableRelationProvider { - - private def checkPath(parameters: Map[String, String]): String = { - parameters.getOrElse("path", sys.error("'path' must be specified for parquet tables.")) - } +import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.{Row, SQLConf, SQLContext} +import org.apache.spark.util.Utils - /** Returns a new base relation with the given parameters. */ +private[sql] class DefaultSource extends HadoopFsRelationProvider { override def createRelation( sqlContext: SQLContext, - parameters: Map[String, String]): BaseRelation = { - ParquetRelation2(Seq(checkPath(parameters)), parameters, None)(sqlContext) - } - - /** Returns a new base relation with the given parameters and schema. */ - override def createRelation( - sqlContext: SQLContext, - parameters: Map[String, String], - schema: StructType): BaseRelation = { - ParquetRelation2(Seq(checkPath(parameters)), parameters, Some(schema))(sqlContext) + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): HadoopFsRelation = { + new ParquetRelation2(paths, schema, None, partitionColumns, parameters)(sqlContext) } +} - /** Returns a new base relation with the given parameters and save given data into it. */ - override def createRelation( - sqlContext: SQLContext, - mode: SaveMode, - parameters: Map[String, String], - data: DataFrame): BaseRelation = { - val path = checkPath(parameters) - val filesystemPath = new Path(path) - val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) - val doInsertion = (mode, fs.exists(filesystemPath)) match { - case (SaveMode.ErrorIfExists, true) => - sys.error(s"path $path already exists.") - case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => - true - case (SaveMode.Ignore, exists) => - !exists - } +// NOTE: This class is instantiated and used on executor side only, no need to be serializable. +private[sql] class ParquetOutputWriter(path: String, context: TaskAttemptContext) + extends OutputWriter { + + private val recordWriter: RecordWriter[Void, Row] = { + val conf = context.getConfiguration + val outputFormat = { + // When appending new Parquet files to an existing Parquet file directory, to avoid + // overwriting existing data files, we need to find out the max task ID encoded in these data + // file names. 
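A small sketch of the append-safe naming scheme described here, assuming a directory that already holds a few part-files (file names hypothetical; the real code additionally rejects unexpected names with an error):

  val partFilePattern = """part-.-(\d{1,}).*""".r

  val existing = Seq("part-r-00001.gz.parquet", "part-r-00017.parquet", "_SUCCESS", ".part-r-00002.crc")
  val maxExistingTaskId = existing.map {
    case partFilePattern(id) => id.toInt
    case _                   => 0   // summary/hidden files carry no task ID
  }.reduceOption(_ max _).getOrElse(0)   // 17

  // A writer running as task 3 of the appending job then names its output
  // f"part-r-$split%05d$extension" with split = 3 + maxExistingTaskId + 1:
  val nextName = f"part-r-${3 + maxExistingTaskId + 1}%05d.gz.parquet"   // part-r-00021.gz.parquet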
+ // TODO Make this snippet a utility function for other data source developers + val maxExistingTaskId = { + // Note that `path` may point to a temporary location. Here we retrieve the real + // destination path from the configuration + val outputPath = new Path(conf.get("spark.sql.sources.output.path")) + val fs = outputPath.getFileSystem(conf) + + if (fs.exists(outputPath)) { + // Pattern used to match task ID in part file names, e.g.: + // + // part-r-00001.gz.parquet + // ^~~~~ + val partFilePattern = """part-.-(\d{1,}).*""".r + + fs.listStatus(outputPath).map(_.getPath.getName).map { + case partFilePattern(id) => id.toInt + case name if name.startsWith("_") => 0 + case name if name.startsWith(".") => 0 + case name => sys.error( + s"Trying to write Parquet files to directory $outputPath, " + + s"but found items with illegal name '$name'.") + }.reduceOption(_ max _).getOrElse(0) + } else { + 0 + } + } - val relation = if (doInsertion) { - // This is a hack. We always set nullable/containsNull/valueContainsNull to true - // for the schema of a parquet data. - val df = - sqlContext.createDataFrame( - data.queryExecution.toRdd, - data.schema.asNullable, - needsConversion = false) - val createdRelation = - createRelation(sqlContext, parameters, df.schema).asInstanceOf[ParquetRelation2] - createdRelation.insert(df, overwrite = mode == SaveMode.Overwrite) - createdRelation - } else { - // If the save mode is Ignore, we will just create the relation based on existing data. - createRelation(sqlContext, parameters) + new ParquetOutputFormat[Row]() { + // Here we override `getDefaultWorkFile` for two reasons: + // + // 1. To allow appending. We need to generate output file name based on the max available + // task ID computed above. + // + // 2. To allow dynamic partitioning. Default `getDefaultWorkFile` uses + // `FileOutputCommitter.getWorkPath()`, which points to the base directory of all + // partitions in the case of dynamic partitioning. + override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { + val split = context.getTaskAttemptID.getTaskID.getId + maxExistingTaskId + 1 + new Path(path, f"part-r-$split%05d$extension") + } + } } - relation + outputFormat.getRecordWriter(context) } -} -private[sql] case class Partition(values: Row, path: String) + override def write(row: Row): Unit = recordWriter.write(null, row) -private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) + override def close(): Unit = recordWriter.close(context) +} -/** - * An alternative to [[ParquetRelation]] that plugs in using the data sources API. This class is - * intended as a full replacement of the Parquet support in Spark SQL. The old implementation will - * be deprecated and eventually removed once this version is proved to be stable enough. - * - * Compared with the old implementation, this class has the following notable differences: - * - * - Partitioning discovery: Hive style multi-level partitions are auto discovered. - * - Metadata discovery: Parquet is a format comes with schema evolving support. This data source - * can detect and merge schemas from all Parquet part-files as long as they are compatible. - * Also, metadata and [[FileStatus]]es are cached for better performance. - * - Statistics: Statistics for the size of the table are automatically populated during schema - * discovery. 
- */ -@DeveloperApi -private[sql] case class ParquetRelation2( - paths: Seq[String], - parameters: Map[String, String], - maybeSchema: Option[StructType] = None, - maybePartitionSpec: Option[PartitionSpec] = None)( - @transient val sqlContext: SQLContext) - extends BaseRelation - with CatalystScan - with InsertableRelation - with SparkHadoopMapReduceUtil +private[sql] class ParquetRelation2( + override val paths: Array[String], + private val maybeDataSchema: Option[StructType], + // This is for metastore conversion. + private val maybePartitionSpec: Option[PartitionSpec], + override val userDefinedPartitionColumns: Option[StructType], + parameters: Map[String, String])( + val sqlContext: SQLContext) + extends HadoopFsRelation(maybePartitionSpec) with Logging { + private[sql] def this( + paths: Array[String], + maybeDataSchema: Option[StructType], + maybePartitionSpec: Option[PartitionSpec], + parameters: Map[String, String])( + sqlContext: SQLContext) = { + this( + paths, + maybeDataSchema, + maybePartitionSpec, + maybePartitionSpec.map(_.partitionColumns), + parameters)(sqlContext) + } + // Should we merge schemas from all Parquet part-files? private val shouldMergeSchemas = parameters.getOrElse(ParquetRelation2.MERGE_SCHEMA, "true").toBoolean - // Optional Metastore schema, used when converting Hive Metastore Parquet table - private val maybeMetastoreSchema = - parameters - .get(ParquetRelation2.METASTORE_SCHEMA) - .map(s => DataType.fromJson(s).asInstanceOf[StructType]) + private val maybeMetastoreSchema = parameters + .get(ParquetRelation2.METASTORE_SCHEMA) + .map(DataType.fromJson(_).asInstanceOf[StructType]) - // Hive uses this as part of the default partition name when the partition column value is null - // or empty string - private val defaultPartitionName = parameters.getOrElse( - ParquetRelation2.DEFAULT_PARTITION_NAME, "__HIVE_DEFAULT_PARTITION__") + private lazy val metadataCache: MetadataCache = { + val meta = new MetadataCache + meta.refresh() + meta + } override def equals(other: Any): Boolean = other match { - case relation: ParquetRelation2 => - // If schema merging is required, we don't compare the actual schemas since they may evolve. 
+ case that: ParquetRelation2 => val schemaEquality = if (shouldMergeSchemas) { - shouldMergeSchemas == relation.shouldMergeSchemas + this.shouldMergeSchemas == that.shouldMergeSchemas } else { - schema == relation.schema + this.dataSchema == that.dataSchema && + this.schema == that.schema } - paths.toSet == relation.paths.toSet && + this.paths.toSet == that.paths.toSet && schemaEquality && - maybeMetastoreSchema == relation.maybeMetastoreSchema && - maybePartitionSpec == relation.maybePartitionSpec + this.maybeDataSchema == that.maybeDataSchema && + this.partitionColumns == that.partitionColumns case _ => false } override def hashCode(): Int = { if (shouldMergeSchemas) { - com.google.common.base.Objects.hashCode( - shouldMergeSchemas: java.lang.Boolean, + Objects.hashCode( + Boolean.box(shouldMergeSchemas), paths.toSet, - maybeMetastoreSchema, - maybePartitionSpec) + maybeDataSchema, + partitionColumns) } else { - com.google.common.base.Objects.hashCode( - shouldMergeSchemas: java.lang.Boolean, - schema, + Objects.hashCode( + Boolean.box(shouldMergeSchemas), paths.toSet, - maybeMetastoreSchema, - maybePartitionSpec) + dataSchema, + schema, + maybeDataSchema, + partitionColumns) + } + } + + override def dataSchema: StructType = maybeDataSchema.getOrElse(metadataCache.dataSchema) + + override private[sql] def refresh(): Unit = { + super.refresh() + metadataCache.refresh() + } + + // Parquet data source always uses Catalyst internal representations. + override val needConversion: Boolean = false + + override def sizeInBytes: Long = metadataCache.dataStatuses.map(_.getLen).sum + + override def prepareJobForWrite(job: Job): OutputWriterFactory = { + val conf = ContextUtil.getConfiguration(job) + + val committerClass = + conf.getClass( + "spark.sql.parquet.output.committer.class", + classOf[ParquetOutputCommitter], + classOf[ParquetOutputCommitter]) + + conf.setClass( + SQLConf.OUTPUT_COMMITTER_CLASS, + committerClass, + classOf[ParquetOutputCommitter]) + + // TODO There's no need to use two kinds of WriteSupport + // We should unify them. `SpecificMutableRow` can process both atomic (primitive) types and + // complex types. + val writeSupportClass = + if (dataSchema.map(_.dataType).forall(ParquetTypesConverter.isPrimitiveType)) { + classOf[MutableRowWriteSupport] + } else { + classOf[RowWriteSupport] + } + + ParquetOutputFormat.setWriteSupportClass(job, writeSupportClass) + RowWriteSupport.setSchema(dataSchema.toAttributes, conf) + + // Sets compression scheme + conf.set( + ParquetOutputFormat.COMPRESSION, + ParquetRelation + .shortParquetCompressionCodecNames + .getOrElse( + sqlContext.conf.parquetCompressionCodec.toUpperCase, + CompressionCodecName.UNCOMPRESSED).name()) + + new OutputWriterFactory { + override def newInstance( + path: String, dataSchema: StructType, context: TaskAttemptContext): OutputWriter = { + new ParquetOutputWriter(path, context) + } } } - private[sql] def sparkContext = sqlContext.sparkContext + override def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputFiles: Array[FileStatus], + broadcastedConf: Broadcast[SerializableWritable[Configuration]]): RDD[Row] = { + val useMetadataCache = sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true").toBoolean + val parquetFilterPushDown = sqlContext.conf.parquetFilterPushDown + // Create the function to set variable Parquet confs at both driver and executor side. 
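For reference, the knobs consulted in prepareJobForWrite and buildScan are plain SQLConf settings. A sketch of setting them from user code; the committer class key is taken from this patch, the other two key names are the standard SQLConf keys as I understand them, and the committer shown is just the Parquet default:

  import parquet.hadoop.ParquetOutputCommitter

  sqlContext.setConf("spark.sql.parquet.compression.codec", "gzip")
  sqlContext.setConf("spark.sql.parquet.filterPushdown", "true")
  sqlContext.setConf("spark.sql.parquet.output.committer.class",
    classOf[ParquetOutputCommitter].getCanonicalName)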
+ val initLocalJobFuncOpt = + ParquetRelation2.initializeLocalJobFunc( + requiredColumns, + filters, + dataSchema, + useMetadataCache, + parquetFilterPushDown) _ + // Create the function to set input paths at the driver side. + val setInputPaths = ParquetRelation2.initializeDriverSideJobFunc(inputFiles) _ + + val footers = inputFiles.map(f => metadataCache.footers(f.getPath)) + + Utils.withDummyCallSite(sqlContext.sparkContext) { + // TODO Stop using `FilteringParquetRowInputFormat` and overriding `getPartition`. + // After upgrading to Parquet 1.6.0, we should be able to stop caching `FileStatus` objects + // and footers. Especially when a global arbitrative schema (either from metastore or data + // source DDL) is available. + new SqlNewHadoopRDD( + sc = sqlContext.sparkContext, + broadcastedConf = broadcastedConf, + initDriverSideJobFuncOpt = Some(setInputPaths), + initLocalJobFuncOpt = Some(initLocalJobFuncOpt), + inputFormatClass = classOf[FilteringParquetRowInputFormat], + keyClass = classOf[Void], + valueClass = classOf[Row]) { + + val cacheMetadata = useMetadataCache + + @transient val cachedStatuses = inputFiles.map { f => + // In order to encode the authority of a Path containing special characters such as '/' + // (which does happen in some S3N credentials), we need to use the string returned by the + // URI of the path to create a new Path. + val pathWithEscapedAuthority = escapePathUserInfo(f.getPath) + new FileStatus( + f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, + f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithEscapedAuthority) + }.toSeq + + @transient val cachedFooters = footers.map { f => + // In order to encode the authority of a Path containing special characters such as /, + // we need to use the string returned by the URI of the path to create a new Path. + new Footer(escapePathUserInfo(f.getFile), f.getParquetMetadata) + }.toSeq + + private def escapePathUserInfo(path: Path): Path = { + val uri = path.toUri + new Path(new URI( + uri.getScheme, uri.getRawUserInfo, uri.getHost, uri.getPort, uri.getPath, + uri.getQuery, uri.getFragment)) + } + + // Overridden so we can inject our own cached files statuses. + override def getPartitions: Array[SparkPartition] = { + val inputFormat = if (cacheMetadata) { + new FilteringParquetRowInputFormat { + override def listStatus(jobContext: JobContext): JList[FileStatus] = cachedStatuses + override def getFooters(jobContext: JobContext): JList[Footer] = cachedFooters + } + } else { + new FilteringParquetRowInputFormat + } + + val jobContext = newJobContext(getConf(isDriverSide = true), jobId) + val rawSplits = inputFormat.getSplits(jobContext) + + Array.tabulate[SparkPartition](rawSplits.size) { i => + new SqlNewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + } + } + }.values + } + } private class MetadataCache { // `FileStatus` objects of all "_metadata" files. @@ -226,127 +335,75 @@ private[sql] case class ParquetRelation2( private var commonMetadataStatuses: Array[FileStatus] = _ // Parquet footer cache. - var footers: Map[FileStatus, Footer] = _ + var footers: Map[Path, Footer] = _ // `FileStatus` objects of all data files (Parquet part-files). var dataStatuses: Array[FileStatus] = _ - // Partition spec of this table, including names, data types, and values of each partition - // column, and paths of each partition. 
- var partitionSpec: PartitionSpec = _ - // Schema of the actual Parquet files, without partition columns discovered from partition // directory paths. - var parquetSchema: StructType = _ + var dataSchema: StructType = null // Schema of the whole table, including partition columns. var schema: StructType = _ - // Indicates whether partition columns are also included in Parquet data file schema. If not, - // we need to fill in partition column values into read rows when scanning the table. - var partitionKeysIncludedInParquetSchema: Boolean = _ - - def prepareMetadata(path: Path, schema: StructType, conf: Configuration): Unit = { - conf.set( - ParquetOutputFormat.COMPRESSION, - ParquetRelation - .shortParquetCompressionCodecNames - .getOrElse( - sqlContext.conf.parquetCompressionCodec.toUpperCase, - CompressionCodecName.UNCOMPRESSED).name()) - - ParquetRelation.enableLogForwarding() - ParquetTypesConverter.writeMetaData(schema.toAttributes, path, conf) - } - /** * Refreshes `FileStatus`es, footers, partition spec, and table schema. */ def refresh(): Unit = { - // Support either reading a collection of raw Parquet part-files, or a collection of folders - // containing Parquet files (e.g. partitioned Parquet table). - val baseStatuses = paths.distinct.map { p => - val fs = FileSystem.get(URI.create(p), sparkContext.hadoopConfiguration) - val path = new Path(p) - val qualified = path.makeQualified(fs.getUri, fs.getWorkingDirectory) - - if (!fs.exists(qualified) && maybeSchema.isDefined) { - fs.mkdirs(qualified) - prepareMetadata(qualified, maybeSchema.get, sparkContext.hadoopConfiguration) - } - - fs.getFileStatus(qualified) - }.toArray - assert(baseStatuses.forall(!_.isDir) || baseStatuses.forall(_.isDir)) - // Lists `FileStatus`es of all leaf nodes (files) under all base directories. - val leaves = baseStatuses.flatMap { f => - val fs = FileSystem.get(f.getPath.toUri, sparkContext.hadoopConfiguration) - SparkHadoopUtil.get.listLeafStatuses(fs, f.getPath).filter { f => - isSummaryFile(f.getPath) || - !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) - } - } + val leaves = cachedLeafStatuses().filter { f => + isSummaryFile(f.getPath) || + !(f.getPath.getName.startsWith("_") || f.getPath.getName.startsWith(".")) + }.toArray dataStatuses = leaves.filterNot(f => isSummaryFile(f.getPath)) metadataStatuses = leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_METADATA_FILE) commonMetadataStatuses = leaves.filter(_.getPath.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE) - footers = (dataStatuses ++ metadataStatuses ++ commonMetadataStatuses).par.map { f => - val parquetMetadata = ParquetFileReader.readFooter( - sparkContext.hadoopConfiguration, f, ParquetMetadataConverter.NO_FILTER) - f -> new Footer(f.getPath, parquetMetadata) - }.seq.toMap - - partitionSpec = maybePartitionSpec.getOrElse { - val partitionDirs = leaves - .filterNot(baseStatuses.contains) - .map(_.getPath.getParent) - .distinct - - if (partitionDirs.nonEmpty) { - // Parses names and values of partition columns, and infer their data types. 
- ParquetRelation2.parsePartitions(partitionDirs, defaultPartitionName) + footers = { + val conf = SparkHadoopUtil.get.conf + val taskSideMetaData = conf.getBoolean(ParquetInputFormat.TASK_SIDE_METADATA, true) + val rawFooters = if (shouldMergeSchemas) { + ParquetFileReader.readAllFootersInParallel( + conf, seqAsJavaList(leaves), taskSideMetaData) } else { - // No partition directories found, makes an empty specification - PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[Partition]) + ParquetFileReader.readAllFootersInParallelUsingSummaryFiles( + conf, seqAsJavaList(leaves), taskSideMetaData) } + + rawFooters.map(footer => footer.getFile -> footer).toMap } - // To get the schema. We first try to get the schema defined in maybeSchema. - // If maybeSchema is not defined, we will try to get the schema from existing parquet data - // (through readSchema). If data does not exist, we will try to get the schema defined in - // maybeMetastoreSchema (defined in the options of the data source). - // Finally, if we still could not get the schema. We throw an error. - parquetSchema = - maybeSchema - .orElse(readSchema()) - .orElse(maybeMetastoreSchema) - .getOrElse(sys.error("Failed to get the schema.")) - - partitionKeysIncludedInParquetSchema = - isPartitioned && - partitionColumns.forall(f => parquetSchema.fieldNames.contains(f.name)) - - schema = { - val fullRelationSchema = if (partitionKeysIncludedInParquetSchema) { - parquetSchema - } else { - StructType(parquetSchema.fields ++ partitionColumns.fields) + // If we already get the schema, don't need to re-compute it since the schema merging is + // time-consuming. + if (dataSchema == null) { + dataSchema = { + val dataSchema0 = + maybeDataSchema + .orElse(readSchema()) + .orElse(maybeMetastoreSchema) + .getOrElse(sys.error("Failed to get the schema.")) + + // If this Parquet relation is converted from a Hive Metastore table, must reconcile case + // case insensitivity issue and possible schema mismatch (probably caused by schema + // evolution). + maybeMetastoreSchema + .map(ParquetRelation2.mergeMetastoreParquetSchema(_, dataSchema0)) + .getOrElse(dataSchema0) } - - // If this Parquet relation is converted from a Hive Metastore table, must reconcile case - // insensitivity issue and possible schema mismatch. - maybeMetastoreSchema - .map(ParquetRelation2.mergeMetastoreParquetSchema(_, fullRelationSchema)) - .getOrElse(fullRelationSchema) } } + private def isSummaryFile(file: Path): Boolean = { + file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE || + file.getName == ParquetFileWriter.PARQUET_METADATA_FILE + } + private def readSchema(): Option[StructType] = { // Sees which file(s) we need to touch in order to figure out the schema. - val filesToTouch = + // // Always tries the summary files first if users don't require a merged schema. In this case, // "_common_metadata" is more preferable than "_metadata" because it doesn't contain row // groups information, and could be much smaller for large Parquet files with lots of row @@ -365,6 +422,7 @@ private[sql] case class ParquetRelation2( // Here we tend to be pessimistic and take the second case into account. Basically this means // we can't trust the summary files if users require a merged schema, and must touch all part- // files to do the merge. + val filesToTouch = if (shouldMergeSchemas) { // Also includes summary files, 'cause there might be empty partition directories. 
(metadataStatuses ++ commonMetadataStatuses ++ dataStatuses).toSeq @@ -382,356 +440,67 @@ private[sql] case class ParquetRelation2( .toSeq } - ParquetRelation2.readSchema(filesToTouch.map(footers.apply), sqlContext) + assert( + filesToTouch.nonEmpty || maybeDataSchema.isDefined || maybeMetastoreSchema.isDefined, + "No schema defined, " + + s"and no Parquet data file or summary file found under ${paths.mkString(", ")}.") + + ParquetRelation2.readSchema(filesToTouch.map(f => footers.apply(f.getPath)), sqlContext) } } +} - @transient private val metadataCache = new MetadataCache - metadataCache.refresh() - - def partitionSpec: PartitionSpec = metadataCache.partitionSpec - - def partitionColumns: StructType = metadataCache.partitionSpec.partitionColumns - - def partitions: Seq[Partition] = metadataCache.partitionSpec.partitions - - def isPartitioned: Boolean = partitionColumns.nonEmpty - - private def partitionKeysIncludedInDataSchema = metadataCache.partitionKeysIncludedInParquetSchema - - private def parquetSchema = metadataCache.parquetSchema - - override def schema: StructType = metadataCache.schema - - private def isSummaryFile(file: Path): Boolean = { - file.getName == ParquetFileWriter.PARQUET_COMMON_METADATA_FILE || - file.getName == ParquetFileWriter.PARQUET_METADATA_FILE - } +private[sql] object ParquetRelation2 extends Logging { + // Whether we should merge schemas collected from all Parquet part-files. + private[sql] val MERGE_SCHEMA = "mergeSchema" - // Skip type conversion - override val needConversion: Boolean = false + // Hive Metastore schema, used when converting Metastore Parquet tables. This option is only used + // internally. + private[sql] val METASTORE_SCHEMA = "metastoreSchema" - // TODO Should calculate per scan size - // It's common that a query only scans a fraction of a large Parquet file. Returning size of the - // whole Parquet file disables some optimizations in this case (e.g. broadcast join). - override val sizeInBytes = metadataCache.dataStatuses.map(_.getLen).sum - - // This is mostly a hack so that we can use the existing parquet filter code. - override def buildScan(output: Seq[Attribute], predicates: Seq[Expression]): RDD[Row] = { - val job = new Job(sparkContext.hadoopConfiguration) - ParquetInputFormat.setReadSupportClass(job, classOf[RowReadSupport]) - val jobConf: Configuration = ContextUtil.getConfiguration(job) - - val selectedPartitions = prunePartitions(predicates, partitions) - val selectedFiles = if (isPartitioned) { - selectedPartitions.flatMap { p => - metadataCache.dataStatuses.filter(_.getPath.getParent.toString == p.path) - } - } else { - metadataCache.dataStatuses.toSeq - } - val selectedFooters = selectedFiles.map(metadataCache.footers) - - // FileInputFormat cannot handle empty lists. - if (selectedFiles.nonEmpty) { - // In order to encode the authority of a Path containning special characters such as /, - // we need to use the string retruned by the URI of the path to create a new Path. - val selectedPaths = selectedFiles.map(status => new Path(status.getPath.toUri.toString)) - FileInputFormat.setInputPaths(job, selectedPaths: _*) - } + /** This closure sets various Parquet configurations at both driver side and executor side. 
*/ + private[parquet] def initializeLocalJobFunc( + requiredColumns: Array[String], + filters: Array[Filter], + dataSchema: StructType, + useMetadataCache: Boolean, + parquetFilterPushDown: Boolean)(job: Job): Unit = { + val conf = job.getConfiguration + conf.set(ParquetInputFormat.READ_SUPPORT_CLASS, classOf[RowReadSupport].getName()) // Try to push down filters when filter push-down is enabled. - if (sqlContext.conf.parquetFilterPushDown) { - val partitionColNames = partitionColumns.map(_.name).toSet - predicates - // Don't push down predicates which reference partition columns - .filter { pred => - val referencedColNames = pred.references.map(_.name).toSet - referencedColNames.intersect(partitionColNames).isEmpty - } + if (parquetFilterPushDown) { + filters // Collects all converted Parquet filter predicates. Notice that not all predicates can be // converted (`ParquetFilters.createFilter` returns an `Option`). That's why a `flatMap` // is used here. - .flatMap(ParquetFilters.createFilter) + .flatMap(ParquetFilters.createFilter(dataSchema, _)) .reduceOption(FilterApi.and) - .foreach(ParquetInputFormat.setFilterPredicate(jobConf, _)) + .foreach(ParquetInputFormat.setFilterPredicate(conf, _)) } - if (isPartitioned) { - logInfo { - val percentRead = selectedPartitions.size.toDouble / partitions.size.toDouble * 100 - s"Reading $percentRead% of partitions" - } - } - - val requiredColumns = output.map(_.name) - val requestedSchema = StructType(requiredColumns.map(schema(_))) + conf.set(RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA, { + val requestedSchema = StructType(requiredColumns.map(dataSchema(_))) + ParquetTypesConverter.convertToString(requestedSchema.toAttributes) + }) - // Store both requested and original schema in `Configuration` - jobConf.set( - RowReadSupport.SPARK_ROW_REQUESTED_SCHEMA, - convertToString(requestedSchema.toAttributes)) - jobConf.set( + conf.set( RowWriteSupport.SPARK_ROW_SCHEMA, - convertToString(schema.toAttributes)) + ParquetTypesConverter.convertToString(dataSchema.toAttributes)) // Tell FilteringParquetRowInputFormat whether it's okay to cache Parquet and FS metadata - val useCache = sqlContext.getConf(SQLConf.PARQUET_CACHE_METADATA, "true").toBoolean - jobConf.set(SQLConf.PARQUET_CACHE_METADATA, useCache.toString) - - val baseRDD = - new NewHadoopRDD( - sparkContext, - classOf[FilteringParquetRowInputFormat], - classOf[Void], - classOf[Row], - jobConf) { - val cacheMetadata = useCache - - @transient - val cachedStatus = selectedFiles.map { st => - // In order to encode the authority of a Path containning special characters such as /, - // we need to use the string retruned by the URI of the path to create a new Path. - val newPath = new Path(st.getPath.toUri.toString) - - new FileStatus( - st.getLen, - st.isDir, - st.getReplication, - st.getBlockSize, - st.getModificationTime, - st.getAccessTime, - st.getPermission, - st.getOwner, - st.getGroup, - newPath) - } - - @transient - val cachedFooters = selectedFooters.map { f => - // In order to encode the authority of a Path containning special characters such as /, - // we need to use the string retruned by the URI of the path to create a new Path. - new Footer(new Path(f.getFile.toUri.toString), f.getParquetMetadata) - } - - - // Overridden so we can inject our own cached files statuses. 
- override def getPartitions: Array[SparkPartition] = { - val inputFormat = if (cacheMetadata) { - new FilteringParquetRowInputFormat { - override def listStatus(jobContext: JobContext): JList[FileStatus] = cachedStatus - - override def getFooters(jobContext: JobContext): JList[Footer] = cachedFooters - } - } else { - new FilteringParquetRowInputFormat - } - - val jobContext = newJobContext(getConf, jobId) - val rawSplits = inputFormat.getSplits(jobContext) - - Array.tabulate[SparkPartition](rawSplits.size) { i => - new NewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) - } - } - } - - // The ordinals for partition keys in the result row, if requested. - val partitionKeyLocations = partitionColumns.fieldNames.zipWithIndex.map { - case (name, index) => index -> requiredColumns.indexOf(name) - }.toMap.filter { - case (_, index) => index >= 0 - } - - // When the data does not include the key and the key is requested then we must fill it in - // based on information from the input split. - if (!partitionKeysIncludedInDataSchema && partitionKeyLocations.nonEmpty) { - // This check is based on CatalystConverter.createRootConverter. - val primitiveRow = - requestedSchema.forall(a => ParquetTypesConverter.isPrimitiveType(a.dataType)) - - baseRDD.mapPartitionsWithInputSplit { case (split: ParquetInputSplit, iterator) => - val partValues = selectedPartitions.collectFirst { - case p if split.getPath.getParent.toString == p.path => - CatalystTypeConverters.convertToCatalyst(p.values).asInstanceOf[Row] - }.get - - val requiredPartOrdinal = partitionKeyLocations.keys.toSeq - - if (primitiveRow) { - iterator.map { pair => - // We are using CatalystPrimitiveRowConverter and it returns a SpecificMutableRow. - val row = pair._2.asInstanceOf[SpecificMutableRow] - var i = 0 - while (i < requiredPartOrdinal.size) { - // TODO Avoids boxing cost here! - val partOrdinal = requiredPartOrdinal(i) - row.update(partitionKeyLocations(partOrdinal), partValues(partOrdinal)) - i += 1 - } - row - } - } else { - // Create a mutable row since we need to fill in values from partition columns. - val mutableRow = new GenericMutableRow(requestedSchema.size) - iterator.map { pair => - // We are using CatalystGroupConverter and it returns a GenericRow. - // Since GenericRow is not mutable, we just cast it to a Row. - val row = pair._2.asInstanceOf[Row] - var i = 0 - while (i < row.size) { - // TODO Avoids boxing cost here! - mutableRow(i) = row(i) - i += 1 - } - - i = 0 - while (i < requiredPartOrdinal.size) { - // TODO Avoids boxing cost here! 
-              val partOrdinal = requiredPartOrdinal(i)
-              mutableRow.update(partitionKeyLocations(partOrdinal), partValues(partOrdinal))
-              i += 1
-            }
-            mutableRow
-          }
-        }
-      }
-    } else {
-      baseRDD.map(_._2)
-    }
+    conf.set(SQLConf.PARQUET_CACHE_METADATA, useMetadataCache.toString)
   }
 
-  private def prunePartitions(
-      predicates: Seq[Expression],
-      partitions: Seq[Partition]): Seq[Partition] = {
-    val partitionColumnNames = partitionColumns.map(_.name).toSet
-    val partitionPruningPredicates = predicates.filter {
-      _.references.map(_.name).toSet.subsetOf(partitionColumnNames)
-    }
-
-    val rawPredicate =
-      partitionPruningPredicates.reduceOption(expressions.And).getOrElse(Literal(true))
-    val boundPredicate = InterpretedPredicate.create(rawPredicate transform {
-      case a: AttributeReference =>
-        val index = partitionColumns.indexWhere(a.name == _.name)
-        BoundReference(index, partitionColumns(index).dataType, nullable = true)
-    })
-
-    if (isPartitioned && partitionPruningPredicates.nonEmpty) {
-      partitions.filter(p => boundPredicate(p.values))
-    } else {
-      partitions
+  /** This closure sets input paths at the driver side. */
+  private[parquet] def initializeDriverSideJobFunc(
+      inputFiles: Array[FileStatus])(job: Job): Unit = {
+    // We set the input paths at the driver side.
+    if (inputFiles.nonEmpty) {
+      FileInputFormat.setInputPaths(job, inputFiles.map(_.getPath): _*)
     }
   }
 
-  override def insert(data: DataFrame, overwrite: Boolean): Unit = {
-    assert(paths.size == 1, s"Can't write to multiple destinations: ${paths.mkString(",")}")
-
-    // TODO: currently we do not check whether the "schema"s are compatible
-    // That means if one first creates a table and then INSERTs data with
-    // and incompatible schema the execution will fail. It would be nice
-    // to catch this early one, maybe having the planner validate the schema
-    // before calling execute().
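The split between a "local" job-initialization closure (applied on the driver and again on every executor) and a "driver-side" closure (applied once, e.g. to register input paths) can be illustrated without Hadoop at all. The sketch below uses a plain mutable map in place of a Hadoop `Job`/`Configuration`; the key names are invented.

    import scala.collection.mutable

    object JobInitSketch {
      type Conf = mutable.Map[String, String]

      // Settings needed on both the driver and the executors (schema, push-down flags, ...).
      def initLocalJobFunc(requestedColumns: Seq[String])(conf: Conf): Unit = {
        conf("example.requested.columns") = requestedColumns.mkString(",")
      }

      // Settings only the driver needs, such as the concrete list of input paths.
      def initDriverSideJobFunc(inputPaths: Seq[String])(conf: Conf): Unit = {
        conf("example.input.paths") = inputPaths.mkString(",")
      }

      def getConf(isDriverSide: Boolean): Conf = {
        val conf: Conf = mutable.Map.empty
        initLocalJobFunc(Seq("a", "b"))(conf)
        if (isDriverSide) {
          initDriverSideJobFunc(Seq("hdfs://host/table/part-0.parquet"))(conf)
        }
        conf
      }

      def main(args: Array[String]): Unit = {
        println(getConf(isDriverSide = true))   // contains both keys
        println(getConf(isDriverSide = false))  // contains only the local key
      }
    }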
- - val job = new Job(sqlContext.sparkContext.hadoopConfiguration) - val writeSupport = - if (parquetSchema.map(_.dataType).forall(ParquetTypesConverter.isPrimitiveType)) { - log.debug("Initializing MutableRowWriteSupport") - classOf[MutableRowWriteSupport] - } else { - classOf[RowWriteSupport] - } - - ParquetOutputFormat.setWriteSupportClass(job, writeSupport) - - val conf = ContextUtil.getConfiguration(job) - RowWriteSupport.setSchema(data.schema.toAttributes, conf) - - val destinationPath = new Path(paths.head) - - if (overwrite) { - val fs = destinationPath.getFileSystem(conf) - if (fs.exists(destinationPath)) { - var success: Boolean = false - try { - success = fs.delete(destinationPath, true) - } catch { - case e: IOException => - throw new IOException( - s"Unable to clear output directory ${destinationPath.toString} prior" + - s" to writing to Parquet table:\n${e.toString}") - } - if (!success) { - throw new IOException( - s"Unable to clear output directory ${destinationPath.toString} prior" + - s" to writing to Parquet table.") - } - } - } - - job.setOutputKeyClass(classOf[Void]) - job.setOutputValueClass(classOf[Row]) - FileOutputFormat.setOutputPath(job, destinationPath) - - val wrappedConf = new SerializableWritable(job.getConfiguration) - val jobTrackerId = new SimpleDateFormat("yyyyMMddHHmm").format(new Date()) - val stageId = sqlContext.sparkContext.newRddId() - - val taskIdOffset = if (overwrite) { - 1 - } else { - FileSystemHelper.findMaxTaskId( - FileOutputFormat.getOutputPath(job).toString, job.getConfiguration) + 1 - } - - def writeShard(context: TaskContext, iterator: Iterator[Row]): Unit = { - /* "reduce task" */ - val attemptId = newTaskAttemptID( - jobTrackerId, stageId, isMap = false, context.partitionId(), context.attemptNumber()) - val hadoopContext = newTaskAttemptContext(wrappedConf.value, attemptId) - val format = new AppendingParquetOutputFormat(taskIdOffset) - val committer = format.getOutputCommitter(hadoopContext) - committer.setupTask(hadoopContext) - val writer = format.getRecordWriter(hadoopContext) - try { - while (iterator.hasNext) { - val row = iterator.next() - writer.write(null, row) - } - } finally { - writer.close(hadoopContext) - } - - SparkHadoopMapRedUtil.commitTask(committer, hadoopContext, context) - } - val jobFormat = new AppendingParquetOutputFormat(taskIdOffset) - /* apparently we need a TaskAttemptID to construct an OutputCommitter; - * however we're only going to use this local OutputCommitter for - * setupJob/commitJob, so we just use a dummy "map" task. - */ - val jobAttemptId = newTaskAttemptID(jobTrackerId, stageId, isMap = true, 0, 0) - val jobTaskContext = newTaskAttemptContext(wrappedConf.value, jobAttemptId) - val jobCommitter = jobFormat.getOutputCommitter(jobTaskContext) - - jobCommitter.setupJob(jobTaskContext) - sqlContext.sparkContext.runJob(data.queryExecution.executedPlan.execute(), writeShard _) - jobCommitter.commitJob(jobTaskContext) - - metadataCache.refresh() - } -} - -private[sql] object ParquetRelation2 extends Logging { - // Whether we should merge schemas collected from all Parquet part-files. - val MERGE_SCHEMA = "mergeSchema" - - // Default partition name to use when the partition column value is null or empty string. - val DEFAULT_PARTITION_NAME = "partition.defaultName" - - // Hive Metastore schema, used when converting Metastore Parquet tables. This option is only used - // internally. 
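Both the write path being removed here and the writer containers added in commands.scala further below follow the standard Hadoop task-commit protocol: set up a task attempt, write every row, then commit on success or abort on failure so that partially written output is never published. A toy rendering of that control flow; the committer class is a stand-in, not a Hadoop API.

    object TaskCommitSketch {
      // Stand-in for an OutputCommitter: just reports which lifecycle calls were made.
      class ToyCommitter {
        def setupTask(): Unit = println("setupTask")
        def commitTask(): Unit = println("commitTask")
        def abortTask(): Unit = println("abortTask")
      }

      // Per-partition write protocol: setup, write all rows, then commit or abort.
      def writeShard(rows: Iterator[String], write: String => Unit): Unit = {
        val committer = new ToyCommitter
        committer.setupTask()
        try {
          rows.foreach(write)
          committer.commitTask()
        } catch {
          case t: Throwable =>
            committer.abortTask()
            throw t
        }
      }

      def main(args: Array[String]): Unit = {
        writeShard(Iterator("r1", "r2"), row => println(s"write $row"))
      }
    }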
- private[sql] val METASTORE_SCHEMA = "metastoreSchema" - private[parquet] def readSchema( footers: Seq[Footer], sqlContext: SQLContext): Option[StructType] = { footers.map { footer => @@ -766,7 +535,7 @@ private[sql] object ParquetRelation2 extends Logging { // Falls back to Parquet schema if Spark SQL schema is absent. StructType.fromAttributes( // TODO Really no need to use `Attribute` here, we only need to know the data type. - convertToAttributes( + ParquetTypesConverter.convertToAttributes( parquetSchema, sqlContext.conf.isParquetBinaryAsString, sqlContext.conf.isParquetINT96AsTimestamp)) @@ -805,7 +574,8 @@ private[sql] object ParquetRelation2 extends Logging { val ordinalMap = metastoreSchema.zipWithIndex.map { case (field, index) => field.name.toLowerCase -> index }.toMap - val reorderedParquetSchema = mergedParquetSchema.sortBy(f => + + val reorderedParquetSchema = mergedParquetSchema.sortBy(f => ordinalMap.getOrElse(f.name.toLowerCase, metastoreSchema.size + 1)) StructType(metastoreSchema.zip(reorderedParquetSchema).map { @@ -841,178 +611,4 @@ private[sql] object ParquetRelation2 extends Logging { .filter(_.nullable) StructType(parquetSchema ++ missingFields) } - - - // TODO Data source implementations shouldn't touch Catalyst types (`Literal`). - // However, we are already using Catalyst expressions for partition pruning and predicate - // push-down here... - private[parquet] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { - require(columnNames.size == literals.size) - } - - /** - * Given a group of qualified paths, tries to parse them and returns a partition specification. - * For example, given: - * {{{ - * hdfs://:/path/to/partition/a=1/b=hello/c=3.14 - * hdfs://:/path/to/partition/a=2/b=world/c=6.28 - * }}} - * it returns: - * {{{ - * PartitionSpec( - * partitionColumns = StructType( - * StructField(name = "a", dataType = IntegerType, nullable = true), - * StructField(name = "b", dataType = StringType, nullable = true), - * StructField(name = "c", dataType = DoubleType, nullable = true)), - * partitions = Seq( - * Partition( - * values = Row(1, "hello", 3.14), - * path = "hdfs://:/path/to/partition/a=1/b=hello/c=3.14"), - * Partition( - * values = Row(2, "world", 6.28), - * path = "hdfs://:/path/to/partition/a=2/b=world/c=6.28"))) - * }}} - */ - private[parquet] def parsePartitions( - paths: Seq[Path], - defaultPartitionName: String): PartitionSpec = { - val partitionValues = resolvePartitions(paths.map(parsePartition(_, defaultPartitionName))) - val fields = { - val (PartitionValues(columnNames, literals)) = partitionValues.head - columnNames.zip(literals).map { case (name, Literal(_, dataType)) => - StructField(name, dataType, nullable = true) - } - } - - val partitions = partitionValues.zip(paths).map { - case (PartitionValues(_, literals), path) => - Partition(Row(literals.map(_.value): _*), path.toString) - } - - PartitionSpec(StructType(fields), partitions) - } - - /** - * Parses a single partition, returns column names and values of each partition column. 
For - * example, given: - * {{{ - * path = hdfs://:/path/to/partition/a=42/b=hello/c=3.14 - * }}} - * it returns: - * {{{ - * PartitionValues( - * Seq("a", "b", "c"), - * Seq( - * Literal.create(42, IntegerType), - * Literal.create("hello", StringType), - * Literal.create(3.14, FloatType))) - * }}} - */ - private[parquet] def parsePartition( - path: Path, - defaultPartitionName: String): PartitionValues = { - val columns = ArrayBuffer.empty[(String, Literal)] - // Old Hadoop versions don't have `Path.isRoot` - var finished = path.getParent == null - var chopped = path - - while (!finished) { - val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) - maybeColumn.foreach(columns += _) - chopped = chopped.getParent - finished = maybeColumn.isEmpty || chopped.getParent == null - } - - val (columnNames, values) = columns.reverse.unzip - PartitionValues(columnNames, values) - } - - private def parsePartitionColumn( - columnSpec: String, - defaultPartitionName: String): Option[(String, Literal)] = { - val equalSignIndex = columnSpec.indexOf('=') - if (equalSignIndex == -1) { - None - } else { - val columnName = columnSpec.take(equalSignIndex) - assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") - - val rawColumnValue = columnSpec.drop(equalSignIndex + 1) - assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") - - val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName) - Some(columnName -> literal) - } - } - - /** - * Resolves possible type conflicts between partitions by up-casting "lower" types. The up- - * casting order is: - * {{{ - * NullType -> - * IntegerType -> LongType -> - * FloatType -> DoubleType -> DecimalType.Unlimited -> - * StringType - * }}} - */ - private[parquet] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { - // Column names of all partitions must match - val distinctPartitionsColNames = values.map(_.columnNames).distinct - assert(distinctPartitionsColNames.size == 1, { - val list = distinctPartitionsColNames.mkString("\t", "\n", "") - s"Conflicting partition column names detected:\n$list" - }) - - // Resolves possible type conflicts for each column - val columnCount = values.head.columnNames.size - val resolvedValues = (0 until columnCount).map { i => - resolveTypeConflicts(values.map(_.literals(i))) - } - - // Fills resolved literals back to each partition - values.zipWithIndex.map { case (d, index) => - d.copy(literals = resolvedValues.map(_(index))) - } - } - - /** - * Converts a string to a `Literal` with automatic type inference. Currently only supports - * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and - * [[StringType]]. 
- */ - private[parquet] def inferPartitionColumnValue( - raw: String, - defaultPartitionName: String): Literal = { - // First tries integral types - Try(Literal.create(Integer.parseInt(raw), IntegerType)) - .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) - // Then falls back to fractional types - .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType))) - .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) - .orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited))) - // Then falls back to string - .getOrElse { - if (raw == defaultPartitionName) Literal.create(null, NullType) - else Literal.create(raw, StringType) - } - } - - private val upCastingOrder: Seq[DataType] = - Seq(NullType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited, StringType) - - /** - * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" - * types. - */ - private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { - val desiredType = { - val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) - // Falls back to string if all values of this column are null or empty string - if (topType == NullType) StringType else topType - } - - literals.map { case l @ Literal(_, dataType) => - Literal.create(Cast(l, desiredType).eval(), desiredType) - } - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index b3d71f687a60a..c6a4dabbab05e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -17,20 +17,23 @@ package org.apache.spark.sql.sources -import org.apache.spark.rdd.RDD +import org.apache.spark.{Logging, SerializableWritable, TaskContext} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.rdd.{MapPartitionsRDD, RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.types.{UTF8String, StringType} -import org.apache.spark.sql.{Row, Strategy, execution, sources} +import org.apache.spark.sql.types.{StringType, StructType, UTF8String} +import org.apache.spark.sql.{SaveMode, Strategy, execution, sources} +import org.apache.spark.util.Utils /** * A Strategy for planning scans over data sources defined using the sources API. 
 */
-private[sql] object DataSourceStrategy extends Strategy {
+private[sql] object DataSourceStrategy extends Strategy with Logging {
   def apply(plan: LogicalPlan): Seq[execution.SparkPlan] = plan match {
     case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: CatalystScan)) =>
       pruneFilterProjectRaw(
@@ -53,6 +56,47 @@ private[sql] object DataSourceStrategy extends Strategy {
         filters,
         (a, _) => t.buildScan(a)) :: Nil
 
+    // Scanning partitioned HadoopFsRelation
+    case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation))
+        if t.partitionSpec.partitionColumns.nonEmpty =>
+      val selectedPartitions = prunePartitions(filters, t.partitionSpec).toArray
+
+      logInfo {
+        val total = t.partitionSpec.partitions.length
+        val selected = selectedPartitions.length
+        val percentPruned = (1 - selected.toDouble / total.toDouble) * 100
+        s"Selected $selected partitions out of $total, pruned $percentPruned% partitions."
+      }
+
+      // Only pushes down predicates that do not reference partition columns.
+      val pushedFilters = {
+        val partitionColumnNames = t.partitionSpec.partitionColumns.map(_.name).toSet
+        filters.filter { f =>
+          val referencedColumnNames = f.references.map(_.name).toSet
+          referencedColumnNames.intersect(partitionColumnNames).isEmpty
+        }
+      }
+
+      buildPartitionedTableScan(
+        l,
+        projectList,
+        pushedFilters,
+        t.partitionSpec.partitionColumns,
+        selectedPartitions) :: Nil
+
+    // Scanning non-partitioned HadoopFsRelation
+    case PhysicalOperation(projectList, filters, l @ LogicalRelation(t: HadoopFsRelation)) =>
+      // See buildPartitionedTableScan for the reason that we need to create a shared
+      // broadcast HadoopConf.
+      val sharedHadoopConf = SparkHadoopUtil.get.conf
+      val confBroadcast =
+        t.sqlContext.sparkContext.broadcast(new SerializableWritable(sharedHadoopConf))
+      pruneFilterProject(
+        l,
+        projectList,
+        filters,
+        (a, f) => t.buildScan(a, f, t.paths, confBroadcast)) :: Nil
+
     case l @ LogicalRelation(t: TableScan) =>
       createPhysicalRDD(l.relation, l.output, t.buildScan()) :: Nil
 
@@ -60,9 +104,154 @@ private[sql] object DataSourceStrategy extends Strategy {
         l @ LogicalRelation(t: InsertableRelation), part, query, overwrite, false) if part.isEmpty =>
       execution.ExecutedCommand(InsertIntoDataSource(l, query, overwrite)) :: Nil
 
+    case i @ logical.InsertIntoTable(
+        l @ LogicalRelation(t: HadoopFsRelation), part, query, overwrite, false) =>
+      val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
+      execution.ExecutedCommand(InsertIntoHadoopFsRelation(t, query, mode)) :: Nil
+
     case _ => Nil
   }
 
+  private def buildPartitionedTableScan(
+      logicalRelation: LogicalRelation,
+      projections: Seq[NamedExpression],
+      filters: Seq[Expression],
+      partitionColumns: StructType,
+      partitions: Array[Partition]) = {
+    val output = projections.map(_.toAttribute)
+    val relation = logicalRelation.relation.asInstanceOf[HadoopFsRelation]
+
+    // Because we are creating one RDD per partition, we need to have a shared HadoopConf.
+    // Otherwise, the cost of broadcasting HadoopConf in every RDD will be high.
+    val sharedHadoopConf = SparkHadoopUtil.get.conf
+    val confBroadcast =
+      relation.sqlContext.sparkContext.broadcast(new SerializableWritable(sharedHadoopConf))
+
+    // Builds RDD[Row]s for each selected partition.
+    val perPartitionRows = partitions.map { case Partition(partitionValues, dir) =>
+      // The table scan operator (PhysicalRDD) which retrieves required columns from data files.
+ // Notice that the schema of data files, represented by `relation.dataSchema`, may contain + // some partition column(s). + val scan = + pruneFilterProject( + logicalRelation, + projections, + filters, + (requiredColumns, filters) => { + val partitionColNames = partitionColumns.fieldNames + + // Don't scan any partition columns to save I/O. Here we are being optimistic and + // assuming partition columns data stored in data files are always consistent with those + // partition values encoded in partition directory paths. + val nonPartitionColumns = requiredColumns.filterNot(partitionColNames.contains) + val dataRows = + relation.buildScan(nonPartitionColumns, filters, Array(dir), confBroadcast) + + // Merges data values with partition values. + mergeWithPartitionValues( + relation.schema, + requiredColumns, + partitionColNames, + partitionValues, + dataRows) + }) + + scan.execute() + } + + val unionedRows = + if (perPartitionRows.length == 0) { + relation.sqlContext.emptyResult + } else { + new UnionRDD(relation.sqlContext.sparkContext, perPartitionRows) + } + + createPhysicalRDD(logicalRelation.relation, output, unionedRows) + } + + private def mergeWithPartitionValues( + schema: StructType, + requiredColumns: Array[String], + partitionColumns: Array[String], + partitionValues: Row, + dataRows: RDD[Row]): RDD[Row] = { + val nonPartitionColumns = requiredColumns.filterNot(partitionColumns.contains) + + // If output columns contain any partition column(s), we need to merge scanned data + // columns and requested partition columns to form the final result. + if (!requiredColumns.sameElements(nonPartitionColumns)) { + val mergers = requiredColumns.zipWithIndex.map { case (name, index) => + // To see whether the `index`-th column is a partition column... + val i = partitionColumns.indexOf(name) + if (i != -1) { + // If yes, gets column value from partition values. + (mutableRow: MutableRow, dataRow: expressions.Row, ordinal: Int) => { + mutableRow(ordinal) = partitionValues(i) + } + } else { + // Otherwise, inherits the value from scanned data. + val i = nonPartitionColumns.indexOf(name) + (mutableRow: MutableRow, dataRow: expressions.Row, ordinal: Int) => { + mutableRow(ordinal) = dataRow(i) + } + } + } + + // Since we know for sure that this closure is serializable, we can avoid the overhead + // of cleaning a closure for each RDD by creating our own MapPartitionsRDD. Functionally + // this is equivalent to calling `dataRows.mapPartitions(mapPartitionsFunc)` (SPARK-7718). + val mapPartitionsFunc = (_: TaskContext, _: Int, iterator: Iterator[Row]) => { + val dataTypes = requiredColumns.map(schema(_).dataType) + val mutableRow = new SpecificMutableRow(dataTypes) + iterator.map { dataRow => + var i = 0 + while (i < mutableRow.length) { + mergers(i)(mutableRow, dataRow, i) + i += 1 + } + mutableRow.asInstanceOf[expressions.Row] + } + } + + // This is an internal RDD whose call site the user should not be concerned with + // Since we create many of these (one per partition), the time spent on computing + // the call site may add up. 
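`mergeWithPartitionValues` builds one small merge function per requested column up front, so the per-row loop only walks an array of functions instead of re-resolving column names for every record. The simplified, Spark-free sketch below uses `Array[Any]` in place of Catalyst rows to show the same idea:

    object MergePartitionValuesSketch {
      type Merger = (Array[Any], Array[Any], Int) => Unit // (outputRow, dataRow, ordinal)

      def buildMergers(
          requiredColumns: Array[String],
          partitionColumns: Array[String],
          partitionValues: Array[Any]): Array[Merger] = {
        val nonPartitionColumns = requiredColumns.filterNot(partitionColumns.contains)
        requiredColumns.map { name =>
          val p = partitionColumns.indexOf(name)
          if (p != -1) {
            // Partition column: value comes from the directory path, not from the data file.
            (out: Array[Any], _: Array[Any], ordinal: Int) => out(ordinal) = partitionValues(p)
          } else {
            // Data column: value comes from the scanned row.
            val d = nonPartitionColumns.indexOf(name)
            (out: Array[Any], dataRow: Array[Any], ordinal: Int) => out(ordinal) = dataRow(d)
          }
        }
      }

      def main(args: Array[String]): Unit = {
        val required = Array("a", "year", "b")            // "year" is a partition column
        val mergers  = buildMergers(required, Array("year"), Array[Any](2015))
        val dataRow  = Array[Any]("a-value", "b-value")   // the data file only stores a and b
        val out      = new Array[Any](required.length)
        var i = 0
        while (i < mergers.length) { mergers(i)(out, dataRow, i); i += 1 }
        println(out.mkString(", ")) // a-value, 2015, b-value
      }
    }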
+ Utils.withDummyCallSite(dataRows.sparkContext) { + new MapPartitionsRDD(dataRows, mapPartitionsFunc, preservesPartitioning = false) + } + + } else { + dataRows + } + } + + protected def prunePartitions( + predicates: Seq[Expression], + partitionSpec: PartitionSpec): Seq[Partition] = { + val PartitionSpec(partitionColumns, partitions) = partitionSpec + val partitionColumnNames = partitionColumns.map(_.name).toSet + val partitionPruningPredicates = predicates.filter { + _.references.map(_.name).toSet.subsetOf(partitionColumnNames) + } + + if (partitionPruningPredicates.nonEmpty) { + val predicate = + partitionPruningPredicates + .reduceOption(expressions.And) + .getOrElse(Literal(true)) + + val boundPredicate = InterpretedPredicate.create(predicate.transform { + case a: AttributeReference => + val index = partitionColumns.indexWhere(a.name == _.name) + BoundReference(index, partitionColumns(index).dataType, nullable = true) + }) + + partitions.filter { case Partition(values, _) => boundPredicate(values) } + } else { + partitions + } + } + // Based on Public API. protected def pruneFilterProject( relation: LogicalRelation, @@ -120,7 +309,7 @@ private[sql] object DataSourceStrategy extends Strategy { output: Seq[Attribute], rdd: RDD[Row]): SparkPlan = { val converted = if (relation.needConversion) { - execution.RDDConversions.rowToRowRdd(rdd, relation.schema) + execution.RDDConversions.rowToRowRdd(rdd, output.map(_.dataType)) } else { rdd } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala new file mode 100644 index 0000000000000..c4c99de5a38dc --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} +import java.math.{BigDecimal => JBigDecimal} + +import scala.collection.mutable.ArrayBuffer +import scala.util.Try + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.util.Shell + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} +import org.apache.spark.sql.types._ + +private[sql] case class Partition(values: Row, path: String) + +private[sql] case class PartitionSpec(partitionColumns: StructType, partitions: Seq[Partition]) + +private[sql] object PartitionSpec { + val emptySpec = PartitionSpec(StructType(Seq.empty[StructField]), Seq.empty[Partition]) +} + +private[sql] object PartitioningUtils { + // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since sql/core doesn't + // depend on Hive. 
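`prunePartitions` above keeps only those predicates whose referenced columns are all partition columns and evaluates them against the values parsed from the directory names, so directories that cannot match are never listed or scanned. A rough standalone sketch of that selection, with predicates modelled as plain functions plus the set of columns they reference:

    object PrunePartitionsSketch {
      case class Partition(values: Map[String, Any], path: String)

      // A predicate paired with the column names it references.
      case class Predicate(references: Set[String], eval: Map[String, Any] => Boolean)

      def prunePartitions(
          predicates: Seq[Predicate],
          partitionColumns: Set[String],
          partitions: Seq[Partition]): Seq[Partition] = {
        // Only predicates that touch nothing but partition columns are safe to use for pruning.
        val pruningPredicates = predicates.filter(_.references.subsetOf(partitionColumns))
        if (pruningPredicates.isEmpty) partitions
        else partitions.filter(p => pruningPredicates.forall(_.eval(p.values)))
      }

      def main(args: Array[String]): Unit = {
        val partitions = Seq(
          Partition(Map("year" -> 2014), "/table/year=2014"),
          Partition(Map("year" -> 2015), "/table/year=2015"))

        val predicates = Seq(
          Predicate(Set("year"), row => row("year").asInstanceOf[Int] >= 2015), // prunes
          Predicate(Set("a"), _ => true))                                       // ignored here

        println(prunePartitions(predicates, Set("year"), partitions).map(_.path))
        // List(/table/year=2015)
      }
    }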
+ private[sql] val DEFAULT_PARTITION_NAME = "__HIVE_DEFAULT_PARTITION__" + + private[sql] case class PartitionValues(columnNames: Seq[String], literals: Seq[Literal]) { + require(columnNames.size == literals.size) + } + + /** + * Given a group of qualified paths, tries to parse them and returns a partition specification. + * For example, given: + * {{{ + * hdfs://:/path/to/partition/a=1/b=hello/c=3.14 + * hdfs://:/path/to/partition/a=2/b=world/c=6.28 + * }}} + * it returns: + * {{{ + * PartitionSpec( + * partitionColumns = StructType( + * StructField(name = "a", dataType = IntegerType, nullable = true), + * StructField(name = "b", dataType = StringType, nullable = true), + * StructField(name = "c", dataType = DoubleType, nullable = true)), + * partitions = Seq( + * Partition( + * values = Row(1, "hello", 3.14), + * path = "hdfs://:/path/to/partition/a=1/b=hello/c=3.14"), + * Partition( + * values = Row(2, "world", 6.28), + * path = "hdfs://:/path/to/partition/a=2/b=world/c=6.28"))) + * }}} + */ + private[sql] def parsePartitions( + paths: Seq[Path], + defaultPartitionName: String): PartitionSpec = { + // First, we need to parse every partition's path and see if we can find partition values. + val pathsWithPartitionValues = paths.flatMap { path => + parsePartition(path, defaultPartitionName).map(path -> _) + } + + if (pathsWithPartitionValues.isEmpty) { + // This dataset is not partitioned. + PartitionSpec.emptySpec + } else { + // This dataset is partitioned. We need to check whether all partitions have the same + // partition columns and resolve potential type conflicts. + val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues.map(_._2)) + + // Creates the StructType which represents the partition columns. + val fields = { + val PartitionValues(columnNames, literals) = resolvedPartitionValues.head + columnNames.zip(literals).map { case (name, Literal(_, dataType)) => + // We always assume partition columns are nullable since we've no idea whether null values + // will be appended in the future. + StructField(name, dataType, nullable = true) + } + } + + // Finally, we create `Partition`s based on paths and resolved partition values. + val partitions = resolvedPartitionValues.zip(pathsWithPartitionValues).map { + case (PartitionValues(_, literals), (path, _)) => + Partition(Row.fromSeq(literals.map(_.value)), path.toString) + } + + PartitionSpec(StructType(fields), partitions) + } + } + + /** + * Parses a single partition, returns column names and values of each partition column. For + * example, given: + * {{{ + * path = hdfs://:/path/to/partition/a=42/b=hello/c=3.14 + * }}} + * it returns: + * {{{ + * PartitionValues( + * Seq("a", "b", "c"), + * Seq( + * Literal.create(42, IntegerType), + * Literal.create("hello", StringType), + * Literal.create(3.14, FloatType))) + * }}} + */ + private[sql] def parsePartition( + path: Path, + defaultPartitionName: String): Option[PartitionValues] = { + val columns = ArrayBuffer.empty[(String, Literal)] + // Old Hadoop versions don't have `Path.isRoot` + var finished = path.getParent == null + var chopped = path + + while (!finished) { + // Sometimes (e.g., when speculative task is enabled), temporary directories may be left + // uncleaned. Here we simply ignore them. 
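`parsePartitions`/`parsePartition` walk each path from the leaf upwards, collecting `column=value` segments and stopping at non-partition directories. The toy parser below works on plain strings (no Hadoop `Path`, no Catalyst `Literal`) and only distinguishes integers from strings, but it follows the same segment handling:

    import scala.util.Try

    object ParsePartitionSketch {
      // Parses one directory name of the form "column=value"; returns None for other names.
      def parseSegment(segment: String): Option[(String, Any)] = {
        val eq = segment.indexOf('=')
        if (eq <= 0 || eq == segment.length - 1) None
        else {
          val (name, raw) = (segment.take(eq), segment.drop(eq + 1))
          // Tiny type inference: try integers first, then fall back to plain strings.
          val value: Any = Try(raw.toInt).getOrElse(raw)
          Some(name -> value)
        }
      }

      // Walks the path from the leaf upwards, stopping at the first segment that is not
      // a "column=value" pair (or at a "_temporary" directory left by speculative tasks).
      def parsePartition(path: String): List[(String, Any)] = {
        val segments = path.split('/').filter(_.nonEmpty).reverse
        segments
          .takeWhile(s => s.toLowerCase != "_temporary" && parseSegment(s).isDefined)
          .flatMap(parseSegment)
          .reverse
          .toList
      }

      def main(args: Array[String]): Unit = {
        println(parsePartition("/path/to/partition/a=42/b=hello"))
        // List((a,42), (b,hello))
      }
    }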
+ if (chopped.getName.toLowerCase == "_temporary") { + return None + } + + val maybeColumn = parsePartitionColumn(chopped.getName, defaultPartitionName) + maybeColumn.foreach(columns += _) + chopped = chopped.getParent + finished = maybeColumn.isEmpty || chopped.getParent == null + } + + if (columns.isEmpty) { + None + } else { + val (columnNames, values) = columns.reverse.unzip + Some(PartitionValues(columnNames, values)) + } + } + + private def parsePartitionColumn( + columnSpec: String, + defaultPartitionName: String): Option[(String, Literal)] = { + val equalSignIndex = columnSpec.indexOf('=') + if (equalSignIndex == -1) { + None + } else { + val columnName = columnSpec.take(equalSignIndex) + assert(columnName.nonEmpty, s"Empty partition column name in '$columnSpec'") + + val rawColumnValue = columnSpec.drop(equalSignIndex + 1) + assert(rawColumnValue.nonEmpty, s"Empty partition column value in '$columnSpec'") + + val literal = inferPartitionColumnValue(rawColumnValue, defaultPartitionName) + Some(columnName -> literal) + } + } + + /** + * Resolves possible type conflicts between partitions by up-casting "lower" types. The up- + * casting order is: + * {{{ + * NullType -> + * IntegerType -> LongType -> + * FloatType -> DoubleType -> DecimalType.Unlimited -> + * StringType + * }}} + */ + private[sql] def resolvePartitions(values: Seq[PartitionValues]): Seq[PartitionValues] = { + // Column names of all partitions must match + val distinctPartitionsColNames = values.map(_.columnNames).distinct + + if (distinctPartitionsColNames.isEmpty) { + Seq.empty + } else { + assert(distinctPartitionsColNames.size == 1, { + val list = distinctPartitionsColNames.mkString("\t", "\n\t", "") + s"Conflicting partition column names detected:\n$list" + }) + + // Resolves possible type conflicts for each column + val columnCount = values.head.columnNames.size + val resolvedValues = (0 until columnCount).map { i => + resolveTypeConflicts(values.map(_.literals(i))) + } + + // Fills resolved literals back to each partition + values.zipWithIndex.map { case (d, index) => + d.copy(literals = resolvedValues.map(_(index))) + } + } + } + + /** + * Converts a string to a `Literal` with automatic type inference. Currently only supports + * [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and + * [[StringType]]. + */ + private[sql] def inferPartitionColumnValue( + raw: String, + defaultPartitionName: String): Literal = { + // First tries integral types + Try(Literal.create(Integer.parseInt(raw), IntegerType)) + .orElse(Try(Literal.create(JLong.parseLong(raw), LongType))) + // Then falls back to fractional types + .orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType))) + .orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType))) + .orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited))) + // Then falls back to string + .getOrElse { + if (raw == defaultPartitionName) Literal.create(null, NullType) + else Literal.create(unescapePathName(raw), StringType) + } + } + + private val upCastingOrder: Seq[DataType] = + Seq(NullType, IntegerType, LongType, FloatType, DoubleType, DecimalType.Unlimited, StringType) + + /** + * Given a collection of [[Literal]]s, resolves possible type conflicts by up-casting "lower" + * types. 
+ */ + private def resolveTypeConflicts(literals: Seq[Literal]): Seq[Literal] = { + val desiredType = { + val topType = literals.map(_.dataType).maxBy(upCastingOrder.indexOf(_)) + // Falls back to string if all values of this column are null or empty string + if (topType == NullType) StringType else topType + } + + literals.map { case l @ Literal(_, dataType) => + Literal.create(Cast(l, desiredType).eval(), desiredType) + } + } + + ////////////////////////////////////////////////////////////////////////////////////////////////// + // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). + ////////////////////////////////////////////////////////////////////////////////////////////////// + + val charToEscape = { + val bitSet = new java.util.BitSet(128) + + /** + * ASCII 01-1F are HTTP control characters that need to be escaped. + * \u000A and \u000D are \n and \r, respectively. + */ + val clist = Array( + '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', + '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', + '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', + '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', + '{', '[', ']', '^') + + clist.foreach(bitSet.set(_)) + + if (Shell.WINDOWS) { + Array(' ', '<', '>', '|').foreach(bitSet.set(_)) + } + + bitSet + } + + def needsEscaping(c: Char): Boolean = { + c >= 0 && c < charToEscape.size() && charToEscape.get(c) + } + + def escapePathName(path: String): String = { + val builder = new StringBuilder() + path.foreach { c => + if (needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02x") + } else { + builder.append(c) + } + } + + builder.toString() + } + + def unescapePathName(path: String): String = { + val sb = new StringBuilder + var i = 0 + + while (i < path.length) { + val c = path.charAt(i) + if (c == '%' && i + 2 < path.length) { + val code: Int = try { + Integer.valueOf(path.substring(i + 1, i + 3), 16) + } catch { case e: Exception => + -1: Integer + } + if (code >= 0) { + sb.append(code.asInstanceOf[Char]) + i += 3 + } else { + sb.append(c) + i += 1 + } + } else { + sb.append(c) + i += 1 + } + } + + sb.toString() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala new file mode 100644 index 0000000000000..ebad0c1564ec0 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/SqlNewHadoopRDD.scala @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.sql.sources
+
+import java.text.SimpleDateFormat
+import java.util.Date
+
+import org.apache.hadoop.conf.{Configurable, Configuration}
+import org.apache.hadoop.io.Writable
+import org.apache.hadoop.mapreduce._
+import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit}
+import org.apache.spark.broadcast.Broadcast
+
+import org.apache.spark.{Partition => SparkPartition, _}
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.deploy.SparkHadoopUtil
+import org.apache.spark.executor.DataReadMethod
+import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil
+import org.apache.spark.rdd.{RDD, HadoopRDD}
+import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
+
+import scala.reflect.ClassTag
+
+private[spark] class SqlNewHadoopPartition(
+    rddId: Int,
+    val index: Int,
+    @transient rawSplit: InputSplit with Writable)
+  extends SparkPartition {
+
+  val serializableHadoopSplit = new SerializableWritable(rawSplit)
+
+  override def hashCode(): Int = 41 * (41 + rddId) + index
+}
+
+/**
+ * An RDD that provides core functionality for reading data stored in Hadoop (e.g., files in HDFS,
+ * sources in HBase, or S3), using the new MapReduce API (`org.apache.hadoop.mapreduce`).
+ * It is based on [[org.apache.spark.rdd.NewHadoopRDD]]. It has three additions.
+ * 1. A shared broadcast Hadoop Configuration.
+ * 2. An optional closure `initDriverSideJobFuncOpt` that sets configurations at the driver side
+ *    to the shared Hadoop Configuration.
+ * 3. An optional closure `initLocalJobFuncOpt` that sets configurations at both the driver side
+ *    and the executor side to the shared Hadoop Configuration.
+ *
+ * Note: This RDD is basically a cloned version of [[org.apache.spark.rdd.NewHadoopRDD]] with
+ * changes based on [[org.apache.spark.rdd.HadoopRDD]]. In future, this functionality will be
+ * folded into core.
+ */
+private[sql] class SqlNewHadoopRDD[K, V](
+    @transient sc : SparkContext,
+    broadcastedConf: Broadcast[SerializableWritable[Configuration]],
+    @transient initDriverSideJobFuncOpt: Option[Job => Unit],
+    initLocalJobFuncOpt: Option[Job => Unit],
+    inputFormatClass: Class[_ <: InputFormat[K, V]],
+    keyClass: Class[K],
+    valueClass: Class[V])
+  extends RDD[(K, V)](sc, Nil)
+  with SparkHadoopMapReduceUtil
+  with Logging {
+
+  protected def getJob(): Job = {
+    val conf: Configuration = broadcastedConf.value.value
+    // "new Job" will make a copy of the conf. Then, it is
+    // safe to mutate conf properties with initLocalJobFuncOpt
+    // and initDriverSideJobFuncOpt.
+ val newJob = new Job(conf) + initLocalJobFuncOpt.map(f => f(newJob)) + newJob + } + + def getConf(isDriverSide: Boolean): Configuration = { + val job = getJob() + if (isDriverSide) { + initDriverSideJobFuncOpt.map(f => f(job)) + } + job.getConfiguration + } + + private val jobTrackerId: String = { + val formatter = new SimpleDateFormat("yyyyMMddHHmm") + formatter.format(new Date()) + } + + @transient protected val jobId = new JobID(jobTrackerId, id) + + override def getPartitions: Array[SparkPartition] = { + val conf = getConf(isDriverSide = true) + val inputFormat = inputFormatClass.newInstance + inputFormat match { + case configurable: Configurable => + configurable.setConf(conf) + case _ => + } + val jobContext = newJobContext(conf, jobId) + val rawSplits = inputFormat.getSplits(jobContext).toArray + val result = new Array[SparkPartition](rawSplits.size) + for (i <- 0 until rawSplits.size) { + result(i) = + new SqlNewHadoopPartition(id, i, rawSplits(i).asInstanceOf[InputSplit with Writable]) + } + result + } + + override def compute( + theSplit: SparkPartition, + context: TaskContext): InterruptibleIterator[(K, V)] = { + val iter = new Iterator[(K, V)] { + val split = theSplit.asInstanceOf[SqlNewHadoopPartition] + logInfo("Input split: " + split.serializableHadoopSplit) + val conf = getConf(isDriverSide = false) + + val inputMetrics = context.taskMetrics + .getInputMetricsForReadMethod(DataReadMethod.Hadoop) + + // Find a function that will return the FileSystem bytes read by this thread. Do this before + // creating RecordReader, because RecordReader's constructor might read some bytes + val bytesReadCallback = inputMetrics.bytesReadCallback.orElse { + split.serializableHadoopSplit.value match { + case _: FileSplit | _: CombineFileSplit => + SparkHadoopUtil.get.getFSBytesReadOnThreadCallback() + case _ => None + } + } + inputMetrics.setBytesReadCallback(bytesReadCallback) + + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) + val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) + val format = inputFormatClass.newInstance + format match { + case configurable: Configurable => + configurable.setConf(conf) + case _ => + } + val reader = format.createRecordReader( + split.serializableHadoopSplit.value, hadoopAttemptContext) + reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext) + + // Register an on-task-completion callback to close the input stream. + context.addTaskCompletionListener(context => close()) + var havePair = false + var finished = false + var recordsSinceMetricsUpdate = 0 + + override def hasNext: Boolean = { + if (!finished && !havePair) { + finished = !reader.nextKeyValue + havePair = !finished + } + !finished + } + + override def next(): (K, V) = { + if (!hasNext) { + throw new java.util.NoSuchElementException("End of stream") + } + havePair = false + if (!finished) { + inputMetrics.incRecordsRead(1) + } + (reader.getCurrentKey, reader.getCurrentValue) + } + + private def close() { + try { + reader.close() + if (bytesReadCallback.isDefined) { + inputMetrics.updateBytesRead() + } else if (split.serializableHadoopSplit.value.isInstanceOf[FileSplit] || + split.serializableHadoopSplit.value.isInstanceOf[CombineFileSplit]) { + // If we can't get the bytes read from the FS stats, fall back to the split size, + // which may be inaccurate. 
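`compute` adapts Hadoop's pull-style `RecordReader` (`nextKeyValue()` / `getCurrentKey` / `getCurrentValue`) into a Scala `Iterator` using the `finished`/`havePair` flags, so `hasNext` can be called repeatedly without consuming records. A dependency-free sketch of that adapter over a toy reader:

    object RecordReaderIteratorSketch {
      // Minimal stand-in for a Hadoop RecordReader.
      class ToyReader(records: Seq[(String, Int)]) {
        private var i = -1
        def nextKeyValue(): Boolean = { i += 1; i < records.length }
        def getCurrentKey: String = records(i)._1
        def getCurrentValue: Int = records(i)._2
      }

      def asIterator(reader: ToyReader): Iterator[(String, Int)] = new Iterator[(String, Int)] {
        private var finished = false
        private var havePair = false

        // Safe to call any number of times: only advances the reader when needed.
        override def hasNext: Boolean = {
          if (!finished && !havePair) {
            finished = !reader.nextKeyValue()
            havePair = !finished
          }
          !finished
        }

        override def next(): (String, Int) = {
          if (!hasNext) throw new java.util.NoSuchElementException("End of stream")
          havePair = false
          (reader.getCurrentKey, reader.getCurrentValue)
        }
      }

      def main(args: Array[String]): Unit = {
        val it = asIterator(new ToyReader(Seq("a" -> 1, "b" -> 2)))
        println(it.toList) // List((a,1), (b,2))
      }
    }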
+ try { + inputMetrics.incBytesRead(split.serializableHadoopSplit.value.getLength) + } catch { + case e: java.io.IOException => + logWarning("Unable to get input size to set InputMetrics for task", e) + } + } + } catch { + case e: Exception => { + if (!Utils.inShutdown()) { + logWarning("Exception in RecordReader.close()", e) + } + } + } + } + } + new InterruptibleIterator(context, iter) + } + + /** Maps over a partition, providing the InputSplit that was used as the base of the partition. */ + @DeveloperApi + def mapPartitionsWithInputSplit[U: ClassTag]( + f: (InputSplit, Iterator[(K, V)]) => Iterator[U], + preservesPartitioning: Boolean = false): RDD[U] = { + new NewHadoopMapPartitionsWithSplitRDD(this, f, preservesPartitioning) + } + + override def getPreferredLocations(hsplit: SparkPartition): Seq[String] = { + val split = hsplit.asInstanceOf[SqlNewHadoopPartition].serializableHadoopSplit.value + val locs = HadoopRDD.SPLIT_INFO_REFLECTIONS match { + case Some(c) => + try { + val infos = c.newGetLocationInfo.invoke(split).asInstanceOf[Array[AnyRef]] + Some(HadoopRDD.convertSplitLocationInfo(infos)) + } catch { + case e : Exception => + logDebug("Failed to use InputSplit#getLocationInfo.", e) + None + } + case None => None + } + locs.getOrElse(split.getLocations.filter(_ != "localhost")) + } + + override def persist(storageLevel: StorageLevel): this.type = { + if (storageLevel.deserialized) { + logWarning("Caching NewHadoopRDDs as deserialized objects usually leads to undesired" + + " behavior because Hadoop's RecordReader reuses the same Writable object for all records." + + " Use a map transformation to make copies of the records.") + } + super.persist(storageLevel) + } +} + +private[spark] object SqlNewHadoopRDD { + /** + * Analogous to [[org.apache.spark.rdd.MapPartitionsRDD]], but passes in an InputSplit to + * the given function rather than the index of the partition. + */ + private[spark] class NewHadoopMapPartitionsWithSplitRDD[U: ClassTag, T: ClassTag]( + prev: RDD[T], + f: (InputSplit, Iterator[T]) => Iterator[U], + preservesPartitioning: Boolean = false) + extends RDD[U](prev) { + + override val partitioner = if (preservesPartitioning) firstParent[T].partitioner else None + + override def getPartitions: Array[SparkPartition] = firstParent[T].partitions + + override def compute(split: SparkPartition, context: TaskContext): Iterator[U] = { + val partition = split.asInstanceOf[SqlNewHadoopPartition] + val inputSplit = partition.serializableHadoopSplit.value + f(inputSplit, firstParent[T].iterator(split, context)) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala index dbdb0d39c26a1..71f016b1f14de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/commands.scala @@ -14,12 +14,29 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.spark.sql.sources -import org.apache.spark.sql.{DataFrame, SQLContext} +import java.util.Date + +import scala.collection.mutable + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce._ +import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter => MapReduceFileOutputCommitter, FileOutputFormat} +import parquet.hadoop.util.ContextUtil + +import org.apache.spark._ +import org.apache.spark.mapred.SparkHadoopMapRedUtil +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateProjection +import org.apache.spark.sql.catalyst.plans.logical.{Project, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{DataFrame, SQLConf, SQLContext, SaveMode} private[sql] case class InsertIntoDataSource( logicalRelation: LogicalRelation, @@ -41,3 +58,405 @@ private[sql] case class InsertIntoDataSource( Seq.empty[Row] } } + +private[sql] case class InsertIntoHadoopFsRelation( + @transient relation: HadoopFsRelation, + @transient query: LogicalPlan, + mode: SaveMode) + extends RunnableCommand { + + override def run(sqlContext: SQLContext): Seq[Row] = { + require( + relation.paths.length == 1, + s"Cannot write to multiple destinations: ${relation.paths.mkString(",")}") + + val hadoopConf = sqlContext.sparkContext.hadoopConfiguration + val outputPath = new Path(relation.paths.head) + val fs = outputPath.getFileSystem(hadoopConf) + val qualifiedOutputPath = outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + + val doInsertion = (mode, fs.exists(qualifiedOutputPath)) match { + case (SaveMode.ErrorIfExists, true) => + sys.error(s"path $qualifiedOutputPath already exists.") + case (SaveMode.Overwrite, true) => + fs.delete(qualifiedOutputPath, true) + true + case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => + true + case (SaveMode.Ignore, exists) => + !exists + } + + if (doInsertion) { + val job = new Job(hadoopConf) + job.setOutputKeyClass(classOf[Void]) + job.setOutputValueClass(classOf[Row]) + FileOutputFormat.setOutputPath(job, qualifiedOutputPath) + + // We create a DataFrame by applying the schema of relation to the data to make sure. + // We are writing data based on the expected schema, + val df = { + // For partitioned relation r, r.schema's column ordering can be different from the column + // ordering of data.logicalPlan (partition columns are all moved after data column). We + // need a Project to adjust the ordering, so that inside InsertIntoHadoopFsRelation, we can + // safely apply the schema of r.schema to the data. 
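`InsertIntoHadoopFsRelation` reduces the four save modes to a single "write or not" decision based on whether the output path already exists; only `Overwrite` deletes an existing path first and only `ErrorIfExists` fails. The sketch below restates that decision with a local `SaveMode` enumeration so it does not depend on Spark:

    object SaveModeSketch {
      sealed trait SaveMode
      case object Append        extends SaveMode
      case object Overwrite     extends SaveMode
      case object ErrorIfExists extends SaveMode
      case object Ignore        extends SaveMode

      /** Returns true when data should be written; deleting an existing path is the caller's job. */
      def shouldInsert(mode: SaveMode, pathExists: Boolean): Boolean = (mode, pathExists) match {
        case (ErrorIfExists, true) => sys.error("path already exists.")
        case (Overwrite, true)     => true   // caller deletes the existing path first
        case (Ignore, exists)      => !exists
        case _                     => true   // Append, or a fresh output path
      }

      def main(args: Array[String]): Unit = {
        println(shouldInsert(Append, pathExists = true)) // true
        println(shouldInsert(Ignore, pathExists = true)) // false
      }
    }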
+ val project = Project( + relation.schema.map(field => new UnresolvedAttribute(Seq(field.name))), query) + + sqlContext.createDataFrame( + DataFrame(sqlContext, project).queryExecution.toRdd, + relation.schema, + needsConversion = false) + } + + val partitionColumns = relation.partitionColumns.fieldNames + if (partitionColumns.isEmpty) { + insert(new DefaultWriterContainer(relation, job), df) + } else { + val writerContainer = new DynamicPartitionWriterContainer( + relation, job, partitionColumns, PartitioningUtils.DEFAULT_PARTITION_NAME) + insertWithDynamicPartitions(sqlContext, writerContainer, df, partitionColumns) + } + } + + Seq.empty[Row] + } + + private def insert(writerContainer: BaseWriterContainer, df: DataFrame): Unit = { + // Uses local vals for serialization + val needsConversion = relation.needConversion + val dataSchema = relation.dataSchema + + try { + writerContainer.driverSideSetup() + df.sqlContext.sparkContext.runJob(df.queryExecution.executedPlan.execute(), writeRows _) + writerContainer.commitJob() + relation.refresh() + } catch { case cause: Throwable => + logError("Aborting job.", cause) + writerContainer.abortJob() + throw new SparkException("Job aborted.", cause) + } + + def writeRows(taskContext: TaskContext, iterator: Iterator[Row]): Unit = { + writerContainer.executorSideSetup(taskContext) + + try { + if (needsConversion) { + val converter = CatalystTypeConverters.createToScalaConverter(dataSchema) + while (iterator.hasNext) { + val row = converter(iterator.next()).asInstanceOf[Row] + writerContainer.outputWriterForRow(row).write(row) + } + } else { + while (iterator.hasNext) { + val row = iterator.next() + writerContainer.outputWriterForRow(row).write(row) + } + } + writerContainer.commitTask() + } catch { case cause: Throwable => + logError("Aborting task.", cause) + writerContainer.abortTask() + throw new SparkException("Task failed while writing rows.", cause) + } + } + } + + private def insertWithDynamicPartitions( + sqlContext: SQLContext, + writerContainer: BaseWriterContainer, + df: DataFrame, + partitionColumns: Array[String]): Unit = { + // Uses a local val for serialization + val needsConversion = relation.needConversion + val dataSchema = relation.dataSchema + + require( + df.schema == relation.schema, + s"""DataFrame must have the same schema as the relation to which is inserted. + |DataFrame schema: ${df.schema} + |Relation schema: ${relation.schema} + """.stripMargin) + + val partitionColumnsInSpec = relation.partitionColumns.fieldNames + require( + partitionColumnsInSpec.sameElements(partitionColumns), + s"""Partition columns mismatch. 
+ |Expected: ${partitionColumnsInSpec.mkString(", ")} + |Actual: ${partitionColumns.mkString(", ")} + """.stripMargin) + + val output = df.queryExecution.executedPlan.output + val (partitionOutput, dataOutput) = output.partition(a => partitionColumns.contains(a.name)) + val codegenEnabled = df.sqlContext.conf.codegenEnabled + + try { + writerContainer.driverSideSetup() + df.sqlContext.sparkContext.runJob(df.queryExecution.executedPlan.execute(), writeRows _) + writerContainer.commitJob() + relation.refresh() + } catch { case cause: Throwable => + logError("Aborting job.", cause) + writerContainer.abortJob() + throw new SparkException("Job aborted.", cause) + } + + def writeRows(taskContext: TaskContext, iterator: Iterator[Row]): Unit = { + writerContainer.executorSideSetup(taskContext) + + val partitionProj = newProjection(codegenEnabled, partitionOutput, output) + val dataProj = newProjection(codegenEnabled, dataOutput, output) + + if (needsConversion) { + val converter = CatalystTypeConverters.createToScalaConverter(dataSchema) + while (iterator.hasNext) { + val row = iterator.next() + val partitionPart = partitionProj(row) + val dataPart = dataProj(row) + val convertedDataPart = converter(dataPart).asInstanceOf[Row] + writerContainer.outputWriterForRow(partitionPart).write(convertedDataPart) + } + } else { + val partitionSchema = StructType.fromAttributes(partitionOutput) + val converter = CatalystTypeConverters.createToScalaConverter(partitionSchema) + while (iterator.hasNext) { + val row = iterator.next() + val partitionPart = converter(partitionProj(row)).asInstanceOf[Row] + val dataPart = dataProj(row) + writerContainer.outputWriterForRow(partitionPart).write(dataPart) + } + } + + writerContainer.commitTask() + } + } + + // This is copied from SparkPlan, probably should move this to a more general place. + private def newProjection( + codegenEnabled: Boolean, + expressions: Seq[Expression], + inputSchema: Seq[Attribute]): Projection = { + log.debug( + s"Creating Projection: $expressions, inputSchema: $inputSchema, codegen:$codegenEnabled") + if (codegenEnabled) { + GenerateProjection.generate(expressions, inputSchema) + } else { + new InterpretedProjection(expressions, inputSchema) + } + } +} + +private[sql] abstract class BaseWriterContainer( + @transient val relation: HadoopFsRelation, + @transient job: Job) + extends SparkHadoopMapReduceUtil + with Logging + with Serializable { + + protected val serializableConf = new SerializableWritable(ContextUtil.getConfiguration(job)) + + // This is only used on driver side. + @transient private val jobContext: JobContext = job + + // The following fields are initialized and used on both driver and executor side. 
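For dynamically partitioned writes, every row is split into a partition part (used to pick the output writer and directory) and a data part (what is actually written), using two projections that are built once per task. A simplified version of that split, using column names and plain sequences instead of Catalyst projections:

    object PartitionDataSplitSketch {
      // Builds two index-based projections so the per-row work is just indexing.
      def buildProjections(
          output: Seq[String],
          partitionColumns: Set[String]): (Seq[Any] => Seq[Any], Seq[Any] => Seq[Any]) = {
        val (partitionIdx, dataIdx) =
          output.indices.partition(i => partitionColumns.contains(output(i)))
        (row => partitionIdx.map(row), row => dataIdx.map(row))
      }

      def main(args: Array[String]): Unit = {
        val output = Seq("a", "year", "b") // "year" is the partition column
        val (partitionProj, dataProj) = buildProjections(output, Set("year"))

        val row = Seq[Any]("a-value", 2015, "b-value")
        println(partitionProj(row)) // Vector(2015): selects the output writer/directory
        println(dataProj(row))      // Vector(a-value, b-value): written to the data file
      }
    }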
+ @transient protected var outputCommitter: OutputCommitter = _ + @transient private var jobId: JobID = _ + @transient private var taskId: TaskID = _ + @transient private var taskAttemptId: TaskAttemptID = _ + @transient protected var taskAttemptContext: TaskAttemptContext = _ + + protected val outputPath: String = { + assert( + relation.paths.length == 1, + s"Cannot write to multiple destinations: ${relation.paths.mkString(",")}") + relation.paths.head + } + + protected val dataSchema = relation.dataSchema + + protected var outputWriterFactory: OutputWriterFactory = _ + + private var outputFormatClass: Class[_ <: OutputFormat[_, _]] = _ + + def driverSideSetup(): Unit = { + setupIDs(0, 0, 0) + setupConf() + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) + outputWriterFactory = relation.prepareJobForWrite(job) + outputFormatClass = job.getOutputFormatClass + outputCommitter = newOutputCommitter(taskAttemptContext) + outputCommitter.setupJob(jobContext) + } + + def executorSideSetup(taskContext: TaskContext): Unit = { + setupIDs(taskContext.stageId(), taskContext.partitionId(), taskContext.attemptNumber()) + setupConf() + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) + outputCommitter = newOutputCommitter(taskAttemptContext) + outputCommitter.setupTask(taskAttemptContext) + initWriters() + } + + protected def getWorkPath: String = { + outputCommitter match { + // FileOutputCommitter writes to a temporary location returned by `getWorkPath`. + case f: MapReduceFileOutputCommitter => f.getWorkPath.toString + case _ => outputPath + } + } + + private def newOutputCommitter(context: TaskAttemptContext): OutputCommitter = { + val committerClass = context.getConfiguration.getClass( + SQLConf.OUTPUT_COMMITTER_CLASS, null, classOf[OutputCommitter]) + + Option(committerClass).map { clazz => + // Every output format based on org.apache.hadoop.mapreduce.lib.output.OutputFormat + // has an associated output committer. To override this output committer, + // we will first try to use the output committer set in SQLConf.OUTPUT_COMMITTER_CLASS. + // If a data source needs to override the output committer, it needs to set the + // output committer in prepareForWrite method. + if (classOf[MapReduceFileOutputCommitter].isAssignableFrom(clazz)) { + // The specified output committer is a FileOutputCommitter. + // So, we will use the FileOutputCommitter-specified constructor. + val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) + ctor.newInstance(new Path(outputPath), context) + } else { + // The specified output committer is just a OutputCommitter. + // So, we will use the no-argument constructor. + val ctor = clazz.getDeclaredConstructor() + ctor.newInstance() + } + }.getOrElse { + // If output committer class is not set, we will use the one associated with the + // file output format. 
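`newOutputCommitter` instantiates a user-configured committer class reflectively, preferring a `(Path, TaskAttemptContext)` constructor when the class is a `FileOutputCommitter` and falling back to the no-argument constructor otherwise. The sketch below shows the same constructor-selection pattern with ordinary Java reflection and made-up committer classes, since the Hadoop types are not needed to illustrate it:

    abstract class Committer
    class SimpleCommitter() extends Committer
    class PathAwareCommitter(val outputPath: String, val attempt: String) extends Committer

    object CommitterReflectionSketch {
      def newCommitter(clazz: Class[_ <: Committer], outputPath: String, attempt: String): Committer = {
        if (classOf[PathAwareCommitter].isAssignableFrom(clazz)) {
          // Path-aware committers get the output location and task attempt injected.
          val ctor = clazz.getDeclaredConstructor(classOf[String], classOf[String])
          ctor.newInstance(outputPath, attempt)
        } else {
          // Anything else is expected to have a no-argument constructor.
          clazz.getDeclaredConstructor().newInstance()
        }
      }

      def main(args: Array[String]): Unit = {
        val c1 = newCommitter(classOf[PathAwareCommitter], "/tmp/out", "attempt_0")
        val c2 = newCommitter(classOf[SimpleCommitter], "/tmp/out", "attempt_0")
        println(c1.getClass.getSimpleName + ", " + c2.getClass.getSimpleName)
        // PathAwareCommitter, SimpleCommitter
      }
    }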
+ outputFormatClass.newInstance().getOutputCommitter(context) + } + } + + private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { + this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) + this.taskId = new TaskID(this.jobId, true, splitId) + this.taskAttemptId = new TaskAttemptID(taskId, attemptId) + } + + private def setupConf(): Unit = { + serializableConf.value.set("mapred.job.id", jobId.toString) + serializableConf.value.set("mapred.tip.id", taskAttemptId.getTaskID.toString) + serializableConf.value.set("mapred.task.id", taskAttemptId.toString) + serializableConf.value.setBoolean("mapred.task.is.map", true) + serializableConf.value.setInt("mapred.task.partition", 0) + } + + // Called on executor side when writing rows + def outputWriterForRow(row: Row): OutputWriter + + protected def initWriters(): Unit + + def commitTask(): Unit = { + SparkHadoopMapRedUtil.commitTask( + outputCommitter, taskAttemptContext, jobId.getId, taskId.getId, taskAttemptId.getId) + } + + def abortTask(): Unit = { + outputCommitter.abortTask(taskAttemptContext) + logError(s"Task attempt $taskAttemptId aborted.") + } + + def commitJob(): Unit = { + outputCommitter.commitJob(jobContext) + logInfo(s"Job $jobId committed.") + } + + def abortJob(): Unit = { + outputCommitter.abortJob(jobContext, JobStatus.State.FAILED) + logError(s"Job $jobId aborted.") + } +} + +private[sql] class DefaultWriterContainer( + @transient relation: HadoopFsRelation, + @transient job: Job) + extends BaseWriterContainer(relation, job) { + + @transient private var writer: OutputWriter = _ + + override protected def initWriters(): Unit = { + taskAttemptContext.getConfiguration.set("spark.sql.sources.output.path", outputPath) + writer = outputWriterFactory.newInstance(getWorkPath, dataSchema, taskAttemptContext) + } + + override def outputWriterForRow(row: Row): OutputWriter = writer + + override def commitTask(): Unit = { + try { + writer.close() + super.commitTask() + } catch { + case cause: Throwable => + super.abortTask() + throw new RuntimeException("Failed to commit task", cause) + } + } + + override def abortTask(): Unit = { + try { + writer.close() + } finally { + super.abortTask() + } + } +} + +private[sql] class DynamicPartitionWriterContainer( + @transient relation: HadoopFsRelation, + @transient job: Job, + partitionColumns: Array[String], + defaultPartitionName: String) + extends BaseWriterContainer(relation, job) { + + // All output writers are created on executor side. 
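The `outputWriterForRow` method that follows derives one output directory per distinct combination of partition values, formatting each value as `column=value`, substituting a default name for null or empty values, and percent-escaping characters that are unsafe in paths. A trimmed-down sketch of that path construction (the escape set here is deliberately tiny):

    object PartitionPathSketch {
      val defaultPartitionName = "__HIVE_DEFAULT_PARTITION__"

      // Escapes just a few characters for the sketch; the real code escapes a larger set.
      def escape(s: String): String =
        s.flatMap { c =>
          if (c == '/' || c == '=' || c == ':') f"%%${c.toInt}%02x" else c.toString
        }

      def partitionPath(partitionColumns: Seq[String], values: Seq[Any]): String =
        partitionColumns.zip(values).map { case (col, raw) =>
          val str = if (raw == null) null else raw.toString
          val value = if (str == null || str.isEmpty) defaultPartitionName else escape(str)
          s"$col=$value"
        }.mkString("/")

      def main(args: Array[String]): Unit = {
        println(partitionPath(Seq("year", "country"), Seq(2015, "a/b")))
        // year=2015/country=a%2fb
        println(partitionPath(Seq("year", "country"), Seq(2015, null)))
        // year=2015/country=__HIVE_DEFAULT_PARTITION__
      }
    }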
+ @transient protected var outputWriters: mutable.Map[String, OutputWriter] = _ + + override protected def initWriters(): Unit = { + outputWriters = mutable.Map.empty[String, OutputWriter] + } + + override def outputWriterForRow(row: Row): OutputWriter = { + val partitionPath = partitionColumns.zip(row.toSeq).map { case (col, rawValue) => + val string = if (rawValue == null) null else String.valueOf(rawValue) + val valueString = if (string == null || string.isEmpty) { + defaultPartitionName + } else { + PartitioningUtils.escapePathName(string) + } + s"/$col=$valueString" + }.mkString.stripPrefix(Path.SEPARATOR) + + outputWriters.getOrElseUpdate(partitionPath, { + val path = new Path(getWorkPath, partitionPath) + taskAttemptContext.getConfiguration.set( + "spark.sql.sources.output.path", + new Path(outputPath, partitionPath).toString) + outputWriterFactory.newInstance(path.toString, dataSchema, taskAttemptContext) + }) + } + + override def commitTask(): Unit = { + try { + outputWriters.values.foreach(_.close()) + super.commitTask() + } catch { case cause: Throwable => + super.abortTask() + throw new RuntimeException("Failed to commit task", cause) + } + } + + override def abortTask(): Unit = { + try { + outputWriters.values.foreach(_.close()) + } finally { + super.abortTask() + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala index 1abf3aa51cb25..20afd60cb7767 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/ddl.scala @@ -17,18 +17,20 @@ package org.apache.spark.sql.sources -import scala.language.existentials +import scala.language.{existentials, implicitConversions} import scala.util.matching.Regex -import scala.language.implicitConversions + +import org.apache.hadoop.fs.Path import org.apache.spark.Logging -import org.apache.spark.sql.{AnalysisException, SaveMode, DataFrame, SQLContext} -import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.catalyst.AbstractSparkSQLParser import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Row} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, DataFrame, SQLContext, SaveMode} import org.apache.spark.util.Utils /** @@ -111,6 +113,7 @@ private[sql] class DDLParser( CreateTableUsingAsSelect(tableName, provider, temp.isDefined, + Array.empty[String], mode, options, queryPlan) @@ -127,7 +130,7 @@ private[sql] class DDLParser( } } - protected lazy val tableCols: Parser[Seq[StructField]] = "(" ~> repsep(column, ",") <~ ")" + protected lazy val tableCols: Parser[Seq[StructField]] = "(" ~> repsep(column, ",") <~ ")" /* * describe [extended] table avroTable @@ -135,7 +138,7 @@ private[sql] class DDLParser( */ protected lazy val describeTable: Parser[LogicalPlan] = (DESCRIBE ~> opt(EXTENDED)) ~ (ident <~ ".").? 
~ ident ^^ { - case e ~ db ~ tbl => + case e ~ db ~ tbl => val tblIdentifier = db match { case Some(dbName) => Seq(dbName, tbl) @@ -157,7 +160,7 @@ private[sql] class DDLParser( protected lazy val className: Parser[String] = repsep(ident, ".") ^^ { case s => s.mkString(".")} override implicit def regexToParser(regex: Regex): Parser[String] = acceptMatch( - s"identifier matching regex ${regex}", { + s"identifier matching regex $regex", { case lexical.Identifier(str) if regex.unapplySeq(str).isDefined => str case lexical.Keyword(str) if regex.unapplySeq(str).isDefined => str } @@ -168,7 +171,7 @@ private[sql] class DDLParser( } protected lazy val pair: Parser[(String, String)] = - optionName ~ stringLit ^^ { case k ~ v => (k,v) } + optionName ~ stringLit ^^ { case k ~ v => (k, v) } protected lazy val column: Parser[StructField] = ident ~ dataType ~ (COMMENT ~> stringLit).? ^^ { case columnName ~ typ ~ cm => @@ -185,18 +188,20 @@ private[sql] class DDLParser( private[sql] object ResolvedDataSource { private val builtinSources = Map( - "jdbc" -> classOf[org.apache.spark.sql.jdbc.DefaultSource], - "json" -> classOf[org.apache.spark.sql.json.DefaultSource], - "parquet" -> classOf[org.apache.spark.sql.parquet.DefaultSource] + "jdbc" -> "org.apache.spark.sql.jdbc.DefaultSource", + "json" -> "org.apache.spark.sql.json.DefaultSource", + "parquet" -> "org.apache.spark.sql.parquet.DefaultSource", + "orc" -> "org.apache.spark.sql.hive.orc.DefaultSource" ) /** Given a provider name, look up the data source class definition. */ def lookupDataSource(provider: String): Class[_] = { + val loader = Utils.getContextOrSparkClassLoader + if (builtinSources.contains(provider)) { - return builtinSources(provider) + return loader.loadClass(builtinSources(provider)) } - val loader = Utils.getContextOrSparkClassLoader try { loader.loadClass(provider) } catch { @@ -205,7 +210,11 @@ private[sql] object ResolvedDataSource { loader.loadClass(provider + ".DefaultSource") } catch { case cnf: java.lang.ClassNotFoundException => - sys.error(s"Failed to load class for data source: $provider") + if (provider.startsWith("org.apache.spark.sql.hive.orc")) { + sys.error("The ORC data source must be used with Hive support enabled.") + } else { + sys.error(s"Failed to load class for data source: $provider") + } } } } @@ -214,6 +223,7 @@ private[sql] object ResolvedDataSource { def apply( sqlContext: SQLContext, userSpecifiedSchema: Option[StructType], + partitionColumns: Array[String], provider: String, options: Map[String, String]): ResolvedDataSource = { val clazz: Class[_] = lookupDataSource(provider) @@ -222,6 +232,28 @@ private[sql] object ResolvedDataSource { case Some(schema: StructType) => clazz.newInstance() match { case dataSource: SchemaRelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options), schema) + case dataSource: HadoopFsRelationProvider => + val maybePartitionsSchema = if (partitionColumns.isEmpty) { + None + } else { + Some(partitionColumnsSchema(schema, partitionColumns)) + } + + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val paths = { + val patternPath = new Path(caseInsensitiveOptions("path")) + SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray + } + + val dataSchema = + StructType(schema.filterNot(f => partitionColumns.contains(f.name))).asNullable + + dataSource.createRelation( + sqlContext, + paths, + Some(dataSchema), + maybePartitionsSchema, + caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.RelationProvider => 
throw new AnalysisException(s"$className does not allow user-specified schemas.") case _ => @@ -231,20 +263,39 @@ private[sql] object ResolvedDataSource { case None => clazz.newInstance() match { case dataSource: RelationProvider => dataSource.createRelation(sqlContext, new CaseInsensitiveMap(options)) + case dataSource: HadoopFsRelationProvider => + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val paths = { + val patternPath = new Path(caseInsensitiveOptions("path")) + SparkHadoopUtil.get.globPath(patternPath).map(_.toString).toArray + } + dataSource.createRelation(sqlContext, paths, None, None, caseInsensitiveOptions) case dataSource: org.apache.spark.sql.sources.SchemaRelationProvider => throw new AnalysisException( s"A schema needs to be specified when using $className.") case _ => - throw new AnalysisException(s"$className is not a RelationProvider.") + throw new AnalysisException( + s"$className is neither a RelationProvider nor a FSBasedRelationProvider.") } } new ResolvedDataSource(clazz, relation) } + private def partitionColumnsSchema( + schema: StructType, + partitionColumns: Array[String]): StructType = { + StructType(partitionColumns.map { col => + schema.find(_.name == col).getOrElse { + throw new RuntimeException(s"Partition column $col not found in schema $schema") + } + }).asNullable + } + /** Create a [[ResolvedDataSource]] for saving the content of the given [[DataFrame]]. */ def apply( sqlContext: SQLContext, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], data: DataFrame): ResolvedDataSource = { @@ -252,6 +303,34 @@ private[sql] object ResolvedDataSource { val relation = clazz.newInstance() match { case dataSource: CreatableRelationProvider => dataSource.createRelation(sqlContext, mode, options, data) + case dataSource: HadoopFsRelationProvider => + // Don't glob path for the write path. The contracts here are: + // 1. Only one output path can be specified on the write path; + // 2. Output path must be a legal HDFS style file system path; + // 3. It's OK that the output path doesn't exist yet; + val caseInsensitiveOptions = new CaseInsensitiveMap(options) + val outputPath = { + val path = new Path(caseInsensitiveOptions("path")) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + path.makeQualified(fs.getUri, fs.getWorkingDirectory) + } + val dataSchema = StructType(data.schema.filterNot(f => partitionColumns.contains(f.name))) + val r = dataSource.createRelation( + sqlContext, + Array(outputPath.toString), + Some(dataSchema.asNullable), + Some(partitionColumnsSchema(data.schema, partitionColumns)), + caseInsensitiveOptions) + + // For partitioned relation r, r.schema's column ordering can be different from the column + // ordering of data.logicalPlan (partition columns are all moved after data column). This + // will be adjusted within InsertIntoHadoopFsRelation. + sqlContext.executePlan( + InsertIntoHadoopFsRelation( + r, + data.logicalPlan, + mode)).toRdd + r case _ => sys.error(s"${clazz.getCanonicalName} does not allow create table as select.") } @@ -269,8 +348,10 @@ private[sql] case class ResolvedDataSource(provider: Class[_], relation: BaseRel */ private[sql] case class DescribeCommand( table: LogicalPlan, - isExtended: Boolean) extends Command { - override val output = Seq( + isExtended: Boolean) extends LogicalPlan with Command { + + override def children: Seq[LogicalPlan] = Seq.empty + override val output: Seq[Attribute] = Seq( // Column names are based on Hive. 
AttributeReference("col_name", StringType, nullable = false, new MetadataBuilder().putString("comment", "name of the column").build())(), @@ -292,7 +373,11 @@ private[sql] case class CreateTableUsing( temporary: Boolean, options: Map[String, String], allowExisting: Boolean, - managedIfNoPath: Boolean) extends Command + managedIfNoPath: Boolean) extends LogicalPlan with Command { + + override def output: Seq[Attribute] = Seq.empty + override def children: Seq[LogicalPlan] = Seq.empty +} /** * A node used to support CTAS statements and saveAsTable for the data source API. @@ -304,6 +389,7 @@ private[sql] case class CreateTableUsingAsSelect( tableName: String, provider: String, temporary: Boolean, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], child: LogicalPlan) extends UnaryNode { @@ -319,7 +405,8 @@ private[sql] case class CreateTempTableUsing( options: Map[String, String]) extends RunnableCommand { def run(sqlContext: SQLContext): Seq[Row] = { - val resolved = ResolvedDataSource(sqlContext, userSpecifiedSchema, provider, options) + val resolved = ResolvedDataSource( + sqlContext, userSpecifiedSchema, Array.empty[String], provider, options) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) Seq.empty @@ -329,13 +416,14 @@ private[sql] case class CreateTempTableUsing( private[sql] case class CreateTempTableUsingAsSelect( tableName: String, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends RunnableCommand { - def run(sqlContext: SQLContext): Seq[Row] = { + override def run(sqlContext: SQLContext): Seq[Row] = { val df = DataFrame(sqlContext, query) - val resolved = ResolvedDataSource(sqlContext, provider, mode, options, df) + val resolved = ResolvedDataSource(sqlContext, provider, partitionColumns, mode, options, df) sqlContext.registerDataFrameAsTable( DataFrame(sqlContext, LogicalRelation(resolved.relation)), tableName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala index 791046e0079d6..24e86ca415c51 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -19,83 +19,113 @@ package org.apache.spark.sql.sources /** * A filter predicate for data sources. + * + * @since 1.3.0 */ abstract class Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * equal to `value`. + * + * @since 1.3.0 */ case class EqualTo(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * greater than `value`. + * + * @since 1.3.0 */ case class GreaterThan(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * greater than or equal to `value`. + * + * @since 1.3.0 */ case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * less than `value`. + * + * @since 1.3.0 */ case class LessThan(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a value * less than or equal to `value`. 
+ * + * @since 1.3.0 */ case class LessThanOrEqual(attribute: String, value: Any) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to one of the values in the array. + * + * @since 1.3.0 */ case class In(attribute: String, values: Array[Any]) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to null. + * + * @since 1.3.0 */ case class IsNull(attribute: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to a non-null value. + * + * @since 1.3.0 */ case class IsNotNull(attribute: String) extends Filter /** * A filter that evaluates to `true` iff both `left` or `right` evaluate to `true`. + * + * @since 1.3.0 */ case class And(left: Filter, right: Filter) extends Filter /** * A filter that evaluates to `true` iff at least one of `left` or `right` evaluates to `true`. + * + * @since 1.3.0 */ case class Or(left: Filter, right: Filter) extends Filter /** * A filter that evaluates to `true` iff `child` is evaluated to `false`. + * + * @since 1.3.0 */ case class Not(child: Filter) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that starts with `value`. + * + * @since 1.3.1 */ case class StringStartsWith(attribute: String, value: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that starts with `value`. + * + * @since 1.3.1 */ case class StringEndsWith(attribute: String, value: String) extends Filter /** * A filter that evaluates to `true` iff the attribute evaluates to * a string that contains the string `value`. + * + * @since 1.3.1 */ case class StringContains(attribute: String, value: String) extends Filter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index ca53dcdb92c52..f5bd2d2941ca0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -17,10 +17,20 @@ package org.apache.spark.sql.sources -import org.apache.spark.annotation.{Experimental, DeveloperApi} +import scala.collection.mutable +import scala.util.Try + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} + +import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{SaveMode, DataFrame, Row, SQLContext} -import org.apache.spark.sql.catalyst.expressions.{Expression, Attribute} +import org.apache.spark.SerializableWritable +import org.apache.spark.sql.{Row, _} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.codegen.GenerateMutableProjection import org.apache.spark.sql.types.StructType /** @@ -35,6 +45,8 @@ import org.apache.spark.sql.types.StructType * data source 'org.apache.spark.sql.json.DefaultSource' * * A new instance of this class with be instantiated each time a DDL call is made. + * + * @since 1.3.0 */ @DeveloperApi trait RelationProvider { @@ -64,6 +76,8 @@ trait RelationProvider { * users need to provide a schema when using a SchemaRelationProvider. * A relation provider can inherits both [[RelationProvider]] and [[SchemaRelationProvider]] * if it can support both schema inference and user-specified schemas. 
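The Filter hierarchy defined in filters.scala above is what a PrunedFilteredScan, and the filter-accepting buildScan overloads added to HadoopFsRelation below, receive for predicate pushdown. Because pushed-down filters are re-evaluated by Spark afterwards, a source may safely ignore any predicate it cannot handle. A minimal, illustrative translation into an in-memory predicate could look like the following; toPredicate and the get accessor are names invented for this sketch, not Spark API.

    import org.apache.spark.sql.sources._

    // Evaluate a pushed-down Filter against a record exposed as a column-name accessor.
    // Unsupported predicates conservatively return true; Spark re-applies all filters anyway.
    def toPredicate(filter: Filter)(get: String => Any): Boolean = filter match {
      case EqualTo(attr, v)          => get(attr) == v
      case In(attr, vs)              => vs.contains(get(attr))
      case IsNull(attr)              => get(attr) == null
      case IsNotNull(attr)           => get(attr) != null
      case And(l, r)                 => toPredicate(l)(get) && toPredicate(r)(get)
      case Or(l, r)                  => toPredicate(l)(get) || toPredicate(r)(get)
      case Not(c)                    => !toPredicate(c)(get)
      case StringStartsWith(attr, v) => Option(get(attr)).exists(_.toString.startsWith(v))
      case StringEndsWith(attr, v)   => Option(get(attr)).exists(_.toString.endsWith(v))
      case StringContains(attr, v)   => Option(get(attr)).exists(_.toString.contains(v))
      case _                         => true // GreaterThan, LessThan, etc. need type-aware comparisons
    }
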
+ * + * @since 1.3.0 */ @DeveloperApi trait SchemaRelationProvider { @@ -78,6 +92,49 @@ trait SchemaRelationProvider { schema: StructType): BaseRelation } +/** + * ::Experimental:: + * Implemented by objects that produce relations for a specific kind of data source + * with a given schema and partitioned columns. When Spark SQL is given a DDL operation with a + * USING clause specified (to specify the implemented [[HadoopFsRelationProvider]]), a user defined + * schema, and an optional list of partition columns, this interface is used to pass in the + * parameters specified by a user. + * + * Users may specify the fully qualified class name of a given data source. When that class is + * not found Spark SQL will append the class name `DefaultSource` to the path, allowing for + * less verbose invocation. For example, 'org.apache.spark.sql.json' would resolve to the + * data source 'org.apache.spark.sql.json.DefaultSource' + * + * A new instance of this class with be instantiated each time a DDL call is made. + * + * The difference between a [[RelationProvider]] and a [[HadoopFsRelationProvider]] is + * that users need to provide a schema and a (possibly empty) list of partition columns when + * using a SchemaRelationProvider. A relation provider can inherits both [[RelationProvider]], + * and [[HadoopFsRelationProvider]] if it can support schema inference, user-specified + * schemas, and accessing partitioned relations. + * + * @since 1.4.0 + */ +@Experimental +trait HadoopFsRelationProvider { + /** + * Returns a new base relation with the given parameters, a user defined schema, and a list of + * partition columns. Note: the parameters' keywords are case insensitive and this insensitivity + * is enforced by the Map that is passed to the function. + * + * @param dataSchema Schema of data columns (i.e., columns that are not partition columns). + */ + def createRelation( + sqlContext: SQLContext, + paths: Array[String], + dataSchema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): HadoopFsRelation +} + +/** + * @since 1.3.0 + */ @DeveloperApi trait CreatableRelationProvider { /** @@ -91,6 +148,8 @@ trait CreatableRelationProvider { * existing data is expected to be overwritten by the contents of the DataFrame. * ErrorIfExists mode means that when saving a DataFrame to a data source, * if data already exists, an exception is expected to be thrown. + * + * @since 1.3.0 */ def createRelation( sqlContext: SQLContext, @@ -109,6 +168,8 @@ trait CreatableRelationProvider { * BaseRelations must also define a equality function that only returns true when the two * instances will return the same data. This equality function is used when determining when * it is safe to substitute cached results for a given relation. + * + * @since 1.3.0 */ @DeveloperApi abstract class BaseRelation { @@ -124,6 +185,8 @@ abstract class BaseRelation { * * Note that it is always better to overestimate size than underestimate, because underestimation * could lead to execution plans that are suboptimal (i.e. broadcasting a very large table). + * + * @since 1.3.0 */ def sizeInBytes: Long = sqlContext.conf.defaultSizeInBytes @@ -134,6 +197,8 @@ abstract class BaseRelation { * * Note: The internal representation is not stable across releases and thus data sources outside * of Spark SQL should leave this as true. 
+ * + * @since 1.4.0 */ def needConversion: Boolean = true } @@ -141,6 +206,8 @@ abstract class BaseRelation { /** * ::DeveloperApi:: * A BaseRelation that can produce all of its tuples as an RDD of Row objects. + * + * @since 1.3.0 */ @DeveloperApi trait TableScan { @@ -151,6 +218,8 @@ trait TableScan { * ::DeveloperApi:: * A BaseRelation that can eliminate unneeded columns before producing an RDD * containing all of its tuples as Row objects. + * + * @since 1.3.0 */ @DeveloperApi trait PrunedScan { @@ -168,6 +237,8 @@ trait PrunedScan { * The pushed down filters are currently purely an optimization as they will all be evaluated * again. This means it is safe to use them with methods that produce false positives such * as filtering partitions based on a bloom filter. + * + * @since 1.3.0 */ @DeveloperApi trait PrunedFilteredScan { @@ -189,6 +260,8 @@ trait PrunedFilteredScan { * 3. It assumes that fields of the data provided in the insert method are nullable. * If a data source needs to check the actual nullability of a field, it needs to do it in the * insert method. + * + * @since 1.3.0 */ @DeveloperApi trait InsertableRelation { @@ -202,8 +275,380 @@ trait InsertableRelation { * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. Unlike the other APIs this * interface is NOT designed to be binary compatible across releases and thus should only be used * for experimentation. + * + * @since 1.3.0 */ @Experimental trait CatalystScan { def buildScan(requiredColumns: Seq[Attribute], filters: Seq[Expression]): RDD[Row] } + +/** + * ::Experimental:: + * A factory that produces [[OutputWriter]]s. A new [[OutputWriterFactory]] is created on driver + * side for each write job issued when writing to a [[HadoopFsRelation]], and then gets serialized + * to executor side to create actual [[OutputWriter]]s on the fly. + * + * @since 1.4.0 + */ +@Experimental +abstract class OutputWriterFactory extends Serializable { + /** + * When writing to a [[HadoopFsRelation]], this method gets called by each task on executor side + * to instantiate new [[OutputWriter]]s. + * + * @param path Path of the file to which this [[OutputWriter]] is supposed to write. Note that + * this may not point to the final output file. For example, `FileOutputFormat` writes to + * temporary directories and then merge written files back to the final destination. In + * this case, `path` points to a temporary output file under the temporary directory. + * @param dataSchema Schema of the rows to be written. Partition columns are not included in the + * schema if the relation being written is partitioned. + * @param context The Hadoop MapReduce task context. + * + * @since 1.4.0 + */ + def newInstance(path: String, dataSchema: StructType, context: TaskAttemptContext): OutputWriter +} + +/** + * ::Experimental:: + * [[OutputWriter]] is used together with [[HadoopFsRelation]] for persisting rows to the + * underlying file system. Subclasses of [[OutputWriter]] must provide a zero-argument constructor. + * An [[OutputWriter]] instance is created and initialized when a new output file is opened on + * executor side. This instance is used to persist rows to this single output file. + * + * @since 1.4.0 + */ +@Experimental +abstract class OutputWriter { + /** + * Persists a single row. Invoked on the executor side. When writing to dynamically partitioned + * tables, dynamic partition columns are not included in rows to be written. + * + * @since 1.4.0 + */ + def write(row: Row): Unit + + /** + * Closes the [[OutputWriter]]. 
Invoked on the executor side after all rows are persisted, before + * the task output is committed. + * + * @since 1.4.0 + */ + def close(): Unit +} + +/** + * ::Experimental:: + * A [[BaseRelation]] that provides much of the common code required for formats that store their + * data to an HDFS compatible filesystem. + * + * For the read path, similar to [[PrunedFilteredScan]], it can eliminate unneeded columns and + * filter using selected predicates before producing an RDD containing all matching tuples as + * [[Row]] objects. In addition, when reading from Hive style partitioned tables stored in file + * systems, it's able to discover partitioning information from the paths of input directories, and + * perform partition pruning before start reading the data. Subclasses of [[HadoopFsRelation()]] + * must override one of the three `buildScan` methods to implement the read path. + * + * For the write path, it provides the ability to write to both non-partitioned and partitioned + * tables. Directory layout of the partitioned tables is compatible with Hive. + * + * @constructor This constructor is for internal uses only. The [[PartitionSpec]] argument is for + * implementing metastore table conversion. + * + * @param maybePartitionSpec An [[HadoopFsRelation]] can be created with an optional + * [[PartitionSpec]], so that partition discovery can be skipped. + * + * @since 1.4.0 + */ +@Experimental +abstract class HadoopFsRelation private[sql](maybePartitionSpec: Option[PartitionSpec]) + extends BaseRelation { + + def this() = this(None) + + private val hadoopConf = new Configuration(sqlContext.sparkContext.hadoopConfiguration) + + private val codegenEnabled = sqlContext.conf.codegenEnabled + + private var _partitionSpec: PartitionSpec = _ + + private class FileStatusCache { + var leafFiles = mutable.Map.empty[Path, FileStatus] + + var leafDirToChildrenFiles = mutable.Map.empty[Path, Array[FileStatus]] + + def refresh(): Unit = { + // We don't filter files/directories whose name start with "_" except "_temporary" here, as + // specific data sources may take advantages over them (e.g. Parquet _metadata and + // _common_metadata files). "_temporary" directories are explicitly ignored since failed + // tasks/jobs may leave partial/corrupted data files there. 
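The file listing cached here feeds the Hive-style partition discovery implemented further down (partitionSpec / discoverPartitions). As a usage sketch of the layout being discovered, assuming the DataFrameReader/DataFrameWriter API that the Java test changes later in this patch also exercise, and with illustrative paths and column names:

    // Writing partitioned by two columns lays out one directory per distinct value pair,
    // e.g. /data/events/year=2015/month=5/ containing the data files for that partition.
    df.write.format("parquet").partitionBy("year", "month").save("/data/events")

    // Reading the root path back discovers `year` and `month` from the directory names
    // and appends them to the schema as (nullable) partition columns.
    val events = sqlContext.read.format("parquet").load("/data/events")
    events.printSchema()
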
+ def listLeafFilesAndDirs(fs: FileSystem, status: FileStatus): Set[FileStatus] = { + if (status.getPath.getName.toLowerCase == "_temporary") { + Set.empty + } else { + val (dirs, files) = fs.listStatus(status.getPath).partition(_.isDir) + val leafDirs = if (dirs.isEmpty) Set(status) else Set.empty[FileStatus] + files.toSet ++ leafDirs ++ dirs.flatMap(dir => listLeafFilesAndDirs(fs, dir)) + } + } + + leafFiles.clear() + + val statuses = paths.flatMap { path => + val hdfsPath = new Path(path) + val fs = hdfsPath.getFileSystem(hadoopConf) + val qualified = hdfsPath.makeQualified(fs.getUri, fs.getWorkingDirectory) + Try(fs.getFileStatus(qualified)).toOption.toArray.flatMap(listLeafFilesAndDirs(fs, _)) + }.filterNot { status => + // SPARK-8037: Ignores files like ".DS_Store" and other hidden files/directories + status.getPath.getName.startsWith(".") + } + + val files = statuses.filterNot(_.isDir) + leafFiles ++= files.map(f => f.getPath -> f).toMap + leafDirToChildrenFiles ++= files.groupBy(_.getPath.getParent) + } + } + + private lazy val fileStatusCache = { + val cache = new FileStatusCache + cache.refresh() + cache + } + + protected def cachedLeafStatuses(): Set[FileStatus] = { + fileStatusCache.leafFiles.values.toSet + } + + final private[sql] def partitionSpec: PartitionSpec = { + if (_partitionSpec == null) { + _partitionSpec = maybePartitionSpec + .flatMap { + case spec if spec.partitions.nonEmpty => + Some(spec.copy(partitionColumns = spec.partitionColumns.asNullable)) + case _ => + None + } + .orElse { + // We only know the partition columns and their data types. We need to discover + // partition values. + userDefinedPartitionColumns.map { partitionSchema => + val spec = discoverPartitions() + val castedPartitions = spec.partitions.map { case p @ Partition(values, path) => + val literals = values.toSeq.zip(spec.partitionColumns.map(_.dataType)).map { + case (value, dataType) => Literal.create(value, dataType) + } + val castedValues = partitionSchema.zip(literals).map { case (field, literal) => + Cast(literal, field.dataType).eval() + } + p.copy(values = Row.fromSeq(castedValues)) + } + PartitionSpec(partitionSchema, castedPartitions) + } + } + .getOrElse { + if (sqlContext.conf.partitionDiscoveryEnabled()) { + discoverPartitions() + } else { + PartitionSpec(StructType(Nil), Array.empty[Partition]) + } + } + } + _partitionSpec + } + + /** + * Base paths of this relation. For partitioned relations, it should be either root directories + * of all partition directories. + * + * @since 1.4.0 + */ + def paths: Array[String] + + /** + * Partition columns. Can be either defined by [[userDefinedPartitionColumns]] or automatically + * discovered. Note that they should always be nullable. + * + * @since 1.4.0 + */ + final def partitionColumns: StructType = + userDefinedPartitionColumns.getOrElse(partitionSpec.partitionColumns) + + /** + * Optional user defined partition columns. + * + * @since 1.4.0 + */ + def userDefinedPartitionColumns: Option[StructType] = None + + private[sql] def refresh(): Unit = { + fileStatusCache.refresh() + if (sqlContext.conf.partitionDiscoveryEnabled()) { + _partitionSpec = discoverPartitions() + } + } + + private def discoverPartitions(): PartitionSpec = { + // We use leaf dirs containing data files to discover the schema. + val leafDirs = fileStatusCache.leafDirToChildrenFiles.keys.toSeq + PartitioningUtils.parsePartitions(leafDirs, PartitioningUtils.DEFAULT_PARTITION_NAME) + } + + /** + * Schema of this relation. 
It consists of columns appearing in [[dataSchema]] and all partition + * columns not appearing in [[dataSchema]]. + * + * @since 1.4.0 + */ + override lazy val schema: StructType = { + val dataSchemaColumnNames = dataSchema.map(_.name.toLowerCase).toSet + StructType(dataSchema ++ partitionColumns.filterNot { column => + dataSchemaColumnNames.contains(column.name.toLowerCase) + }) + } + + private[sources] final def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[String], + broadcastedConf: Broadcast[SerializableWritable[Configuration]]): RDD[Row] = { + val inputStatuses = inputPaths.flatMap { input => + val path = new Path(input) + + // First assumes `input` is a directory path, and tries to get all files contained in it. + fileStatusCache.leafDirToChildrenFiles.getOrElse( + path, + // Otherwise, `input` might be a file path + fileStatusCache.leafFiles.get(path).toArray + ).filter { status => + val name = status.getPath.getName + !name.startsWith("_") && !name.startsWith(".") + } + } + + buildScan(requiredColumns, filters, inputStatuses, broadcastedConf) + } + + /** + * Specifies schema of actual data files. For partitioned relations, if one or more partitioned + * columns are contained in the data files, they should also appear in `dataSchema`. + * + * @since 1.4.0 + */ + def dataSchema: StructType + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + * + * @since 1.4.0 + */ + def buildScan(inputFiles: Array[FileStatus]): RDD[Row] = { + throw new UnsupportedOperationException( + "At least one buildScan() method should be overridden to read the relation.") + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param requiredColumns Required columns. + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + * + * @since 1.4.0 + */ + def buildScan(requiredColumns: Array[String], inputFiles: Array[FileStatus]): RDD[Row] = { + // Yeah, to workaround serialization... + val dataSchema = this.dataSchema + val codegenEnabled = this.codegenEnabled + + val requiredOutput = requiredColumns.map { col => + val field = dataSchema(col) + BoundReference(dataSchema.fieldIndex(col), field.dataType, field.nullable) + }.toSeq + + buildScan(inputFiles).mapPartitions { rows => + val buildProjection = if (codegenEnabled) { + GenerateMutableProjection.generate(requiredOutput, dataSchema.toAttributes) + } else { + () => new InterpretedMutableProjection(requiredOutput, dataSchema.toAttributes) + } + + val mutableProjection = buildProjection() + rows.map(mutableProjection) + } + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. 
For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * @param requiredColumns Required columns. + * @param filters Candidate filters to be pushed down. The actual filter should be the conjunction + * of all `filters`. The pushed down filters are currently purely an optimization as they + * will all be evaluated again. This means it is safe to use them with methods that produce + * false positives such as filtering partitions based on a bloom filter. + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + * + * @since 1.4.0 + */ + def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputFiles: Array[FileStatus]): RDD[Row] = { + buildScan(requiredColumns, inputFiles) + } + + /** + * For a non-partitioned relation, this method builds an `RDD[Row]` containing all rows within + * this relation. For partitioned relations, this method is called for each selected partition, + * and builds an `RDD[Row]` containing all rows within that single partition. + * + * Note: This interface is subject to change in future. + * + * @param requiredColumns Required columns. + * @param filters Candidate filters to be pushed down. The actual filter should be the conjunction + * of all `filters`. The pushed down filters are currently purely an optimization as they + * will all be evaluated again. This means it is safe to use them with methods that produce + * false positives such as filtering partitions based on a bloom filter. + * @param inputFiles For a non-partitioned relation, it contains paths of all data files in the + * relation. For a partitioned relation, it contains paths of all data files in a single + * selected partition. + * @param broadcastedConf A shared broadcast Hadoop Configuration, which can be used to reduce the + * overhead of broadcasting the Configuration for every Hadoop RDD. + * + * @since 1.4.0 + */ + private[sql] def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputFiles: Array[FileStatus], + broadcastedConf: Broadcast[SerializableWritable[Configuration]]): RDD[Row] = { + buildScan(requiredColumns, filters, inputFiles) + } + + /** + * Prepares a write job and returns an [[OutputWriterFactory]]. Client side job preparation can + * be put here. For example, user defined output committer can be configured here + * by setting the output committer class in the conf of spark.sql.sources.outputCommitterClass. + * + * Note that the only side effect expected here is mutating `job` via its setters. Especially, + * Spark SQL caches [[BaseRelation]] instances for performance, mutating relation internal states + * may cause unexpected behaviors. + * + * @since 1.4.0 + */ + def prepareJobForWrite(job: Job): OutputWriterFactory +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala index 6ed68d179edc9..a3fd7f13b3db7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/rules.scala @@ -35,9 +35,9 @@ private[sql] object PreInsertCastAndRename extends Rule[LogicalPlan] { // Wait until children are resolved. case p: LogicalPlan if !p.childrenResolved => p - // We are inserting into an InsertableRelation. 
+ // We are inserting into an InsertableRelation or HadoopFsRelation. case i @ InsertIntoTable( - l @ LogicalRelation(r: InsertableRelation), partition, child, overwrite, ifNotExists) => { + l @ LogicalRelation(_: InsertableRelation | _: HadoopFsRelation), _, child, _, _) => { // First, make sure the data to be inserted have the same number of fields with the // schema of the relation. if (l.output.size != child.output.size) { @@ -101,13 +101,25 @@ private[sql] case class PreWriteCheck(catalog: Catalog) extends (LogicalPlan => } } - case i @ logical.InsertIntoTable( - l: LogicalRelation, partition, query, overwrite, ifNotExists) - if !l.isInstanceOf[InsertableRelation] => + case logical.InsertIntoTable(LogicalRelation(r: HadoopFsRelation), part, _, _, _) => + // We need to make sure the partition columns specified by users do match partition + // columns of the relation. + val existingPartitionColumns = r.partitionColumns.fieldNames.toSet + val specifiedPartitionColumns = part.keySet + if (existingPartitionColumns != specifiedPartitionColumns) { + failAnalysis(s"Specified partition columns " + + s"(${specifiedPartitionColumns.mkString(", ")}) " + + s"do not match the partition columns of the table. Please use " + + s"(${existingPartitionColumns.mkString(", ")}) as the partition columns.") + } else { + // OK + } + + case logical.InsertIntoTable(l: LogicalRelation, _, _, _, _) => // The relation in l is not an InsertableRelation. failAnalysis(s"$l does not allow insertion.") - case CreateTableUsingAsSelect(tableName, _, _, SaveMode.Overwrite, _, query) => + case CreateTableUsingAsSelect(tableName, _, _, _, SaveMode.Overwrite, _, query) => // When the SaveMode is Overwrite, we need to check if the table is an input table of // the query. If so, we will throw an AnalysisException to let users know it is not allowed. if (catalog.tableExists(Seq(tableName))) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/README.md b/sql/core/src/main/scala/org/apache/spark/sql/test/README.md new file mode 100644 index 0000000000000..d867f181b9728 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/test/README.md @@ -0,0 +1,7 @@ +README +====== + +Please do not add any class in this place unless it is used by `sql/console` or Python tests. +If you need to create any classes or traits that will be used by tests from both `sql/core` and +`sql/hive`, you can add them in the `src/test` of `sql/core` (tests of `sql/hive` +depend on the test jar of `sql/core`). 
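Taken together, HadoopFsRelationProvider, HadoopFsRelation, OutputWriterFactory and OutputWriter above form the new file-based data source contract. The skeleton below sketches the minimum a comma-separated text source might implement against those signatures; SimpleTextSource, SimpleTextRelation and SimpleTextOutputWriter are hypothetical names, and the whole block is an illustration of the interface shapes introduced in interfaces.scala, not code from this patch.

    import org.apache.hadoop.fs.{FileStatus, Path}
    import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext}

    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.{Row, SQLContext}
    import org.apache.spark.sql.sources._
    import org.apache.spark.sql.types.{StringType, StructField, StructType}

    // Resolved via `USING <package>` (DefaultSource lookup) or a fully qualified class name.
    class SimpleTextSource extends HadoopFsRelationProvider {
      override def createRelation(
          sqlContext: SQLContext,
          paths: Array[String],
          dataSchema: Option[StructType],
          partitionColumns: Option[StructType],
          parameters: Map[String, String]): HadoopFsRelation = {
        new SimpleTextRelation(paths, dataSchema, partitionColumns)(sqlContext)
      }
    }

    class SimpleTextRelation(
        override val paths: Array[String],
        maybeDataSchema: Option[StructType],
        override val userDefinedPartitionColumns: Option[StructType])(
        @transient val sqlContext: SQLContext)
      extends HadoopFsRelation {

      // Schema of the data files themselves; partition columns are appended by the base class.
      override def dataSchema: StructType = maybeDataSchema.getOrElse(
        StructType(Seq(
          StructField("a", StringType, nullable = true),
          StructField("b", StringType, nullable = true))))

      // Read path: invoked once per selected partition (or once overall when unpartitioned).
      override def buildScan(inputFiles: Array[FileStatus]): RDD[Row] = {
        val sc = sqlContext.sparkContext
        if (inputFiles.isEmpty) {
          sc.emptyRDD[Row]
        } else {
          sc.textFile(inputFiles.map(_.getPath.toString).mkString(","))
            .map(line => Row.fromSeq(line.split(",", -1).toSeq))
        }
      }

      // Write path: only job-level setup happens on the driver; writers are created on executors.
      override def prepareJobForWrite(job: Job): OutputWriterFactory = new OutputWriterFactory {
        override def newInstance(
            path: String,
            dataSchema: StructType,
            context: TaskAttemptContext): OutputWriter =
          new SimpleTextOutputWriter(path, context)
      }
    }

    class SimpleTextOutputWriter(path: String, context: TaskAttemptContext) extends OutputWriter {
      // `path` is the task's work directory; the committer promotes files on commit.
      private val file = new Path(path, context.getTaskAttemptID.toString + ".csv")
      private val out = {
        val fs = file.getFileSystem(context.getConfiguration)
        new java.io.BufferedWriter(new java.io.OutputStreamWriter(fs.create(file, false), "UTF-8"))
      }

      // Dynamic partition columns are already stripped from `row` by the writer container.
      override def write(row: Row): Unit = out.write(row.toSeq.mkString("", ",", "\n"))

      // Invoked after all rows for this task are written, before the task output is committed.
      override def close(): Unit = out.close()
    }

Note that only the FileStatus-based buildScan is overridden here; the column- and filter-aware overloads fall back to it by default. Partition discovery comes from HadoopFsRelation itself, and the remaining partition handling is done by Spark SQL rather than by the source.
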
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java index c344a9b095c52..fcb8f5499cf84 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaApplySchemaSuite.java @@ -187,14 +187,14 @@ public void applySchemaToJSON() { null, "this is another simple string.")); - DataFrame df1 = sqlContext.jsonRDD(jsonRDD); + DataFrame df1 = sqlContext.read().json(jsonRDD); StructType actualSchema1 = df1.schema(); Assert.assertEquals(expectedSchema, actualSchema1); df1.registerTempTable("jsonTable1"); List actual1 = sqlContext.sql("select * from jsonTable1").collectAsList(); Assert.assertEquals(expectedResult, actual1); - DataFrame df2 = sqlContext.jsonRDD(jsonRDD, expectedSchema); + DataFrame df2 = sqlContext.read().schema(expectedSchema).json(jsonRDD); StructType actualSchema2 = df2.schema(); Assert.assertEquals(expectedSchema, actualSchema2); df2.registerTempTable("jsonTable2"); diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java index 78e847239f405..72c42f4fe376b 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java @@ -34,11 +34,11 @@ import java.io.Serializable; import java.util.Arrays; +import java.util.Comparator; import java.util.List; import java.util.Map; import static org.apache.spark.sql.functions.*; -import static org.apache.spark.sql.mathfunctions.*; public class JavaDataFrameSuite { private transient JavaSparkContext jsc; @@ -178,6 +178,33 @@ public void testCreateDataFrameFromJavaBeans() { Assert.assertEquals(bean.getD().get(i), d.apply(i)); } } + + private static Comparator CrosstabRowComparator = new Comparator() { + public int compare(Row row1, Row row2) { + String item1 = row1.getString(0); + String item2 = row2.getString(0); + return item1.compareTo(item2); + } + }; + + @Test + public void testCrosstab() { + DataFrame df = context.table("testData2"); + DataFrame crosstab = df.stat().crosstab("a", "b"); + String[] columnNames = crosstab.schema().fieldNames(); + Assert.assertEquals(columnNames[0], "a_b"); + Assert.assertEquals(columnNames[1], "1"); + Assert.assertEquals(columnNames[2], "2"); + Row[] rows = crosstab.collect(); + Arrays.sort(rows, CrosstabRowComparator); + Integer count = 1; + for (Row row : rows) { + Assert.assertEquals(row.get(0).toString(), count.toString()); + Assert.assertEquals(row.getLong(1), 1L); + Assert.assertEquals(row.getLong(2), 1L); + count++; + } + } @Test public void testFrequentItems() { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java index b76f7d421f643..2706e01bd28af 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/sources/JavaSaveLoadSuite.java @@ -67,7 +67,7 @@ public void setUp() throws IOException { jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}"); } JavaRDD rdd = sc.parallelize(jsonObjects); - df = sqlContext.jsonRDD(rdd); + df = sqlContext.read().json(rdd); df.registerTempTable("jsonTable"); } @@ -75,10 +75,8 @@ public void setUp() throws IOException { public void saveAndLoad() { Map options = new 
HashMap(); options.put("path", path.toString()); - df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, options); - - DataFrame loadedDF = sqlContext.load("org.apache.spark.sql.json", options); - + df.write().mode(SaveMode.ErrorIfExists).format("json").options(options).save(); + DataFrame loadedDF = sqlContext.read().format("json").options(options).load(); checkAnswer(loadedDF, df.collectAsList()); } @@ -86,12 +84,12 @@ public void saveAndLoad() { public void saveAndLoadWithSchema() { Map options = new HashMap(); options.put("path", path.toString()); - df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, options); + df.write().format("json").mode(SaveMode.ErrorIfExists).options(options).save(); List fields = new ArrayList(); fields.add(DataTypes.createStructField("b", DataTypes.StringType, true)); StructType schema = DataTypes.createStructType(fields); - DataFrame loadedDF = sqlContext.load("org.apache.spark.sql.json", schema, options); + DataFrame loadedDF = sqlContext.read().format("json").schema(schema).options(options).load(); checkAnswer(loadedDF, sqlContext.sql("SELECT b FROM jsonTable").collectAsList()); } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 6322faf4d9907..bfba379d9a518 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import org.scalatest.Matchers._ +import org.apache.spark.sql.execution.Project import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext.implicits._ @@ -27,7 +28,71 @@ import org.apache.spark.sql.types._ class ColumnExpressionSuite extends QueryTest { import org.apache.spark.sql.TestData._ - // TODO: Add test cases for bitwise operations. 
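The JavaSaveLoadSuite and JavaApplySchemaSuite changes above move the tests from the deprecated save/load/jsonRDD entry points onto the DataFrameReader/DataFrameWriter API. For reference, a hedged Scala sketch of the same migration, with an illustrative path and assuming df, sqlContext and schema are already in scope:

    import org.apache.spark.sql.SaveMode

    // 1.3-style calls being replaced in the tests above:
    //   df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, options)
    //   sqlContext.load("org.apache.spark.sql.json", options)

    // 1.4-style equivalents via DataFrameWriter / DataFrameReader:
    df.write.format("json").mode(SaveMode.ErrorIfExists).save("/tmp/people.json")
    val reloaded = sqlContext.read.format("json").load("/tmp/people.json")

    // A user-supplied schema replaces the old load(source, schema, options) overload:
    val withSchema = sqlContext.read.format("json").schema(schema).load("/tmp/people.json")
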
+ test("alias") { + val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") + assert(df.select(df("a").as("b")).columns.head === "b") + assert(df.select(df("a").alias("b")).columns.head === "b") + } + + test("single explode") { + val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") + checkAnswer( + df.select(explode('intList)), + Row(1) :: Row(2) :: Row(3) :: Nil) + } + + test("explode and other columns") { + val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") + + checkAnswer( + df.select($"a", explode('intList)), + Row(1, 1) :: + Row(1, 2) :: + Row(1, 3) :: Nil) + + checkAnswer( + df.select($"*", explode('intList)), + Row(1, Seq(1, 2, 3), 1) :: + Row(1, Seq(1, 2, 3), 2) :: + Row(1, Seq(1, 2, 3), 3) :: Nil) + } + + test("aliased explode") { + val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") + + checkAnswer( + df.select(explode('intList).as('int)).select('int), + Row(1) :: Row(2) :: Row(3) :: Nil) + + checkAnswer( + df.select(explode('intList).as('int)).select(sum('int)), + Row(6) :: Nil) + } + + test("explode on map") { + val df = Seq((1, Map("a" -> "b"))).toDF("a", "map") + + checkAnswer( + df.select(explode('map)), + Row("a", "b")) + } + + test("explode on map with aliases") { + val df = Seq((1, Map("a" -> "b"))).toDF("a", "map") + + checkAnswer( + df.select(explode('map).as("key1" :: "value1" :: Nil)).select("key1", "value1"), + Row("a", "b")) + } + + test("self join explode") { + val df = Seq((1, Seq(1, 2, 3))).toDF("a", "intList") + val exploded = df.select(explode('intList).as('i)) + + checkAnswer( + exploded.join(exploded, exploded("i") === exploded("i")).agg(count("*")), + Row(3) :: Nil) + } test("collect on column produced by a binary operator") { val df = Seq((1, 2, 3)).toDF("a", "b", "c") @@ -208,6 +273,20 @@ class ColumnExpressionSuite extends QueryTest { testData2.collect().toSeq.filter(r => r.getInt(0) <= r.getInt(1))) } + test("between") { + val testData = TestSQLContext.sparkContext.parallelize( + (0, 1, 2) :: + (1, 2, 3) :: + (2, 1, 0) :: + (2, 2, 4) :: + (3, 1, 6) :: + (3, 2, 0) :: Nil).toDF("a", "b", "c") + val expectAnswer = testData.collect().toSeq. + filter(r => r.getInt(0) >= r.getInt(1) && r.getInt(0) <= r.getInt(2)) + + checkAnswer(testData.filter($"a".between($"b", $"c")), expectAnswer) + } + val booleanData = TestSQLContext.createDataFrame(TestSQLContext.sparkContext.parallelize( Row(false, false) :: Row(false, true) :: @@ -243,6 +322,27 @@ class ColumnExpressionSuite extends QueryTest { Row(false, true) :: Row(true, false) :: Row(true, true) :: Nil) } + test("SPARK-7321 when conditional statements") { + val testData = (1 to 3).map(i => (i, i.toString)).toDF("key", "value") + + checkAnswer( + testData.select(when($"key" === 1, -1).when($"key" === 2, -2).otherwise(0)), + Seq(Row(-1), Row(-2), Row(0)) + ) + + // Without the ending otherwise, return null for unmatched conditions. + // Also test putting a non-literal value in the expression. + checkAnswer( + testData.select(when($"key" === 1, lit(0) - $"key").when($"key" === 2, -2)), + Seq(Row(-1), Row(-2), Row(null)) + ) + + // Test error handling for invalid expressions. 
+ intercept[IllegalArgumentException] { $"key".when($"key" === 1, -1) } + intercept[IllegalArgumentException] { $"key".otherwise(-1) } + intercept[IllegalArgumentException] { when($"key" === 1, -1).otherwise(-1).otherwise(-1) } + } + test("sqrt") { checkAnswer( testData.select(sqrt('key)).orderBy('key.asc), @@ -353,13 +453,51 @@ class ColumnExpressionSuite extends QueryTest { } test("rand") { - val randCol = testData.select('key, rand(5L).as("rand")) + val randCol = testData.select($"key", rand(5L).as("rand")) randCol.columns.length should be (2) val rows = randCol.collect() rows.foreach { row => assert(row.getDouble(1) <= 1.0) assert(row.getDouble(1) >= 0.0) } + + def checkNumProjects(df: DataFrame, expectedNumProjects: Int): Unit = { + val projects = df.queryExecution.executedPlan.collect { + case project: Project => project + } + assert(projects.size === expectedNumProjects) + } + + // We first create a plan with two Projects. + // Project [rand + 1 AS rand1, rand - 1 AS rand2] + // Project [key, (Rand 5 + 1) AS rand] + // LogicalRDD [key, value] + // Because Rand function is not deterministic, the column rand is not deterministic. + // So, in the optimizer, we will not collapse Project [rand + 1 AS rand1, rand - 1 AS rand2] + // and Project [key, Rand 5 AS rand]. The final plan still has two Projects. + val dfWithTwoProjects = + testData + .select($"key", (rand(5L) + 1).as("rand")) + .select(($"rand" + 1).as("rand1"), ($"rand" - 1).as("rand2")) + checkNumProjects(dfWithTwoProjects, 2) + + // Now, we add one more project rand1 - rand2 on top of the query plan. + // Since rand1 and rand2 are deterministic (they basically apply +/- to the generated + // rand value), we can collapse rand1 - rand2 to the Project generating rand1 and rand2. + // So, the plan will be optimized from ... + // Project [(rand1 - rand2) AS (rand1 - rand2)] + // Project [rand + 1 AS rand1, rand - 1 AS rand2] + // Project [key, (Rand 5 + 1) AS rand] + // LogicalRDD [key, value] + // to ... 
+ // Project [((rand + 1 AS rand1) - (rand - 1 AS rand2)) AS (rand1 - rand2)] + // Project [key, Rand 5 AS rand] + // LogicalRDD [key, value] + val dfWithThreeProjects = dfWithTwoProjects.select($"rand1" - $"rand2") + checkNumProjects(dfWithThreeProjects, 2) + dfWithThreeProjects.collect().foreach { row => + assert(row.getDouble(0) === 2.0 +- 0.0001) + } } test("randn") { @@ -371,4 +509,35 @@ class ColumnExpressionSuite extends QueryTest { assert(row.getDouble(1) >= -4.0) } } + + test("bitwiseAND") { + checkAnswer( + testData2.select($"a".bitwiseAND(75)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) & 75))) + + checkAnswer( + testData2.select($"a".bitwiseAND($"b").bitwiseAND(22)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) & r.getInt(1) & 22))) + } + + test("bitwiseOR") { + checkAnswer( + testData2.select($"a".bitwiseOR(170)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) | 170))) + + checkAnswer( + testData2.select($"a".bitwiseOR($"b").bitwiseOR(42)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) | r.getInt(1) | 42))) + } + + test("bitwiseXOR") { + checkAnswer( + testData2.select($"a".bitwiseXOR(112)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) ^ 112))) + + checkAnswer( + testData2.select($"a".bitwiseXOR($"b").bitwiseXOR(39)), + testData2.collect().toSeq.map(r => Row(r.getInt(0) ^ r.getInt(1) ^ 39))) + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala new file mode 100644 index 0000000000000..232f05c00918f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -0,0 +1,193 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.TestData._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.TestSQLContext +import org.apache.spark.sql.test.TestSQLContext.implicits._ +import org.apache.spark.sql.types.DecimalType + + +class DataFrameAggregateSuite extends QueryTest { + + test("groupBy") { + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(1, 3), Row(2, 3), Row(3, 3)) + ) + checkAnswer( + testData2.groupBy("a").agg(sum($"b").as("totB")).agg(sum('totB)), + Row(9) + ) + checkAnswer( + testData2.groupBy("a").agg(count("*")), + Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil + ) + checkAnswer( + testData2.groupBy("a").agg(Map("*" -> "count")), + Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil + ) + checkAnswer( + testData2.groupBy("a").agg(Map("b" -> "sum")), + Row(1, 3) :: Row(2, 3) :: Row(3, 3) :: Nil + ) + + val df1 = Seq(("a", 1, 0, "b"), ("b", 2, 4, "c"), ("a", 2, 3, "d")) + .toDF("key", "value1", "value2", "rest") + + checkAnswer( + df1.groupBy("key").min(), + df1.groupBy("key").min("value1", "value2").collect() + ) + checkAnswer( + df1.groupBy("key").min("value2"), + Seq(Row("a", 0), Row("b", 4)) + ) + } + + test("spark.sql.retainGroupColumns config") { + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(1, 3), Row(2, 3), Row(3, 3)) + ) + + TestSQLContext.conf.setConf("spark.sql.retainGroupColumns", "false") + checkAnswer( + testData2.groupBy("a").agg(sum($"b")), + Seq(Row(3), Row(3), Row(3)) + ) + TestSQLContext.conf.setConf("spark.sql.retainGroupColumns", "true") + } + + test("agg without groups") { + checkAnswer( + testData2.agg(sum('b)), + Row(9) + ) + } + + test("average") { + checkAnswer( + testData2.agg(avg('a)), + Row(2.0)) + + // Also check mean + checkAnswer( + testData2.agg(mean('a)), + Row(2.0)) + + checkAnswer( + testData2.agg(avg('a), sumDistinct('a)), // non-partial + Row(2.0, 6.0) :: Nil) + + checkAnswer( + decimalData.agg(avg('a)), + Row(new java.math.BigDecimal(2.0))) + checkAnswer( + decimalData.agg(avg('a), sumDistinct('a)), // non-partial + Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + + checkAnswer( + decimalData.agg(avg('a cast DecimalType(10, 2))), + Row(new java.math.BigDecimal(2.0))) + // non-partial + checkAnswer( + decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), + Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) + } + + test("null average") { + checkAnswer( + testData3.agg(avg('b)), + Row(2.0)) + + checkAnswer( + testData3.agg(avg('b), countDistinct('b)), + Row(2.0, 1)) + + checkAnswer( + testData3.agg(avg('b), sumDistinct('b)), // non-partial + Row(2.0, 2.0)) + } + + test("zero average") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(avg('a)), + Row(null)) + + checkAnswer( + emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial + Row(null, null)) + } + + test("count") { + assert(testData2.count() === testData2.map(_ => 1).count()) + + checkAnswer( + testData2.agg(count('a), sumDistinct('a)), // non-partial + Row(6, 6.0)) + } + + test("null count") { + checkAnswer( + testData3.groupBy('a).agg(count('b)), + Seq(Row(1, 0), Row(2, 1)) + ) + + checkAnswer( + testData3.groupBy('a).agg(count('a + 'b)), + Seq(Row(1, 0), Row(2, 1)) + ) + + checkAnswer( + testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), + Row(2, 1, 2, 2, 1) + ) + + checkAnswer( + testData3.agg(count('b), countDistinct('b), 
sumDistinct('b)), // non-partial + Row(1, 1, 2) + ) + } + + test("zero count") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + assert(emptyTableData.count() === 0) + + checkAnswer( + emptyTableData.agg(count('a), sumDistinct('a)), // non-partial + Row(0, null)) + } + + test("zero sum") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(sum('a)), + Row(null)) + } + + test("zero sum distinct") { + val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b") + checkAnswer( + emptyTableData.agg(sumDistinct('a)), + Row(null)) + } + +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index ca03713ef4658..b1e0faa310b68 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.TestData._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.TestSQLContext.implicits._ import org.apache.spark.sql.types._ @@ -81,4 +82,10 @@ class DataFrameFunctionsSuite extends QueryTest { struct(col("a") * 2) } } + + test("bitwiseNOT") { + checkAnswer( + testData2.select(bitwiseNOT($"a")), + testData2.collect().toSeq.map(r => Row(~r.getInt(0)))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala new file mode 100644 index 0000000000000..787f3f175fea2 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.TestData._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.TestSQLContext._ +import org.apache.spark.sql.test.TestSQLContext.implicits._ + + +class DataFrameJoinSuite extends QueryTest { + + test("join - join using") { + val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") + val df2 = Seq(1, 2, 3).map(i => (i, (i + 1).toString)).toDF("int", "str") + + checkAnswer( + df.join(df2, "int"), + Row(1, "1", "2") :: Row(2, "2", "3") :: Row(3, "3", "4") :: Nil) + } + + test("join - join using self join") { + val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") + + // self join + checkAnswer( + df.join(df, "int"), + Row(1, "1", "1") :: Row(2, "2", "2") :: Row(3, "3", "3") :: Nil) + } + + test("join - self join") { + val df1 = testData.select(testData("key")).as('df1) + val df2 = testData.select(testData("key")).as('df2) + + checkAnswer( + df1.join(df2, $"df1.key" === $"df2.key"), + sql("SELECT a.key, b.key FROM testData a JOIN testData b ON a.key = b.key").collect().toSeq) + } + + test("join - using aliases after self join") { + val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") + checkAnswer( + df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("x.str").count(), + Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) + + checkAnswer( + df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").count(), + Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) + } + + test("[SPARK-6231] join - self join auto resolve ambiguity") { + val df = Seq((1, "1"), (2, "2")).toDF("key", "value") + checkAnswer( + df.join(df, df("key") === df("key")), + Row(1, "1", 1, "1") :: Row(2, "2", 2, "2") :: Nil) + + checkAnswer( + df.join(df.filter($"value" === "2"), df("key") === df("key")), + Row(2, "2", 2, "2") :: Nil) + + checkAnswer( + df.join(df, df("key") === df("key") && df("value") === 1), + Row(1, "1", 1, "1") :: Nil) + + val left = df.groupBy("key").agg(count("*")) + val right = df.groupBy("key").agg(sum("key")) + checkAnswer( + left.join(right, left("key") === right("key")), + Row(1, 1, 1, 1) :: Row(2, 1, 2, 2) :: Nil) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala index 06764d2a122f1..10e0e06868a2c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala @@ -25,25 +25,8 @@ import org.apache.spark.sql.test.TestSQLContext.implicits._ class DataFrameStatSuite extends FunSuite { - import TestData._ val sqlCtx = TestSQLContext def toLetter(i: Int): String = (i + 97).toChar.toString - - test("Frequent Items") { - val rows = Seq.tabulate(1000) { i => - if (i % 3 == 0) (1, toLetter(1), -1.0) else (i, toLetter(i), i * -1.0) - } - val df = rows.toDF("numbers", "letters", "negDoubles") - - val results = df.stat.freqItems(Array("numbers", "letters"), 0.1) - val items = results.collect().head - items.getSeq[Int](0) should contain (1) - items.getSeq[String](1) should contain (toLetter(1)) - - val singleColResults = df.stat.freqItems(Array("negDoubles"), 0.1) - val items2 = singleColResults.collect().head - items2.getSeq[Double](0) should contain (-1.0) - } test("pearson correlation") { val df = Seq.tabulate(10)(i => (i, 2 * i, i * -1.0)).toDF("a", "b", "c") @@ -76,7 +59,43 @@ class DataFrameStatSuite extends FunSuite { intercept[IllegalArgumentException] { 
df.stat.cov("singles", "letters") // doesn't accept non-numerical dataTypes } + val decimalData = Seq.tabulate(6)(i => (BigDecimal(i % 3), BigDecimal(i % 2))).toDF("a", "b") val decimalRes = decimalData.stat.cov("a", "b") assert(math.abs(decimalRes) < 1e-12) } + + test("crosstab") { + val df = Seq((0, 0), (2, 1), (1, 0), (2, 0), (0, 0), (2, 0)).toDF("a", "b") + val crosstab = df.stat.crosstab("a", "b") + val columnNames = crosstab.schema.fieldNames + assert(columnNames(0) === "a_b") + assert(columnNames(1) === "0") + assert(columnNames(2) === "1") + val rows: Array[Row] = crosstab.collect().sortBy(_.getString(0)) + assert(rows(0).get(0).toString === "0") + assert(rows(0).getLong(1) === 2L) + assert(rows(0).get(2) === 0L) + assert(rows(1).get(0).toString === "1") + assert(rows(1).getLong(1) === 1L) + assert(rows(1).get(2) === 0L) + assert(rows(2).get(0).toString === "2") + assert(rows(2).getLong(1) === 2L) + assert(rows(2).getLong(2) === 1L) + } + + test("Frequent Items") { + val rows = Seq.tabulate(1000) { i => + if (i % 3 == 0) (1, toLetter(1), -1.0) else (i, toLetter(i), i * -1.0) + } + val df = rows.toDF("numbers", "letters", "negDoubles") + + val results = df.stat.freqItems(Array("numbers", "letters"), 0.1) + val items = results.collect().head + items.getSeq[Int](0) should contain (1) + items.getSeq[String](1) should contain (toLetter(1)) + + val singleColResults = df.stat.freqItems(Array("negDoubles"), 0.1) + val items2 = singleColResults.collect().head + items2.getSeq[Double](0) should contain (-1.0) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index e286fef23caa4..a4fd1058afce5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -22,9 +22,7 @@ import scala.language.postfixOps import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.sql.test.{ExamplePointUDT, ExamplePoint, TestSQLContext} -import org.apache.spark.sql.test.TestSQLContext.logicalPlanToSparkQuery import org.apache.spark.sql.test.TestSQLContext.implicits._ -import org.apache.spark.sql.test.TestSQLContext.sql class DataFrameSuite extends QueryTest { @@ -61,10 +59,10 @@ class DataFrameSuite extends QueryTest { } test("rename nested groupby") { - val df = Seq((1,(1,1))).toDF() + val df = Seq((1, (1, 1))).toDF() checkAnswer( - df.groupBy("_1").agg(col("_1"), sum("_2._1")).toDF("key", "total"), + df.groupBy("_1").agg(sum("_2._1")).toDF("key", "total"), Row(1, 1) :: Nil) } @@ -118,44 +116,6 @@ class DataFrameSuite extends QueryTest { ) } - test("join - join using") { - val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") - val df2 = Seq(1, 2, 3).map(i => (i, (i + 1).toString)).toDF("int", "str") - - checkAnswer( - df.join(df2, "int"), - Row(1, "1", "2") :: Row(2, "2", "3") :: Row(3, "3", "4") :: Nil) - } - - test("join - join using self join") { - val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") - - // self join - checkAnswer( - df.join(df, "int"), - Row(1, "1", "1") :: Row(2, "2", "2") :: Row(3, "3", "3") :: Nil) - } - - test("join - self join") { - val df1 = testData.select(testData("key")).as('df1) - val df2 = testData.select(testData("key")).as('df2) - - checkAnswer( - df1.join(df2, $"df1.key" === $"df2.key"), - sql("SELECT a.key, b.key FROM testData a JOIN testData b ON a.key = b.key").collect().toSeq) - } - - test("join - using aliases after 
self join") { - val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") - checkAnswer( - df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("x.str").count(), - Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) - - checkAnswer( - df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").count(), - Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) - } - test("explode") { val df = Seq((1, "a b c"), (2, "a b"), (3, "a")).toDF("number", "letters") val df2 = @@ -167,7 +127,7 @@ class DataFrameSuite extends QueryTest { df2 .select('_1 as 'letter, 'number) .groupBy('letter) - .agg('letter, countDistinct('number)), + .agg(countDistinct('number)), Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil ) } @@ -204,48 +164,6 @@ class DataFrameSuite extends QueryTest { testData.select('key).collect().toSeq) } - test("groupBy") { - checkAnswer( - testData2.groupBy("a").agg($"a", sum($"b")), - Seq(Row(1, 3), Row(2, 3), Row(3, 3)) - ) - checkAnswer( - testData2.groupBy("a").agg($"a", sum($"b").as("totB")).agg(sum('totB)), - Row(9) - ) - checkAnswer( - testData2.groupBy("a").agg(col("a"), count("*")), - Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil - ) - checkAnswer( - testData2.groupBy("a").agg(Map("*" -> "count")), - Row(1, 2) :: Row(2, 2) :: Row(3, 2) :: Nil - ) - checkAnswer( - testData2.groupBy("a").agg(Map("b" -> "sum")), - Row(1, 3) :: Row(2, 3) :: Row(3, 3) :: Nil - ) - - val df1 = Seq(("a", 1, 0, "b"), ("b", 2, 4, "c"), ("a", 2, 3, "d")) - .toDF("key", "value1", "value2", "rest") - - checkAnswer( - df1.groupBy("key").min(), - df1.groupBy("key").min("value1", "value2").collect() - ) - checkAnswer( - df1.groupBy("key").min("value2"), - Seq(Row("a", 0), Row("b", 4)) - ) - } - - test("agg without groups") { - checkAnswer( - testData2.agg(sum('b)), - Row(9) - ) - } - test("convert $\"attribute name\" into unresolved attribute") { checkAnswer( testData.where($"key" === lit(1)).select($"value"), @@ -293,23 +211,23 @@ class DataFrameSuite extends QueryTest { test("global sorting") { checkAnswer( testData2.orderBy('a.asc, 'b.asc), - Seq(Row(1,1), Row(1,2), Row(2,1), Row(2,2), Row(3,1), Row(3,2))) + Seq(Row(1, 1), Row(1, 2), Row(2, 1), Row(2, 2), Row(3, 1), Row(3, 2))) checkAnswer( testData2.orderBy(asc("a"), desc("b")), - Seq(Row(1,2), Row(1,1), Row(2,2), Row(2,1), Row(3,2), Row(3,1))) + Seq(Row(1, 2), Row(1, 1), Row(2, 2), Row(2, 1), Row(3, 2), Row(3, 1))) checkAnswer( testData2.orderBy('a.asc, 'b.desc), - Seq(Row(1,2), Row(1,1), Row(2,2), Row(2,1), Row(3,2), Row(3,1))) + Seq(Row(1, 2), Row(1, 1), Row(2, 2), Row(2, 1), Row(3, 2), Row(3, 1))) checkAnswer( testData2.orderBy('a.desc, 'b.desc), - Seq(Row(3,2), Row(3,1), Row(2,2), Row(2,1), Row(1,2), Row(1,1))) + Seq(Row(3, 2), Row(3, 1), Row(2, 2), Row(2, 1), Row(1, 2), Row(1, 1))) checkAnswer( testData2.orderBy('a.desc, 'b.asc), - Seq(Row(3,1), Row(3,2), Row(2,1), Row(2,2), Row(1,1), Row(1,2))) + Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2))) checkAnswer( arrayData.toDF().orderBy('data.getItem(0).asc), @@ -342,105 +260,6 @@ class DataFrameSuite extends QueryTest { mapData.take(1).map(r => Row.fromSeq(r.productIterator.toSeq))) } - test("average") { - checkAnswer( - testData2.agg(avg('a)), - Row(2.0)) - - checkAnswer( - testData2.agg(avg('a), sumDistinct('a)), // non-partial - Row(2.0, 6.0) :: Nil) - - checkAnswer( - decimalData.agg(avg('a)), - Row(new java.math.BigDecimal(2.0))) - checkAnswer( - decimalData.agg(avg('a), sumDistinct('a)), // non-partial - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) 
:: Nil) - - checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0))) - // non-partial - checkAnswer( - decimalData.agg(avg('a cast DecimalType(10, 2)), sumDistinct('a cast DecimalType(10, 2))), - Row(new java.math.BigDecimal(2.0), new java.math.BigDecimal(6)) :: Nil) - } - - test("null average") { - checkAnswer( - testData3.agg(avg('b)), - Row(2.0)) - - checkAnswer( - testData3.agg(avg('b), countDistinct('b)), - Row(2.0, 1)) - - checkAnswer( - testData3.agg(avg('b), sumDistinct('b)), // non-partial - Row(2.0, 2.0)) - } - - test("zero average") { - checkAnswer( - emptyTableData.agg(avg('a)), - Row(null)) - - checkAnswer( - emptyTableData.agg(avg('a), sumDistinct('b)), // non-partial - Row(null, null)) - } - - test("count") { - assert(testData2.count() === testData2.map(_ => 1).count()) - - checkAnswer( - testData2.agg(count('a), sumDistinct('a)), // non-partial - Row(6, 6.0)) - } - - test("null count") { - checkAnswer( - testData3.groupBy('a).agg('a, count('b)), - Seq(Row(1,0), Row(2, 1)) - ) - - checkAnswer( - testData3.groupBy('a).agg('a, count('a + 'b)), - Seq(Row(1,0), Row(2, 1)) - ) - - checkAnswer( - testData3.agg(count('a), count('b), count(lit(1)), countDistinct('a), countDistinct('b)), - Row(2, 1, 2, 2, 1) - ) - - checkAnswer( - testData3.agg(count('b), countDistinct('b), sumDistinct('b)), // non-partial - Row(1, 1, 2) - ) - } - - test("zero count") { - assert(emptyTableData.count() === 0) - - checkAnswer( - emptyTableData.agg(count('a), sumDistinct('a)), // non-partial - Row(0, null)) - } - - test("zero sum") { - checkAnswer( - emptyTableData.agg(sum('a)), - Row(null)) - } - - test("zero sum distinct") { - checkAnswer( - emptyTableData.agg(sumDistinct('a)), - Row(null)) - } - test("except") { checkAnswer( lowerCaseData.except(upperCaseData), @@ -488,7 +307,7 @@ class DataFrameSuite extends QueryTest { testData.collect().map { case Row(key: Int, value: String) => Row(key, value, key + 1) }.toSeq) - assert(df.schema.map(_.name).toSeq === Seq("key", "value", "newCol")) + assert(df.schema.map(_.name) === Seq("key", "value", "newCol")) } test("replace column using withColumn") { @@ -512,7 +331,7 @@ class DataFrameSuite extends QueryTest { checkAnswer( df, testData.collect().toSeq) - assert(df.schema.map(_.name) === Seq("key","value")) + assert(df.schema.map(_.name) === Seq("key", "value")) } test("withColumnRenamed") { @@ -523,7 +342,7 @@ class DataFrameSuite extends QueryTest { testData.collect().map { case Row(key: Int, value: String) => Row(key, value, key + 1) }.toSeq) - assert(df.schema.map(_.name).toSeq === Seq("key", "valueRenamed", "newCol")) + assert(df.schema.map(_.name) === Seq("key", "valueRenamed", "newCol")) } test("randomSplit") { @@ -545,30 +364,35 @@ class DataFrameSuite extends QueryTest { test("describe") { val describeTestData = Seq( - ("Bob", 16, 176), + ("Bob", 16, 176), ("Alice", 32, 164), ("David", 60, 192), - ("Amy", 24, 180)).toDF("name", "age", "height") + ("Amy", 24, 180)).toDF("name", "age", "height") val describeResult = Seq( - Row("count", 4, 4), - Row("mean", 33.0, 178.0), - Row("stddev", 16.583123951777, 10.0), - Row("min", 16, 164), - Row("max", 60, 192)) + Row("count", "4", "4"), + Row("mean", "33.0", "178.0"), + Row("stddev", "16.583123951777", "10.0"), + Row("min", "16", "164"), + Row("max", "60", "192")) val emptyDescribeResult = Seq( - Row("count", 0, 0), - Row("mean", null, null), - Row("stddev", null, null), - Row("min", null, null), - Row("max", null, null)) + Row("count", "0", "0"), + Row("mean", 
null, null), + Row("stddev", null, null), + Row("min", null, null), + Row("max", null, null)) def getSchemaAsSeq(df: DataFrame): Seq[String] = df.schema.map(_.name) val describeTwoCols = describeTestData.describe("age", "height") assert(getSchemaAsSeq(describeTwoCols) === Seq("summary", "age", "height")) checkAnswer(describeTwoCols, describeResult) + // All aggregate value should have been cast to string + describeTwoCols.collect().foreach { row => + assert(row.get(1).isInstanceOf[String], "expected string but found " + row.get(1).getClass) + assert(row.get(2).isInstanceOf[String], "expected string but found " + row.get(2).getClass) + } val describeAllCols = describeTestData.describe() assert(getSchemaAsSeq(describeAllCols) === Seq("summary", "age", "height")) @@ -598,6 +422,25 @@ class DataFrameSuite extends QueryTest { testData.select($"*").show(1000) } + test("SPARK-7319 showString") { + val expectedAnswer = """+---+-----+ + ||key|value| + |+---+-----+ + || 1| 1| + |+---+-----+ + |""".stripMargin + assert(testData.select($"*").showString(1) === expectedAnswer) + } + + test("SPARK-7327 show with empty dataFrame") { + val expectedAnswer = """+---+-----+ + ||key|value| + |+---+-----+ + |+---+-----+ + |""".stripMargin + assert(testData.select($"*").filter($"key" < 0).showString(1) === expectedAnswer) + } + test("createDataFrame(RDD[Row], StructType) should convert UDTs (SPARK-6672)") { val rowRDD = TestSQLContext.sparkContext.parallelize(Seq(Row(new ExamplePoint(1.0, 2.0)))) val schema = StructType(Array(StructField("point", new ExamplePointUDT(), false))) @@ -613,4 +456,125 @@ class DataFrameSuite extends QueryTest { Row(new java.math.BigDecimal(2.0))) TestSQLContext.setConf(SQLConf.CODEGEN_ENABLED, originalValue.toString) } + + test("SPARK-7133: Implement struct, array, and map field accessor") { + assert(complexData.filter(complexData("a")(0) === 2).count() == 1) + assert(complexData.filter(complexData("m")("1") === 1).count() == 1) + assert(complexData.filter(complexData("s")("key") === 1).count() == 1) + assert(complexData.filter(complexData("m")(complexData("s")("value")) === 1).count() == 1) + } + + test("SPARK-7551: support backticks for DataFrame attribute resolution") { + val df = TestSQLContext.read.json(TestSQLContext.sparkContext.makeRDD( + """{"a.b": {"c": {"d..e": {"f": 1}}}}""" :: Nil)) + checkAnswer( + df.select(df("`a.b`.c.`d..e`.`f`")), + Row(1) + ) + + val df2 = TestSQLContext.read.json(TestSQLContext.sparkContext.makeRDD( + """{"a b": {"c": {"d e": {"f": 1}}}}""" :: Nil)) + checkAnswer( + df2.select(df2("`a b`.c.d e.f")), + Row(1) + ) + + def checkError(testFun: => Unit): Unit = { + val e = intercept[org.apache.spark.sql.AnalysisException] { + testFun + } + assert(e.getMessage.contains("syntax error in attribute name:")) + } + checkError(df("`abc.`c`")) + checkError(df("`abc`..d")) + checkError(df("`a`.b.")) + checkError(df("`a.b`.c.`d")) + } + + test("SPARK-7324 dropDuplicates") { + val testData = TestSQLContext.sparkContext.parallelize( + (2, 1, 2) :: (1, 1, 1) :: + (1, 2, 1) :: (2, 1, 2) :: + (2, 2, 2) :: (2, 2, 1) :: + (2, 1, 1) :: (1, 1, 2) :: + (1, 2, 2) :: (1, 2, 1) :: Nil).toDF("key", "value1", "value2") + + checkAnswer( + testData.dropDuplicates(), + Seq(Row(2, 1, 2), Row(1, 1, 1), Row(1, 2, 1), + Row(2, 2, 2), Row(2, 1, 1), Row(2, 2, 1), + Row(1, 1, 2), Row(1, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("key", "value1")), + Seq(Row(2, 1, 2), Row(1, 2, 1), Row(1, 1, 1), Row(2, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("value1", 
"value2")), + Seq(Row(2, 1, 2), Row(1, 2, 1), Row(1, 1, 1), Row(2, 2, 2))) + + checkAnswer( + testData.dropDuplicates(Seq("key")), + Seq(Row(2, 1, 2), Row(1, 1, 1))) + + checkAnswer( + testData.dropDuplicates(Seq("value1")), + Seq(Row(2, 1, 2), Row(1, 2, 1))) + + checkAnswer( + testData.dropDuplicates(Seq("value2")), + Seq(Row(2, 1, 2), Row(1, 1, 1))) + } + + test("SPARK-7276: Project collapse for continuous select") { + var df = testData + for (i <- 1 to 5) { + df = df.select($"*") + } + + import org.apache.spark.sql.catalyst.plans.logical.Project + // make sure df have at most two Projects + val p = df.logicalPlan.asInstanceOf[Project].child.asInstanceOf[Project] + assert(!p.child.isInstanceOf[Project]) + } + + test("SPARK-7150 range api") { + // numSlice is greater than length + val res1 = TestSQLContext.range(0, 10, 1, 15).select("id") + assert(res1.count == 10) + assert(res1.agg(sum("id")).as("sumid").collect() === Seq(Row(45))) + + val res2 = TestSQLContext.range(3, 15, 3, 2).select("id") + assert(res2.count == 4) + assert(res2.agg(sum("id")).as("sumid").collect() === Seq(Row(30))) + + val res3 = TestSQLContext.range(1, -2).select("id") + assert(res3.count == 0) + + // start is positive, end is negative, step is negative + val res4 = TestSQLContext.range(1, -2, -2, 6).select("id") + assert(res4.count == 2) + assert(res4.agg(sum("id")).as("sumid").collect() === Seq(Row(0))) + + // start, end, step are negative + val res5 = TestSQLContext.range(-3, -8, -2, 1).select("id") + assert(res5.count == 3) + assert(res5.agg(sum("id")).as("sumid").collect() === Seq(Row(-15))) + + // start, end are negative, step is positive + val res6 = TestSQLContext.range(-8, -4, 2, 1).select("id") + assert(res6.count == 2) + assert(res6.agg(sum("id")).as("sumid").collect() === Seq(Row(-14))) + + val res7 = TestSQLContext.range(-10, -9, -20, 1).select("id") + assert(res7.count == 0) + + val res8 = TestSQLContext.range(Long.MinValue, Long.MaxValue, Long.MaxValue, 100).select("id") + assert(res8.count == 3) + assert(res8.agg(sum("id")).as("sumid").collect() === Seq(Row(-3))) + + val res9 = TestSQLContext.range(Long.MaxValue, Long.MinValue, Long.MinValue, 100).select("id") + assert(res9.count == 2) + assert(res9.agg(sum("id")).as("sumid").collect() === Seq(Row(Long.MaxValue - 1))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 037d392c1f929..407c789657834 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -167,10 +167,10 @@ class JoinSuite extends QueryTest with BeforeAndAfterEach { val y = testData2.where($"a" === 1).as("y") checkAnswer( x.join(y).where($"x.a" === $"y.a"), - Row(1,1,1,1) :: - Row(1,1,1,2) :: - Row(1,2,1,1) :: - Row(1,2,1,2) :: Nil + Row(1, 1, 1, 1) :: + Row(1, 1, 1, 2) :: + Row(1, 2, 1, 1) :: + Row(1, 2, 1, 2) :: Nil ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala index f9f41eb358bd5..3ce97c3fffdb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ListTablesSuite.scala @@ -28,7 +28,7 @@ class ListTablesSuite extends QueryTest with BeforeAndAfter { import org.apache.spark.sql.test.TestSQLContext.implicits._ val df = - sparkContext.parallelize((1 to 10).map(i => (i,s"str$i"))).toDF("key", "value") + 
sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") before { df.registerTempTable("ListTablesSuiteTable") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala index 9e19bb7482e9b..c4281c4b55c02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql import java.lang.{Double => JavaDouble} -import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.mathfunctions._ +import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext.implicits._ @@ -159,11 +158,11 @@ class MathExpressionsSuite extends QueryTest { } test("toDeg") { - testOneToOneMathFunction(toDeg, math.toDegrees) + testOneToOneMathFunction(toDegrees, math.toDegrees) } test("toRad") { - testOneToOneMathFunction(toRad, math.toRadians) + testOneToOneMathFunction(toRadians, math.toRadians) } test("cbrt") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index bbf9ab113ca43..98ba3c99283a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -67,6 +67,10 @@ class QueryTest extends PlanTest { checkAnswer(df, Seq(expectedAnswer)) } + protected def checkAnswer(df: DataFrame, expectedAnswer: DataFrame): Unit = { + checkAnswer(df, expectedAnswer.collect()) + } + def sqlTest(sqlString: String, expectedAnswer: Seq[Row])(implicit sqlContext: SQLContext) { test(sqlString) { checkAnswer(sqlContext.sql(sqlString), expectedAnswer) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala new file mode 100644 index 0000000000000..f186bc1c18123 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -0,0 +1,49 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.spark.sql + +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +import org.apache.spark.sql.test.TestSQLContext + +class SQLContextSuite extends FunSuite with BeforeAndAfterAll { + + private val testSqlContext = TestSQLContext + private val testSparkContext = TestSQLContext.sparkContext + + override def afterAll(): Unit = { + SQLContext.setLastInstantiatedContext(testSqlContext) + } + + test("getOrCreate instantiates SQLContext") { + SQLContext.clearLastInstantiatedContext() + val sqlContext = SQLContext.getOrCreate(testSparkContext) + assert(sqlContext != null, "SQLContext.getOrCreate returned null") + assert(SQLContext.getOrCreate(testSparkContext).eq(sqlContext), + "SQLContext created by SQLContext.getOrCreate not returned by SQLContext.getOrCreate") + } + + test("getOrCreate gets last explicitly instantiated SQLContext") { + SQLContext.clearLastInstantiatedContext() + val sqlContext = new SQLContext(testSparkContext) + assert(SQLContext.getOrCreate(testSparkContext) != null, + "SQLContext.getOrCreate after explicitly created SQLContext returned null") + assert(SQLContext.getOrCreate(testSparkContext).eq(sqlContext), + "SQLContext.getOrCreate after explicitly created SQLContext did not return the context") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0ab8558c1db13..bf18bf854aa4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.catalyst.DefaultParserDialect import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.execution.GeneratedAggregate import org.apache.spark.sql.functions._ @@ -29,7 +30,7 @@ import org.apache.spark.sql.test.TestSQLContext.{udf => _, _} import org.apache.spark.sql.types._ /** A SQL Dialect for testing purpose, and it can not be nested type */ -class MyDialect extends DefaultDialect +class MyDialect extends DefaultParserDialect class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { // Make sure the tables are loaded. 
@@ -38,8 +39,21 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { import org.apache.spark.sql.test.TestSQLContext.implicits._ val sqlCtx = TestSQLContext + test("SPARK-6743: no columns from cache") { + Seq( + (83, 0, 38), + (26, 0, 79), + (43, 81, 24) + ).toDF("a", "b", "c").registerTempTable("cachedData") + + cacheTable("cachedData") + checkAnswer( + sql("SELECT t1.b FROM cachedData, cachedData t1 GROUP BY t1.b"), + Row(0) :: Row(81) :: Nil) + } + test("self join with aliases") { - Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str").registerTempTable("df") + Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str").registerTempTable("df") checkAnswer( sql( @@ -62,7 +76,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("self join with alias in agg") { - Seq(1,2,3) + Seq(1, 2, 3) .map(i => (i, i.toString)) .toDF("int", "str") .groupBy("str") @@ -93,18 +107,18 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { newContext.sql("SELECT 1") } // test if the dialect set back to DefaultSQLDialect - assert(newContext.getSQLDialect().getClass === classOf[DefaultDialect]) + assert(newContext.getSQLDialect().getClass === classOf[DefaultParserDialect]) } test("SPARK-4625 support SORT BY in SimpleSQLParser & DSL") { checkAnswer( sql("SELECT a FROM testData2 SORT BY a"), - Seq(1, 1, 2 ,2 ,3 ,3).map(Row(_)) + Seq(1, 1, 2, 2, 3, 3).map(Row(_)) ) } test("grouping on nested fields") { - jsonRDD(sparkContext.parallelize("""{"nested": {"attribute": 1}, "value": 2}""" :: Nil)) + read.json(sparkContext.parallelize("""{"nested": {"attribute": 1}, "value": 2}""" :: Nil)) .registerTempTable("rows") checkAnswer( @@ -120,6 +134,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { Row(1, 1) :: Nil) } + test("SPARK-6201 IN type conversion") { + read.json( + sparkContext.parallelize(Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}"))) + .registerTempTable("d") + + checkAnswer( + sql("select * from d where d.a in (1,2)"), + Seq(Row("1"), Row("2"))) + } + test("SPARK-3176 Added Parser of SQL ABS()") { checkAnswer( sql("SELECT ABS(-1.3)"), @@ -131,7 +155,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { sql("SELECT ABS(2.5)"), Row(2.5)) } - + test("aggregation with codegen") { val originalValue = conf.codegenEnabled setConf(SQLConf.CODEGEN_ENABLED, "true") @@ -183,7 +207,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { "SELECT value, sum(key) FROM testData3x GROUP BY value", (1 to 100).map(i => Row(i.toString, 3 * i))) testCodeGen( - "SELECT sum(key), SUM(CAST(key as Double)) FROM testData3x", + "SELECT sum(key), SUM(CAST(key as Double)) FROM testData3x", Row(5050 * 3, 5050 * 3.0) :: Nil) // AVERAGE testCodeGen( @@ -287,6 +311,10 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-3173 Timestamp support in the parser") { + checkAnswer(sql( + "SELECT time FROM timestamps WHERE time='1969-12-31 16:00:00.0'"), + Row(java.sql.Timestamp.valueOf("1969-12-31 16:00:00"))) + checkAnswer(sql( "SELECT time FROM timestamps WHERE time=CAST('1969-12-31 16:00:00.001' AS TIMESTAMP)"), Row(java.sql.Timestamp.valueOf("1969-12-31 16:00:00.001"))) @@ -326,7 +354,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { test("left semi greater than predicate") { checkAnswer( sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2"), - Seq(Row(3,1), Row(3,2)) + Seq(Row(3, 1), Row(3, 2)) ) } @@ -343,16 +371,16 @@ class SQLQuerySuite extends QueryTest with 
BeforeAndAfterAll { test("agg") { checkAnswer( sql("SELECT a, SUM(b) FROM testData2 GROUP BY a"), - Seq(Row(1,3), Row(2,3), Row(3,3))) + Seq(Row(1, 3), Row(2, 3), Row(3, 3))) } test("literal in agg grouping expressions") { checkAnswer( sql("SELECT a, count(1) FROM testData2 GROUP BY a, 1"), - Seq(Row(1,2), Row(2,2), Row(3,2))) + Seq(Row(1, 2), Row(2, 2), Row(3, 2))) checkAnswer( sql("SELECT a, count(2) FROM testData2 GROUP BY a, 2"), - Seq(Row(1,2), Row(2,2), Row(3,2))) + Seq(Row(1, 2), Row(2, 2), Row(3, 2))) } test("aggregates with nulls") { @@ -377,19 +405,19 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { def sortTest(): Unit = { checkAnswer( sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC"), - Seq(Row(1,1), Row(1,2), Row(2,1), Row(2,2), Row(3,1), Row(3,2))) + Seq(Row(1, 1), Row(1, 2), Row(2, 1), Row(2, 2), Row(3, 1), Row(3, 2))) checkAnswer( sql("SELECT * FROM testData2 ORDER BY a ASC, b DESC"), - Seq(Row(1,2), Row(1,1), Row(2,2), Row(2,1), Row(3,2), Row(3,1))) + Seq(Row(1, 2), Row(1, 1), Row(2, 2), Row(2, 1), Row(3, 2), Row(3, 1))) checkAnswer( sql("SELECT * FROM testData2 ORDER BY a DESC, b DESC"), - Seq(Row(3,2), Row(3,1), Row(2,2), Row(2,1), Row(1,2), Row(1,1))) + Seq(Row(3, 2), Row(3, 1), Row(2, 2), Row(2, 1), Row(1, 2), Row(1, 1))) checkAnswer( sql("SELECT * FROM testData2 ORDER BY a DESC, b ASC"), - Seq(Row(3,1), Row(3,2), Row(2,1), Row(2,2), Row(1,1), Row(1,2))) + Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2))) checkAnswer( sql("SELECT b FROM binaryData ORDER BY a ASC"), @@ -524,7 +552,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { test("average overflow") { checkAnswer( sql("SELECT AVG(a),b FROM largeAndSmallInts group by b"), - Seq(Row(2147483645.0,1), Row(2.0,2))) + Seq(Row(2147483645.0, 1), Row(2.0, 2))) } test("count") { @@ -591,10 +619,10 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { | (SELECT * FROM testData2 WHERE a = 1) x JOIN | (SELECT * FROM testData2 WHERE a = 1) y |WHERE x.a = y.a""".stripMargin), - Row(1,1,1,1) :: - Row(1,1,1,2) :: - Row(1,2,1,1) :: - Row(1,2,1,2) :: Nil) + Row(1, 1, 1, 1) :: + Row(1, 1, 1, 2) :: + Row(1, 2, 1, 1) :: + Row(1, 2, 1, 2) :: Nil) } test("inner join, no matches") { @@ -862,6 +890,16 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { conf.clear() } + test("SET commands with illegal or inappropriate argument") { + conf.clear() + // Set negative mapred.reduce.tasks for automatically determing + // the number of reducers is not supported + intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-1")) + intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-01")) + intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-2")) + conf.clear() + } + test("apply schema") { val schema1 = StructType( StructField("f1", IntegerType, false) :: @@ -1175,7 +1213,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { test("SPARK-3483 Special chars in column names") { val data = sparkContext.parallelize( Seq("""{"key?number1": "value1", "key.number2": "value2"}""")) - jsonRDD(data).registerTempTable("records") + read.json(data).registerTempTable("records") sql("SELECT `key?number1`, `key.number2` FROM records") } @@ -1216,11 +1254,11 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-4322 Grouping field with struct field as sub expression") { - jsonRDD(sparkContext.makeRDD("""{"a": {"b": [{"c": 1}]}}""" :: Nil)).registerTempTable("data") + read.json(sparkContext.makeRDD("""{"a": 
{"b": [{"c": 1}]}}""" :: Nil)).registerTempTable("data") checkAnswer(sql("SELECT a.b[0].c FROM data GROUP BY a.b[0].c"), Row(1)) dropTempTable("data") - jsonRDD(sparkContext.makeRDD("""{"a": {"b": 1}}""" :: Nil)).registerTempTable("data") + read.json(sparkContext.makeRDD("""{"a": {"b": 1}}""" :: Nil)).registerTempTable("data") checkAnswer(sql("SELECT a.b + 1 FROM data GROUP BY a.b + 1"), Row(2)) dropTempTable("data") } @@ -1228,22 +1266,22 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { test("SPARK-4432 Fix attribute reference resolution error when using ORDER BY") { checkAnswer( sql("SELECT a + b FROM testData2 ORDER BY a"), - Seq(2, 3, 3 ,4 ,4 ,5).map(Row(_)) + Seq(2, 3, 3, 4, 4, 5).map(Row(_)) ) } test("oder by asc by default when not specify ascending and descending") { checkAnswer( sql("SELECT a, b FROM testData2 ORDER BY a desc, b"), - Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2,2), Row(1, 1), Row(1, 2)) + Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2)) ) } test("Supporting relational operator '<=>' in Spark SQL") { - val nullCheckData1 = TestData(1,"1") :: TestData(2,null) :: Nil + val nullCheckData1 = TestData(1, "1") :: TestData(2, null) :: Nil val rdd1 = sparkContext.parallelize((0 to 1).map(i => nullCheckData1(i))) rdd1.toDF().registerTempTable("nulldata1") - val nullCheckData2 = TestData(1,"1") :: TestData(2,null) :: Nil + val nullCheckData2 = TestData(1, "1") :: TestData(2, null) :: Nil val rdd2 = sparkContext.parallelize((0 to 1).map(i => nullCheckData2(i))) rdd2.toDF().registerTempTable("nulldata2") checkAnswer(sql("SELECT nulldata1.key FROM nulldata1 join " + @@ -1252,14 +1290,23 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("Multi-column COUNT(DISTINCT ...)") { - val data = TestData(1,"val_1") :: TestData(2,"val_2") :: Nil + val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil val rdd = sparkContext.parallelize((0 to 1).map(i => data(i))) rdd.toDF().registerTempTable("distinctData") checkAnswer(sql("SELECT COUNT(DISTINCT key,value) FROM distinctData"), Row(2)) } + test("SPARK-4699 case sensitivity SQL query") { + setConf(SQLConf.CASE_SENSITIVE, "false") + val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil + val rdd = sparkContext.parallelize((0 to 1).map(i => data(i))) + rdd.toDF().registerTempTable("testTable1") + checkAnswer(sql("SELECT VALUE FROM TESTTABLE1 where KEY = 1"), Row("val_1")) + setConf(SQLConf.CASE_SENSITIVE, "true") + } + test("SPARK-6145: ORDER BY test for nested fields") { - jsonRDD(sparkContext.makeRDD("""{"a": {"b": 1, "a": {"a": 1}}, "c": [{"d": 1}]}""" :: Nil)) + read.json(sparkContext.makeRDD("""{"a": {"b": 1, "a": {"a": 1}}, "c": [{"d": 1}]}""" :: Nil)) .registerTempTable("nestedOrder") checkAnswer(sql("SELECT 1 FROM nestedOrder ORDER BY a.b"), Row(1)) @@ -1271,14 +1318,14 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } test("SPARK-6145: special cases") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": {"b": [1]}, "b": [{"a": 1}], "c0": {"a": 1}}""" :: Nil)).registerTempTable("t") checkAnswer(sql("SELECT a.b[0] FROM t ORDER BY c0.a"), Row(1)) checkAnswer(sql("SELECT b[0].a FROM t ORDER BY c0.a"), Row(1)) } test("SPARK-6898: complete support for special chars in column names") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": {"c.b": 1}, "b.$q": [{"a@!.q": 1}], "q.w": {"w.i&": [1]}}""" :: Nil)) .registerTempTable("t") diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala index 3fa00fd9d0ccb..52d265b445e14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ScalaReflectionRelationSuite.scala @@ -80,14 +80,14 @@ class ScalaReflectionRelationSuite extends FunSuite { test("query case class RDD") { val data = ReflectData("a", 1, 1L, 1.toFloat, 1.toDouble, 1.toShort, 1.toByte, true, - new java.math.BigDecimal(1), new Date(12345), new Timestamp(12345), Seq(1,2,3)) + new java.math.BigDecimal(1), new Date(12345), new Timestamp(12345), Seq(1, 2, 3)) val rdd = sparkContext.parallelize(data :: Nil) rdd.toDF().registerTempTable("reflectData") assert(sql("SELECT * FROM reflectData").collect().head === Row("a", 1, 1L, 1.toFloat, 1.toDouble, 1.toShort, 1.toByte, true, new java.math.BigDecimal(1), Date.valueOf("1970-01-01"), - new Timestamp(12345), Seq(1,2,3))) + new Timestamp(12345), Seq(1, 2, 3))) } test("query case class RDD with nulls") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 225b51bd73d6c..725a18bfae3a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -86,8 +86,6 @@ object TestData { TestData3(2, Some(2)) :: Nil).toDF() testData3.registerTempTable("testData3") - val emptyTableData = logical.LocalRelation($"a".int, $"b".int) - case class UpperCaseData(N: Int, L: String) val upperCaseData = TestSQLContext.sparkContext.parallelize( @@ -111,8 +109,8 @@ object TestData { case class ArrayData(data: Seq[Int], nestedData: Seq[Seq[Int]]) val arrayData = TestSQLContext.sparkContext.parallelize( - ArrayData(Seq(1,2,3), Seq(Seq(1,2,3))) :: - ArrayData(Seq(2,3,4), Seq(Seq(2,3,4))) :: Nil) + ArrayData(Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: + ArrayData(Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) arrayData.toDF().registerTempTable("arrayData") case class MapData(data: scala.collection.Map[Int, String]) @@ -177,7 +175,7 @@ object TestData { "4, D4, true, 2147483644" :: Nil) case class TimestampField(time: Timestamp) - val timestamps = TestSQLContext.sparkContext.parallelize((1 to 3).map { i => + val timestamps = TestSQLContext.sparkContext.parallelize((0 to 3).map { i => TimestampField(new Timestamp(i)) }) timestamps.toDF().registerTempTable("timestamps") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index d615542ab50a7..1a9ba66416b21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -38,7 +38,7 @@ class UDFSuite extends QueryTest { } test("TwoArgument UDF") { - udf.register("strLenScala", (_: String).length + (_:Int)) + udf.register("strLenScala", (_: String).length + (_: Int)) assert(sql("SELECT strLenScala('test', 1)").head().getInt(0) === 5) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala index 2672e20deadc5..dc2d43a197f40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UserDefinedTypeSuite.scala @@ -105,13 +105,13 @@ class UserDefinedTypeSuite extends QueryTest { 
test("UDTs with Parquet") { val tempDir = Utils.createTempDir() tempDir.delete() - pointsRDD.saveAsParquetFile(tempDir.getCanonicalPath) + pointsRDD.write.parquet(tempDir.getCanonicalPath) } test("Repartition UDTs with Parquet") { val tempDir = Utils.createTempDir() tempDir.delete() - pointsRDD.repartition(1).saveAsParquetFile(tempDir.getCanonicalPath) + pointsRDD.repartition(1).write.parquet(tempDir.getCanonicalPath) } // Tests to make sure that all operators correctly convert types on the way out. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala index 1e105e259dce7..061efb37a0ac3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/ColumnTypeSuite.scala @@ -73,7 +73,7 @@ class ColumnTypeSuite extends FunSuite with Logging { checkActualSize(TIMESTAMP, new Timestamp(0L), 12) val binary = Array.fill[Byte](4)(0: Byte) - checkActualSize(BINARY, binary, 4 + 4) + checkActualSize(BINARY, binary, 4 + 4) val generic = Map(1 -> "a") checkActualSize(GENERIC, SparkSqlSerializer.serialize(generic), 4 + 8) @@ -167,7 +167,7 @@ class ColumnTypeSuite extends FunSuite with Logging { val serializer = new SparkSqlSerializer(conf).newInstance() val buffer = ByteBuffer.allocate(512) - val obj = CustomClass(Int.MaxValue,Long.MaxValue) + val obj = CustomClass(Int.MaxValue, Long.MaxValue) val serializedObj = serializer.serialize(obj).array() GENERIC.append(serializer.serialize(obj).array(), buffer) @@ -278,7 +278,7 @@ private[columnar] object CustomerSerializer extends Serializer[CustomClass] { override def read(kryo: Kryo, input: Input, aClass: Class[CustomClass]): CustomClass = { val a = input.readInt() val b = input.readLong() - CustomClass(a,b) + CustomClass(a, b) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/DictionaryEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/DictionaryEncodingSuite.scala index 64b70552eb047..cef60ec204faa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/DictionaryEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/DictionaryEncodingSuite.scala @@ -27,8 +27,8 @@ import org.apache.spark.sql.columnar.ColumnarTestUtils._ import org.apache.spark.sql.types.AtomicType class DictionaryEncodingSuite extends FunSuite { - testDictionaryEncoding(new IntColumnStats, INT) - testDictionaryEncoding(new LongColumnStats, LONG) + testDictionaryEncoding(new IntColumnStats, INT) + testDictionaryEncoding(new LongColumnStats, LONG) testDictionaryEncoding(new StringColumnStats, STRING) def testDictionaryEncoding[T <: AtomicType]( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/IntegralDeltaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/IntegralDeltaSuite.scala index bfd99f143bedc..5514590541dd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/IntegralDeltaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/IntegralDeltaSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.columnar.ColumnarTestUtils._ import org.apache.spark.sql.types.IntegralType class IntegralDeltaSuite extends FunSuite { - testIntegralDelta(new IntColumnStats, INT, IntDelta) + testIntegralDelta(new IntColumnStats, INT, IntDelta) testIntegralDelta(new 
LongColumnStats, LONG, LongDelta) def testIntegralDelta[I <: IntegralType]( @@ -116,7 +116,7 @@ class IntegralDeltaSuite extends FunSuite { test(s"$scheme: simple case") { val input = columnType match { - case INT => Seq(2: Int, 1: Int, 2: Int, 130: Int) + case INT => Seq(2: Int, 1: Int, 2: Int, 130: Int) case LONG => Seq(2: Long, 1: Long, 2: Long, 130: Long) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/RunLengthEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/RunLengthEncodingSuite.scala index fde7a4595be0e..6ee48f6291914 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/RunLengthEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/compression/RunLengthEncodingSuite.scala @@ -26,11 +26,11 @@ import org.apache.spark.sql.types.AtomicType class RunLengthEncodingSuite extends FunSuite { testRunLengthEncoding(new NoopColumnStats, BOOLEAN) - testRunLengthEncoding(new ByteColumnStats, BYTE) - testRunLengthEncoding(new ShortColumnStats, SHORT) - testRunLengthEncoding(new IntColumnStats, INT) - testRunLengthEncoding(new LongColumnStats, LONG) - testRunLengthEncoding(new StringColumnStats, STRING) + testRunLengthEncoding(new ByteColumnStats, BYTE) + testRunLengthEncoding(new ShortColumnStats, SHORT) + testRunLengthEncoding(new IntColumnStats, INT) + testRunLengthEncoding(new LongColumnStats, LONG) + testRunLengthEncoding(new StringColumnStats, STRING) def testRunLengthEncoding[T <: AtomicType]( columnStats: ColumnStats, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala index 27f063d73a9a9..15337c4045436 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlSerializer2Suite.scala @@ -148,6 +148,15 @@ abstract class SparkSqlSerializer2Suite extends QueryTest with BeforeAndAfterAll table("shuffle").collect()) } + test("key schema is null") { + val aggregations = allColumns.split(",").map(c => s"COUNT($c)").mkString(",") + val df = sql(s"SELECT $aggregations FROM shuffle") + checkSerializer(df.queryExecution.executedPlan, serializerClass) + checkAnswer( + df, + Row(1000, 1000, 0, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)) + } + test("value schema is null") { val df = sql(s"SELECT col0 FROM shuffle ORDER BY col0") checkSerializer(df.queryExecution.executedPlan, serializerClass) @@ -167,29 +176,20 @@ class SparkSqlSerializer2SortShuffleSuite extends SparkSqlSerializer2Suite { override def beforeAll(): Unit = { super.beforeAll() // Sort merge will not be triggered. - sql("set spark.sql.shuffle.partitions = 200") - } - - test("key schema is null") { - val aggregations = allColumns.split(",").map(c => s"COUNT($c)").mkString(",") - val df = sql(s"SELECT $aggregations FROM shuffle") - checkSerializer(df.queryExecution.executedPlan, serializerClass) - checkAnswer( - df, - Row(1000, 1000, 0, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000)) + val bypassMergeThreshold = + sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + sql(s"set spark.sql.shuffle.partitions=${bypassMergeThreshold-1}") } } /** For now, we will use SparkSqlSerializer for sort based shuffle with sort merge. 
*/ class SparkSqlSerializer2SortMergeShuffleSuite extends SparkSqlSerializer2Suite { - // We are expecting SparkSqlSerializer. - override val serializerClass: Class[Serializer] = - classOf[SparkSqlSerializer].asInstanceOf[Class[Serializer]] - override def beforeAll(): Unit = { super.beforeAll() // To trigger the sort merge. - sql("set spark.sql.shuffle.partitions = 201") + val bypassMergeThreshold = + sparkContext.conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200) + sql(s"set spark.sql.shuffle.partitions=${bypassMergeThreshold + 1}") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index b165ab2b1deb3..9ffb499ae7e35 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -35,6 +35,13 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { val testBytes = Array[Byte](99.toByte, 134.toByte, 135.toByte, 200.toByte, 205.toByte) + val testH2Dialect = new JdbcDialect { + def canHandle(url: String) : Boolean = url.startsWith("jdbc:h2") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = + Some(StringType) + } + before { Class.forName("org.h2.Driver") // Extra properties that will be specified for our database. We need these to test @@ -61,6 +68,14 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { |OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) + sql( + s""" + |CREATE TEMPORARY TABLE fetchtwo + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass', + | fetchSize '2') + """.stripMargin.replaceAll("\n", " ")) + sql( s""" |CREATE TEMPORARY TABLE parts @@ -104,6 +119,8 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { ).executeUpdate() conn.prepareStatement("insert into test.timetypes values ('12:34:56', " + "'1996-01-01', '2002-02-20 11:22:33.543543543')").executeUpdate() + conn.prepareStatement("insert into test.timetypes values ('12:34:56', " + + "null, '2002-02-20 11:22:33.543543543')").executeUpdate() conn.commit() sql( s""" @@ -127,6 +144,23 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { |OPTIONS (url '$url', dbtable 'TEST.FLTTYPES', user 'testUser', password 'testPass') """.stripMargin.replaceAll("\n", " ")) + conn.prepareStatement( + s""" + |create table test.nulltypes (a INT, b BOOLEAN, c TINYINT, d BINARY(20), e VARCHAR(20), + |f VARCHAR_IGNORECASE(20), g CHAR(20), h BLOB, i CLOB, j TIME, k DATE, l TIMESTAMP, + |m DOUBLE, n REAL, o DECIMAL(40, 20)) + """.stripMargin.replaceAll("\n", " ")).executeUpdate() + conn.prepareStatement("insert into test.nulltypes values (" + + "null, null, null, null, null, null, null, null, null, " + + "null, null, null, null, null, null)").executeUpdate() + conn.commit() + sql( + s""" + |CREATE TEMPORARY TABLE nulltypes + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable 'TEST.NULLTYPES', user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) + // Untested: IDENTITY, OTHER, UUID, ARRAY, and GEOMETRY types. 
} @@ -159,6 +193,14 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(names(2).equals("mary")) } + test("SELECT first field when fetchSize is two") { + val names = sql("SELECT NAME FROM fetchtwo").collect().map(x => x.getString(0)).sortWith(_ < _) + assert(names.size === 3) + assert(names(0).equals("fred")) + assert(names(1).equals("joe 'foo' \"bar\"")) + assert(names(2).equals("mary")) + } + test("SELECT second field") { val ids = sql("SELECT THEID FROM foobar").collect().map(x => x.getInt(0)).sortWith(_ < _) assert(ids.size === 3) @@ -167,6 +209,14 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(ids(2) === 3) } + test("SELECT second field when fetchSize is two") { + val ids = sql("SELECT THEID FROM fetchtwo").collect().map(x => x.getInt(0)).sortWith(_ < _) + assert(ids.size === 3) + assert(ids(0) === 1) + assert(ids(1) === 2) + assert(ids(2) === 3) + } + test("SELECT * partitioned") { assert(sql("SELECT * FROM parts").collect().size == 3) } @@ -185,23 +235,49 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { assert(ids(2) === 3) } + test("Register JDBC query with renamed fields") { + // Regression test for bug SPARK-7345 + sql( + s""" + |CREATE TEMPORARY TABLE renamed + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url', dbtable '(select NAME as NAME1, NAME as NAME2 from TEST.PEOPLE)', + |user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) + + val df = sql("SELECT * FROM renamed") + assert(df.schema.fields.size == 2) + assert(df.schema.fields(0).name == "NAME1") + assert(df.schema.fields(1).name == "NAME2") + } + test("Basic API") { - assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE").collect().size === 3) + assert(TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.PEOPLE", new Properties).collect().length === 3) + } + + test("Basic API with FetchSize") { + val properties = new Properties + properties.setProperty("fetchSize", "2") + assert(TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.PEOPLE", properties).collect().length === 3) } test("Partitioning via JDBCPartitioningInfo API") { - assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE", "THEID", 0, 4, 3) - .collect.size === 3) + assert( + TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", "THEID", 0, 4, 3, new Properties) + .collect().length === 3) } test("Partitioning via list-of-where-clauses API") { val parts = Array[String]("THEID < 2", "THEID >= 2") - assert(TestSQLContext.jdbc(urlWithUserAndPass, "TEST.PEOPLE", parts).collect().size === 3) + assert(TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", parts, new Properties) + .collect().length === 3) } test("H2 integral types") { val rows = sql("SELECT * FROM inttypes WHERE A IS NOT NULL").collect() - assert(rows.size === 1) + assert(rows.length === 1) assert(rows(0).getInt(0) === 1) assert(rows(0).getBoolean(1) === false) assert(rows(0).getInt(2) === 3) @@ -211,7 +287,7 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { test("H2 null entries") { val rows = sql("SELECT * FROM inttypes WHERE A IS NULL").collect() - assert(rows.size === 1) + assert(rows.length === 1) assert(rows(0).isNullAt(0)) assert(rows(0).isNullAt(1)) assert(rows(0).isNullAt(2)) @@ -251,28 +327,41 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { } test("test DATE types") { - val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").collect() - val cachedRows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").cache().collect() + val rows = 
TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.TIMETYPES", new Properties).collect() + val cachedRows = TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties) + .cache().collect() assert(rows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) + assert(rows(1).getAs[java.sql.Date](1) === null) assert(cachedRows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) } test("test DATE types in cache") { - val rows = TestSQLContext.jdbc(urlWithUserAndPass, "TEST.TIMETYPES").collect() - TestSQLContext - .jdbc(urlWithUserAndPass, "TEST.TIMETYPES").cache().registerTempTable("mycached_date") + val rows = + TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties).collect() + TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties) + .cache().registerTempTable("mycached_date") val cachedRows = sql("select * from mycached_date").collect() assert(rows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) assert(cachedRows(0).getAs[java.sql.Date](1) === java.sql.Date.valueOf("1996-01-01")) } + test("test types for null value") { + val rows = TestSQLContext.read.jdbc( + urlWithUserAndPass, "TEST.NULLTYPES", new Properties).collect() + assert((0 to 14).forall(i => rows(0).isNullAt(i))) + } + test("H2 floating-point types") { val rows = sql("SELECT * FROM flttypes").collect() assert(rows(0).getDouble(0) === 1.00000000000000022) // Yes, I meant ==. assert(rows(0).getDouble(1) === 1.00000011920928955) // Yes, I meant ==. assert(rows(0).getAs[BigDecimal](2) - .equals(new BigDecimal("123456789012345.54321543215432100000"))) + .equals(new BigDecimal("123456789012345.54321543215432100000"))) assert(rows(0).schema.fields(2).dataType === DecimalType(40, 20)) + val compareDecimal = sql("SELECT C FROM flttypes where C > C - 1").collect() + assert(compareDecimal(0).getAs[BigDecimal](0) + .equals(new BigDecimal("123456789012345.54321543215432100000"))) } test("SQL query as table name") { @@ -302,4 +391,46 @@ class JDBCSuite extends FunSuite with BeforeAndAfter { """.stripMargin.replaceAll("\n", " ")) } } + + test("Remap types via JdbcDialects") { + JdbcDialects.registerDialect(testH2Dialect) + val df = TestSQLContext.read.jdbc(urlWithUserAndPass, "TEST.PEOPLE", new Properties) + assert(df.schema.filter( + _.dataType != org.apache.spark.sql.types.StringType + ).isEmpty) + val rows = df.collect() + assert(rows(0).get(0).isInstanceOf[String]) + assert(rows(0).get(1).isInstanceOf[String]) + JdbcDialects.unregisterDialect(testH2Dialect) + } + + test("Default jdbc dialect registration") { + assert(JdbcDialects.get("jdbc:mysql://127.0.0.1/db") == MySQLDialect) + assert(JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") == PostgresDialect) + assert(JdbcDialects.get("test.invalid") == NoopDialect) + } + + test("Dialect unregister") { + JdbcDialects.registerDialect(testH2Dialect) + JdbcDialects.unregisterDialect(testH2Dialect) + assert(JdbcDialects.get(urlWithUserAndPass) == NoopDialect) + } + + test("Aggregated dialects") { + val agg = new AggregatedDialect(List(new JdbcDialect { + def canHandle(url: String) : Boolean = url.startsWith("jdbc:h2:") + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = + if (sqlType % 2 == 0) { + Some(LongType) + } else { + None + } + }, testH2Dialect)) + assert(agg.canHandle("jdbc:h2:xxx")) + assert(!agg.canHandle("jdbc:h2")) + assert(agg.getCatalystType(0, "", 1, null) == Some(LongType)) + 
assert(agg.getCatalystType(1, "", 1, null) == Some(StringType)) + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index ee5c7620d1a22..9f0fea7f05302 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -18,25 +18,59 @@ package org.apache.spark.sql.jdbc import java.sql.DriverManager +import java.util.Properties import org.scalatest.{BeforeAndAfter, FunSuite} -import org.apache.spark.sql.Row +import org.apache.spark.sql.{SaveMode, Row} import org.apache.spark.sql.test._ import org.apache.spark.sql.types._ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { val url = "jdbc:h2:mem:testdb2" var conn: java.sql.Connection = null + val url1 = "jdbc:h2:mem:testdb3" + var conn1: java.sql.Connection = null + val properties = new Properties() + properties.setProperty("user", "testUser") + properties.setProperty("password", "testPass") + properties.setProperty("rowId", "false") before { Class.forName("org.h2.Driver") conn = DriverManager.getConnection(url) conn.prepareStatement("create schema test").executeUpdate() + + conn1 = DriverManager.getConnection(url1, properties) + conn1.prepareStatement("create schema test").executeUpdate() + conn1.prepareStatement("drop table if exists test.people").executeUpdate() + conn1.prepareStatement( + "create table test.people (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate() + conn1.prepareStatement("insert into test.people values ('fred', 1)").executeUpdate() + conn1.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate() + conn1.prepareStatement("drop table if exists test.people1").executeUpdate() + conn1.prepareStatement( + "create table test.people1 (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate() + conn1.commit() + + TestSQLContext.sql( + s""" + |CREATE TEMPORARY TABLE PEOPLE + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url1', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) + + TestSQLContext.sql( + s""" + |CREATE TEMPORARY TABLE PEOPLE1 + |USING org.apache.spark.sql.jdbc + |OPTIONS (url '$url1', dbtable 'TEST.PEOPLE1', user 'testUser', password 'testPass') + """.stripMargin.replaceAll("\n", " ")) } after { conn.close() + conn1.close() } val sc = TestSQLContext.sparkContext @@ -56,52 +90,66 @@ class JDBCWriteSuite extends FunSuite with BeforeAndAfter { test("Basic CREATE") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) - df.createJDBCTable(url, "TEST.BASICCREATETEST", false) - assert(2 == TestSQLContext.jdbc(url, "TEST.BASICCREATETEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.BASICCREATETEST").collect()(0).length) + df.write.jdbc(url, "TEST.BASICCREATETEST", new Properties) + assert(2 == TestSQLContext.read.jdbc(url, "TEST.BASICCREATETEST", new Properties).count) + assert(2 == + TestSQLContext.read.jdbc(url, "TEST.BASICCREATETEST", new Properties).collect()(0).length) } test("CREATE with overwrite") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x3), schema3) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url, "TEST.DROPTEST", false) - assert(2 == TestSQLContext.jdbc(url, "TEST.DROPTEST").count) - assert(3 == TestSQLContext.jdbc(url, "TEST.DROPTEST").collect()(0).length) + df.write.jdbc(url1, "TEST.DROPTEST", 
properties) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).count) + assert(3 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) - df2.createJDBCTable(url, "TEST.DROPTEST", true) - assert(1 == TestSQLContext.jdbc(url, "TEST.DROPTEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.DROPTEST").collect()(0).length) + df2.write.mode(SaveMode.Overwrite).jdbc(url1, "TEST.DROPTEST", properties) + assert(1 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.DROPTEST", properties).collect()(0).length) } test("CREATE then INSERT to append") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url, "TEST.APPENDTEST", false) - df2.insertIntoJDBC(url, "TEST.APPENDTEST", false) - assert(3 == TestSQLContext.jdbc(url, "TEST.APPENDTEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.APPENDTEST").collect()(0).length) + df.write.jdbc(url, "TEST.APPENDTEST", new Properties) + df2.write.mode(SaveMode.Append).jdbc(url, "TEST.APPENDTEST", new Properties) + assert(3 == TestSQLContext.read.jdbc(url, "TEST.APPENDTEST", new Properties).count) + assert(2 == + TestSQLContext.read.jdbc(url, "TEST.APPENDTEST", new Properties).collect()(0).length) } test("CREATE then INSERT to truncate") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr1x2), schema2) - df.createJDBCTable(url, "TEST.TRUNCATETEST", false) - df2.insertIntoJDBC(url, "TEST.TRUNCATETEST", true) - assert(1 == TestSQLContext.jdbc(url, "TEST.TRUNCATETEST").count) - assert(2 == TestSQLContext.jdbc(url, "TEST.TRUNCATETEST").collect()(0).length) + df.write.jdbc(url1, "TEST.TRUNCATETEST", properties) + df2.write.mode(SaveMode.Overwrite).jdbc(url1, "TEST.TRUNCATETEST", properties) + assert(1 == TestSQLContext.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) } test("Incompatible INSERT to append") { val df = TestSQLContext.createDataFrame(sc.parallelize(arr2x2), schema2) val df2 = TestSQLContext.createDataFrame(sc.parallelize(arr2x3), schema3) - df.createJDBCTable(url, "TEST.INCOMPATIBLETEST", false) + df.write.jdbc(url, "TEST.INCOMPATIBLETEST", new Properties) intercept[org.apache.spark.SparkException] { - df2.insertIntoJDBC(url, "TEST.INCOMPATIBLETEST", true) + df2.write.mode(SaveMode.Append).jdbc(url, "TEST.INCOMPATIBLETEST", new Properties) } } + test("INSERT to JDBC Datasource") { + TestSQLContext.sql("INSERT INTO TABLE PEOPLE1 SELECT * FROM PEOPLE") + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) + } + + test("INSERT to JDBC Datasource with overwrite") { + TestSQLContext.sql("INSERT INTO TABLE PEOPLE1 SELECT * FROM PEOPLE") + TestSQLContext.sql("INSERT OVERWRITE TABLE PEOPLE1 SELECT * FROM PEOPLE") + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count) + assert(2 == TestSQLContext.read.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala index fd0e2746dc045..f8d62f9e7e02b 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala @@ -17,13 +17,15 @@ package org.apache.spark.sql.json +import java.io.StringWriter import java.sql.{Date, Timestamp} +import com.fasterxml.jackson.core.JsonFactory import org.scalactic.Tolerance._ import org.apache.spark.sql.TestData._ -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.json.JsonRDD.{compatibleType, enforceCorrectType} +import org.apache.spark.sql.catalyst.util.DateUtils +import org.apache.spark.sql.json.InferSchema.compatibleType import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ @@ -46,6 +48,18 @@ class JsonSuite extends QueryTest { s"${expected}(${expected.getClass}).") } + val factory = new JsonFactory() + def enforceCorrectType(value: Any, dataType: DataType): Any = { + val writer = new StringWriter() + val generator = factory.createGenerator(writer) + generator.writeObject(value) + generator.flush() + + val parser = factory.createParser(writer.toString) + parser.nextToken() + JacksonParser.convertField(factory, parser, dataType) + } + val intNumber: Int = 2147483647 checkTypePromotion(intNumber, enforceCorrectType(intNumber, IntegerType)) checkTypePromotion(intNumber.toLong, enforceCorrectType(intNumber, LongType)) @@ -200,7 +214,7 @@ class JsonSuite extends QueryTest { } test("Complex field and type inferring with null in sampling") { - val jsonDF = jsonRDD(jsonNullStruct) + val jsonDF = read.json(jsonNullStruct) val expectedSchema = StructType( StructField("headers", StructType( StructField("Charset", StringType, true) :: @@ -219,7 +233,7 @@ class JsonSuite extends QueryTest { } test("Primitive field and type inferring") { - val jsonDF = jsonRDD(primitiveFieldAndType) + val jsonDF = read.json(primitiveFieldAndType) val expectedSchema = StructType( StructField("bigInteger", DecimalType.Unlimited, true) :: @@ -247,7 +261,7 @@ class JsonSuite extends QueryTest { } test("Complex field and type inferring") { - val jsonDF = jsonRDD(complexFieldAndType1) + val jsonDF = read.json(complexFieldAndType1) val expectedSchema = StructType( StructField("arrayOfArray1", ArrayType(ArrayType(StringType, true), true), true) :: @@ -346,7 +360,7 @@ class JsonSuite extends QueryTest { } test("GetField operation on complex data type") { - val jsonDF = jsonRDD(complexFieldAndType1) + val jsonDF = read.json(complexFieldAndType1) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -362,7 +376,7 @@ class JsonSuite extends QueryTest { } test("Type conflict in primitive field values") { - val jsonDF = jsonRDD(primitiveFieldValueTypeConflict) + val jsonDF = read.json(primitiveFieldValueTypeConflict) val expectedSchema = StructType( StructField("num_bool", StringType, true) :: @@ -436,10 +450,10 @@ class JsonSuite extends QueryTest { } ignore("Type conflict in primitive field values (Ignored)") { - val jsonDF = jsonRDD(primitiveFieldValueTypeConflict) + val jsonDF = read.json(primitiveFieldValueTypeConflict) jsonDF.registerTempTable("jsonTable") - // Right now, the analyzer does not promote strings in a boolean expreesion. + // Right now, the analyzer does not promote strings in a boolean expression. // Number and Boolean conflict: resolve the type as boolean in this query. 
checkAnswer( sql("select num_bool from jsonTable where NOT num_bool"), @@ -489,7 +503,7 @@ class JsonSuite extends QueryTest { } test("Type conflict in complex field values") { - val jsonDF = jsonRDD(complexFieldValueTypeConflict) + val jsonDF = read.json(complexFieldValueTypeConflict) val expectedSchema = StructType( StructField("array", ArrayType(LongType, true), true) :: @@ -508,12 +522,12 @@ class JsonSuite extends QueryTest { Row(Seq(), "11", "[1,2,3]", Row(null), "[]") :: Row(null, """{"field":false}""", null, null, "{}") :: Row(Seq(4, 5, 6), null, "str", Row(null), "[7,8,9]") :: - Row(Seq(7), "{}","[str1,str2,33]", Row("str"), """{"field":true}""") :: Nil + Row(Seq(7), "{}", """["str1","str2",33]""", Row("str"), """{"field":true}""") :: Nil ) } test("Type conflict in array elements") { - val jsonDF = jsonRDD(arrayElementTypeConflict) + val jsonDF = read.json(arrayElementTypeConflict) val expectedSchema = StructType( StructField("array1", ArrayType(StringType, true), true) :: @@ -541,7 +555,7 @@ class JsonSuite extends QueryTest { } test("Handling missing fields") { - val jsonDF = jsonRDD(missingFields) + val jsonDF = read.json(missingFields) val expectedSchema = StructType( StructField("a", BooleanType, true) :: @@ -561,24 +575,24 @@ class JsonSuite extends QueryTest { dir.delete() val path = dir.getCanonicalPath sparkContext.parallelize(1 to 100).map(i => s"""{"a": 1, "b": "str$i"}""").saveAsTextFile(path) - val jsonDF = jsonFile(path, 0.49) + val jsonDF = read.option("samplingRatio", "0.49").json(path) val analyzed = jsonDF.queryExecution.analyzed assert( analyzed.isInstanceOf[LogicalRelation], - "The DataFrame returned by jsonFile should be based on JSONRelation.") + "The DataFrame returned by jsonFile should be based on LogicalRelation.") val relation = analyzed.asInstanceOf[LogicalRelation].relation assert( relation.isInstanceOf[JSONRelation], "The DataFrame returned by jsonFile should be based on JSONRelation.") - assert(relation.asInstanceOf[JSONRelation].path === path) + assert(relation.asInstanceOf[JSONRelation].path === Some(path)) assert(relation.asInstanceOf[JSONRelation].samplingRatio === (0.49 +- 0.001)) val schema = StructType(StructField("a", LongType, true) :: Nil) val logicalRelation = - jsonFile(path, schema).queryExecution.analyzed.asInstanceOf[LogicalRelation] + read.schema(schema).json(path).queryExecution.analyzed.asInstanceOf[LogicalRelation] val relationWithSchema = logicalRelation.relation.asInstanceOf[JSONRelation] - assert(relationWithSchema.path === path) + assert(relationWithSchema.path === Some(path)) assert(relationWithSchema.schema === schema) assert(relationWithSchema.samplingRatio > 0.99) } @@ -588,7 +602,7 @@ class JsonSuite extends QueryTest { dir.delete() val path = dir.getCanonicalPath primitiveFieldAndType.map(record => record.replaceAll("\n", " ")).saveAsTextFile(path) - val jsonDF = jsonFile(path) + val jsonDF = read.json(path) val expectedSchema = StructType( StructField("bigInteger", DecimalType.Unlimited, true) :: @@ -657,7 +671,7 @@ class JsonSuite extends QueryTest { StructField("null", StringType, true) :: StructField("string", StringType, true) :: Nil) - val jsonDF1 = jsonFile(path, schema) + val jsonDF1 = read.schema(schema).json(path) assert(schema === jsonDF1.schema) @@ -674,7 +688,7 @@ class JsonSuite extends QueryTest { "this is a simple string.") ) - val jsonDF2 = jsonRDD(primitiveFieldAndType, schema) + val jsonDF2 = read.schema(schema).json(primitiveFieldAndType) assert(schema === jsonDF2.schema) @@ -695,7 +709,7 @@ class 
JsonSuite extends QueryTest { test("Applying schemas with MapType") { val schemaWithSimpleMap = StructType( StructField("map", MapType(StringType, IntegerType, true), false) :: Nil) - val jsonWithSimpleMap = jsonRDD(mapType1, schemaWithSimpleMap) + val jsonWithSimpleMap = read.schema(schemaWithSimpleMap).json(mapType1) jsonWithSimpleMap.registerTempTable("jsonWithSimpleMap") @@ -723,7 +737,7 @@ class JsonSuite extends QueryTest { val schemaWithComplexMap = StructType( StructField("map", MapType(StringType, innerStruct, true), false) :: Nil) - val jsonWithComplexMap = jsonRDD(mapType2, schemaWithComplexMap) + val jsonWithComplexMap = read.schema(schemaWithComplexMap).json(mapType2) jsonWithComplexMap.registerTempTable("jsonWithComplexMap") @@ -749,7 +763,7 @@ class JsonSuite extends QueryTest { } test("SPARK-2096 Correctly parse dot notations") { - val jsonDF = jsonRDD(complexFieldAndType2) + val jsonDF = read.json(complexFieldAndType2) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -767,7 +781,7 @@ class JsonSuite extends QueryTest { } test("SPARK-3390 Complex arrays") { - val jsonDF = jsonRDD(complexFieldAndType2) + val jsonDF = read.json(complexFieldAndType2) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -790,7 +804,7 @@ class JsonSuite extends QueryTest { } test("SPARK-3308 Read top level JSON arrays") { - val jsonDF = jsonRDD(jsonArray) + val jsonDF = read.json(jsonArray) jsonDF.registerTempTable("jsonTable") checkAnswer( @@ -811,7 +825,7 @@ class JsonSuite extends QueryTest { val oldColumnNameOfCorruptRecord = TestSQLContext.conf.columnNameOfCorruptRecord TestSQLContext.setConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD, "_unparsed") - val jsonDF = jsonRDD(corruptRecords) + val jsonDF = read.json(corruptRecords) jsonDF.registerTempTable("jsonTable") val schema = StructType( @@ -865,7 +879,7 @@ class JsonSuite extends QueryTest { } test("SPARK-4068: nulls in arrays") { - val jsonDF = jsonRDD(nullsInArrays) + val jsonDF = read.json(nullsInArrays) jsonDF.registerTempTable("jsonTable") val schema = StructType( @@ -942,8 +956,8 @@ class JsonSuite extends QueryTest { assert(result2(1) === "{\"f1\":{\"f11\":2,\"f12\":false},\"f2\":{\"B2\":null}}") assert(result2(3) === "{\"f1\":{\"f11\":4,\"f12\":true},\"f2\":{\"D4\":2147483644}}") - val jsonDF = jsonRDD(primitiveFieldAndType) - val primTable = jsonRDD(jsonDF.toJSON) + val jsonDF = read.json(primitiveFieldAndType) + val primTable = read.json(jsonDF.toJSON) primTable.registerTempTable("primativeTable") checkAnswer( sql("select * from primativeTable"), @@ -955,8 +969,8 @@ class JsonSuite extends QueryTest { "this is a simple string.") ) - val complexJsonDF = jsonRDD(complexFieldAndType1) - val compTable = jsonRDD(complexJsonDF.toJSON) + val complexJsonDF = read.json(complexFieldAndType1) + val compTable = read.json(complexJsonDF.toJSON) compTable.registerTempTable("complexTable") // Access elements of a primitive array. 
checkAnswer( @@ -1020,15 +1034,24 @@ class JsonSuite extends QueryTest { } test("JSONRelation equality test") { - val relation1 = - JSONRelation("path", 1.0, Some(StructType(StructField("a", IntegerType, true) :: Nil)))(null) + val context = org.apache.spark.sql.test.TestSQLContext + val relation1 = new JSONRelation( + "path", + 1.0, + Some(StructType(StructField("a", IntegerType, true) :: Nil)), + context) val logicalRelation1 = LogicalRelation(relation1) - val relation2 = - JSONRelation("path", 0.5, Some(StructType(StructField("a", IntegerType, true) :: Nil)))( - org.apache.spark.sql.test.TestSQLContext) + val relation2 = new JSONRelation( + "path", + 0.5, + Some(StructType(StructField("a", IntegerType, true) :: Nil)), + context) val logicalRelation2 = LogicalRelation(relation2) - val relation3 = - JSONRelation("path", 1.0, Some(StructType(StructField("b", StringType, true) :: Nil)))(null) + val relation3 = new JSONRelation( + "path", + 1.0, + Some(StructType(StructField("b", StringType, true) :: Nil)), + context) val logicalRelation3 = LogicalRelation(relation3) assert(relation1 === relation2) @@ -1046,8 +1069,35 @@ class JsonSuite extends QueryTest { test("SPARK-6245 JsonRDD.inferSchema on empty RDD") { // This is really a test that it doesn't throw an exception - val emptySchema = JsonRDD.inferSchema(empty, 1.0, "") + val emptySchema = InferSchema(empty, 1.0, "") assert(StructType(Seq()) === emptySchema) } + test("SPARK-7565 MapType in JsonRDD") { + val useStreaming = getConf(SQLConf.USE_JACKSON_STREAMING_API, "true") + val oldColumnNameOfCorruptRecord = TestSQLContext.conf.columnNameOfCorruptRecord + TestSQLContext.setConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD, "_unparsed") + + val schemaWithSimpleMap = StructType( + StructField("map", MapType(StringType, IntegerType, true), false) :: Nil) + try{ + for (useStreaming <- List("true", "false")) { + setConf(SQLConf.USE_JACKSON_STREAMING_API, useStreaming) + val temp = Utils.createTempDir().getPath + + val df = read.schema(schemaWithSimpleMap).json(mapType1) + df.write.mode("overwrite").parquet(temp) + // order of MapType is not defined + assert(read.parquet(temp).count() == 5) + + val df2 = read.json(corruptRecords) + df2.write.mode("overwrite").parquet(temp) + checkAnswer(read.parquet(temp), df2.collect()) + } + } finally { + setConf(SQLConf.USE_JACKSON_STREAMING_API, useStreaming) + setConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD, oldColumnNameOfCorruptRecord) + } + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala index 10d0ede4dc0dc..bdc2ebabc5e9a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetFilterSuite.scala @@ -328,12 +328,12 @@ class ParquetDataSourceOnFilterSuite extends ParquetFilterSuiteBase with BeforeA withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED -> "true") { withTempPath { dir => val path = s"${dir.getCanonicalPath}/part=1" - (1 to 3).map(i => (i, i.toString)).toDF("a", "b").saveAsParquetFile(path) + (1 to 3).map(i => (i, i.toString)).toDF("a", "b").write.parquet(path) // If the "part = 1" filter gets pushed down, this query will throw an exception since // "part" is not a valid column in the actual Parquet file checkAnswer( - sqlContext.parquetFile(path).filter("part = 1"), + sqlContext.read.parquet(path).filter("part = 1"), (1 to 3).map(i => Row(i, i.toString, 1))) } } @@ -350,14 
+350,14 @@ class ParquetDataSourceOffFilterSuite extends ParquetFilterSuiteBase with Before override protected def afterAll(): Unit = { sqlContext.setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalConf.toString) } - + test("SPARK-6742: don't push down predicates which reference partition columns") { import sqlContext.implicits._ withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED -> "true") { withTempPath { dir => val path = s"${dir.getCanonicalPath}/part=1" - (1 to 3).map(i => (i, i.toString)).toDF("a", "b").saveAsParquetFile(path) + (1 to 3).map(i => (i, i.toString)).toDF("a", "b").write.parquet(path) // If the "part = 1" filter gets pushed down, this query will throw an exception since // "part" is not a valid column in the actual Parquet file @@ -365,7 +365,7 @@ class ParquetDataSourceOffFilterSuite extends ParquetFilterSuiteBase with Before path, Some(sqlContext.sparkContext.hadoopConfiguration), sqlContext, Seq(AttributeReference("part", IntegerType, false)()) )) - + checkAnswer( df.filter("a = 1 or part = 1"), (1 to 3).map(i => Row(1, i, i.toString))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala index b504842053690..dd48bb350f26d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetIOSuite.scala @@ -35,6 +35,7 @@ import parquet.schema.{MessageType, MessageTypeParser} import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ import org.apache.spark.sql.test.TestSQLContext.implicits._ @@ -113,24 +114,24 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { for ((precision, scale) <- Seq((5, 2), (1, 0), (1, 1), (18, 10), (18, 17))) { withTempPath { dir => val data = makeDecimalRDD(DecimalType(precision, scale)) - data.saveAsParquetFile(dir.getCanonicalPath) - checkAnswer(parquetFile(dir.getCanonicalPath), data.collect().toSeq) + data.write.parquet(dir.getCanonicalPath) + checkAnswer(read.parquet(dir.getCanonicalPath), data.collect().toSeq) } } // Decimals with precision above 18 are not yet supported - intercept[RuntimeException] { + intercept[Throwable] { withTempPath { dir => - makeDecimalRDD(DecimalType(19, 10)).saveAsParquetFile(dir.getCanonicalPath) - parquetFile(dir.getCanonicalPath).collect() + makeDecimalRDD(DecimalType(19, 10)).write.parquet(dir.getCanonicalPath) + read.parquet(dir.getCanonicalPath).collect() } } // Unlimited-length decimals are not yet supported - intercept[RuntimeException] { + intercept[Throwable] { withTempPath { dir => - makeDecimalRDD(DecimalType.Unlimited).saveAsParquetFile(dir.getCanonicalPath) - parquetFile(dir.getCanonicalPath).collect() + makeDecimalRDD(DecimalType.Unlimited).write.parquet(dir.getCanonicalPath) + read.parquet(dir.getCanonicalPath).collect() } } } @@ -145,8 +146,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { withTempPath { dir => val data = makeDateRDD() - data.saveAsParquetFile(dir.getCanonicalPath) - checkAnswer(parquetFile(dir.getCanonicalPath), data.collect().toSeq) + data.write.parquet(dir.getCanonicalPath) + checkAnswer(read.parquet(dir.getCanonicalPath), data.collect().toSeq) } } @@ -282,7 +283,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { withTempDir { dir => val 
path = new Path(dir.toURI.toString, "part-r-0.parquet") makeRawParquetFile(path) - checkAnswer(parquetFile(path.toString), (0 until 10).map { i => + checkAnswer(read.parquet(path.toString), (0 until 10).map { i => Row(i % 2 == 0, i, i.toLong, i.toFloat, i.toDouble) }) } @@ -310,8 +311,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { test("save - overwrite") { withParquetFile((1 to 10).map(i => (i, i.toString))) { file => val newData = (11 to 20).map(i => (i, i.toString)) - newData.toDF().save("org.apache.spark.sql.parquet", SaveMode.Overwrite, Map("path" -> file)) - checkAnswer(parquetFile(file), newData.map(Row.fromTuple)) + newData.toDF().write.format("parquet").mode(SaveMode.Overwrite).save(file) + checkAnswer(read.parquet(file), newData.map(Row.fromTuple)) } } @@ -319,8 +320,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { val data = (1 to 10).map(i => (i, i.toString)) withParquetFile(data) { file => val newData = (11 to 20).map(i => (i, i.toString)) - newData.toDF().save("org.apache.spark.sql.parquet", SaveMode.Ignore, Map("path" -> file)) - checkAnswer(parquetFile(file), data.map(Row.fromTuple)) + newData.toDF().write.format("parquet").mode(SaveMode.Ignore).save(file) + checkAnswer(read.parquet(file), data.map(Row.fromTuple)) } } @@ -329,8 +330,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { withParquetFile(data) { file => val newData = (11 to 20).map(i => (i, i.toString)) val errorMessage = intercept[Throwable] { - newData.toDF().save( - "org.apache.spark.sql.parquet", SaveMode.ErrorIfExists, Map("path" -> file)) + newData.toDF().write.format("parquet").mode(SaveMode.ErrorIfExists).save(file) }.getMessage assert(errorMessage.contains("already exists")) } @@ -340,8 +340,8 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { val data = (1 to 10).map(i => (i, i.toString)) withParquetFile(data) { file => val newData = (11 to 20).map(i => (i, i.toString)) - newData.toDF().save("org.apache.spark.sql.parquet", SaveMode.Append, Map("path" -> file)) - checkAnswer(parquetFile(file), (data ++ newData).map(Row.fromTuple)) + newData.toDF().write.format("parquet").mode(SaveMode.Append).save(file) + checkAnswer(read.parquet(file), (data ++ newData).map(Row.fromTuple)) } } @@ -373,7 +373,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { path, new Footer(path, new ParquetMetadata(fileMetadata, Nil)) :: Nil) - assertResult(parquetFile(path.toString).schema) { + assertResult(read.parquet(path.toString).schema) { StructType( StructField("a", BooleanType, nullable = false) :: StructField("b", IntegerType, nullable = false) :: @@ -391,7 +391,7 @@ class ParquetIOSuiteBase extends QueryTest with ParquetTest { sqlContext.udf.register("div0", (x: Int) => x / 0) withTempPath { dir => intercept[org.apache.spark.SparkException] { - sqlContext.sql("select div0(1)").saveAsParquetFile(dir.getCanonicalPath) + sqlContext.sql("select div0(1)").write.parquet(dir.getCanonicalPath) } val path = new Path(dir.getCanonicalPath, "_temporary") val fs = path.getFileSystem(configuration) @@ -419,11 +419,11 @@ class ParquetDataSourceOnIOSuite extends ParquetIOSuiteBase with BeforeAndAfterA test("SPARK-6330 regression test") { // In 1.3.0, save to fs other than file: without configuring core-site.xml would get: // IllegalArgumentException: Wrong FS: hdfs://..., expected: file:/// - intercept[java.io.FileNotFoundException] { - sqlContext.parquetFile("file:///nonexistent") + intercept[Throwable] { + sqlContext.read.parquet("file:///nonexistent") } 
val errorMessage = intercept[Throwable] { - sqlContext.parquetFile("hdfs://nonexistent") + sqlContext.read.parquet("hdfs://nonexistent") }.toString assert(errorMessage.contains("UnknownHostException")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala index b7561ce7298cb..3b29979452ad9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala @@ -16,15 +16,21 @@ */ package org.apache.spark.sql.parquet +import java.io.File +import java.math.BigInteger +import java.sql.Timestamp + import scala.collection.mutable.ArrayBuffer +import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.parquet.ParquetRelation2._ +import org.apache.spark.sql.sources.PartitioningUtils._ +import org.apache.spark.sql.sources.{LogicalRelation, Partition, PartitionSpec} import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.types._ -import org.apache.spark.sql.{QueryTest, Row, SQLContext} +import org.apache.spark.sql.{Column, QueryTest, Row, SQLContext} // The data where the partitioning key exists only in the directory structure. case class ParquetData(intField: Int, stringField: String) @@ -38,7 +44,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { import sqlContext._ import sqlContext.implicits._ - val defaultPartitionName = "__NULL__" + val defaultPartitionName = "__HIVE_DEFAULT_PARTITION__" test("column type inference") { def check(raw: String, literal: Literal): Unit = { @@ -53,44 +59,45 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { } test("parse partition") { - def check(path: String, expected: PartitionValues): Unit = { + def check(path: String, expected: Option[PartitionValues]): Unit = { assert(expected === parsePartition(new Path(path), defaultPartitionName)) } def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = { val message = intercept[T] { - parsePartition(new Path(path), defaultPartitionName) + parsePartition(new Path(path), defaultPartitionName).get }.getMessage assert(message.contains(expected)) } - check( - "file:///", - PartitionValues( - ArrayBuffer.empty[String], - ArrayBuffer.empty[Literal])) - - check( - "file://path/a=10", + check("file://path/a=10", Some { PartitionValues( ArrayBuffer("a"), - ArrayBuffer(Literal.create(10, IntegerType)))) + ArrayBuffer(Literal.create(10, IntegerType))) + }) - check( - "file://path/a=10/b=hello/c=1.5", + check("file://path/a=10/b=hello/c=1.5", Some { PartitionValues( ArrayBuffer("a", "b", "c"), ArrayBuffer( Literal.create(10, IntegerType), Literal.create("hello", StringType), - Literal.create(1.5, FloatType)))) + Literal.create(1.5, FloatType))) + }) - check( - "file://path/a=10/b_hello/c=1.5", + check("file://path/a=10/b_hello/c=1.5", Some { PartitionValues( ArrayBuffer("c"), - ArrayBuffer(Literal.create(1.5, FloatType)))) + ArrayBuffer(Literal.create(1.5, FloatType))) + }) + + check("file:///", None) + check("file:///path/_temporary", None) + check("file:///path/_temporary/c=1.5", None) + check("file:///path/_temporary/path", None) + check("file://path/a=10/_temporary/c=1.5", None) + check("file://path/a=10/c=1.5/_temporary", None) 
    checkThrows[AssertionError]("file://path/=10", "Empty partition column name")
    checkThrows[AssertionError]("file://path/a=", "Empty partition column value")
@@ -120,6 +127,25 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
          Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
          Partition(Row(10.5, "hello"), "hdfs://host:9000/path/a=10.5/b=hello"))))
+    check(Seq(
+      "hdfs://host:9000/path/_temporary",
+      "hdfs://host:9000/path/a=10/b=20",
+      "hdfs://host:9000/path/a=10.5/b=hello",
+      "hdfs://host:9000/path/a=10.5/_temporary",
+      "hdfs://host:9000/path/a=10.5/_TeMpOrArY",
+      "hdfs://host:9000/path/a=10.5/b=hello/_temporary",
+      "hdfs://host:9000/path/a=10.5/b=hello/_TEMPORARY",
+      "hdfs://host:9000/path/_temporary/path",
+      "hdfs://host:9000/path/a=11/_temporary/path",
+      "hdfs://host:9000/path/a=10.5/b=world/_temporary/path"),
+      PartitionSpec(
+        StructType(Seq(
+          StructField("a", FloatType),
+          StructField("b", StringType))),
+        Seq(
+          Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
+          Partition(Row(10.5, "hello"), "hdfs://host:9000/path/a=10.5/b=hello"))))
+
    check(Seq(
      s"hdfs://host:9000/path/a=10/b=20",
      s"hdfs://host:9000/path/a=$defaultPartitionName/b=hello"),
@@ -141,6 +167,11 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
      Seq(
        Partition(Row(10, null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),
        Partition(Row(10.5, null), s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"))))
+
+    check(Seq(
+      s"hdfs://host:9000/path1",
+      s"hdfs://host:9000/path2"),
+      PartitionSpec.emptySpec)
  }

  test("read partitioned table - normal case") {
@@ -149,12 +180,18 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
      pi <- Seq(1, 2)
      ps <- Seq("foo", "bar")
    } {
+      val dir = makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)
      makeParquetFile(
        (1 to 10).map(i => ParquetData(i, i.toString)),
-        makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps))
+        dir)
+      // Introduce _temporary dir to test the robustness of the schema discovery process.
+      new File(dir.toString, "_temporary").mkdir()
    }
+    // Introduce _temporary dir to the base dir to test the robustness of the schema discovery process.
+ new File(base.getCanonicalPath, "_temporary").mkdir() - parquetFile(base.getCanonicalPath).registerTempTable("t") + println("load the partitioned table") + read.parquet(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { checkAnswer( @@ -201,7 +238,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - parquetFile(base.getCanonicalPath).registerTempTable("t") + read.parquet(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { checkAnswer( @@ -249,12 +286,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - val parquetRelation = load( - "org.apache.spark.sql.parquet", - Map( - "path" -> base.getCanonicalPath, - ParquetRelation2.DEFAULT_PARTITION_NAME -> defaultPartitionName)) - + val parquetRelation = read.format("org.apache.spark.sql.parquet").load(base.getCanonicalPath) parquetRelation.registerTempTable("t") withTempTable("t") { @@ -294,12 +326,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) } - val parquetRelation = load( - "org.apache.spark.sql.parquet", - Map( - "path" -> base.getCanonicalPath, - ParquetRelation2.DEFAULT_PARTITION_NAME -> defaultPartitionName)) - + val parquetRelation = read.format("org.apache.spark.sql.parquet").load(base.getCanonicalPath) parquetRelation.registerTempTable("t") withTempTable("t") { @@ -331,7 +358,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { (1 to 10).map(i => (i, i.toString)).toDF("intField", "stringField"), makePartitionDir(base, defaultPartitionName, "pi" -> 2)) - load(base.getCanonicalPath, "org.apache.spark.sql.parquet").registerTempTable("t") + read.format("org.apache.spark.sql.parquet").load(base.getCanonicalPath).registerTempTable("t") withTempTable("t") { checkAnswer( @@ -340,4 +367,86 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest { } } } + + test("SPARK-7749 Non-partitioned table should have empty partition spec") { + withTempPath { dir => + (1 to 10).map(i => (i, i.toString)).toDF("a", "b").write.parquet(dir.getCanonicalPath) + val queryExecution = read.parquet(dir.getCanonicalPath).queryExecution + queryExecution.analyzed.collectFirst { + case LogicalRelation(relation: ParquetRelation2) => + assert(relation.partitionSpec === PartitionSpec.emptySpec) + }.getOrElse { + fail(s"Expecting a ParquetRelation2, but got:\n$queryExecution") + } + } + } + + test("SPARK-7847: Dynamic partition directory path escaping and unescaping") { + withTempPath { dir => + val df = Seq("/", "[]", "?").zipWithIndex.map(_.swap).toDF("i", "s") + df.write.format("parquet").partitionBy("s").save(dir.getCanonicalPath) + checkAnswer(read.parquet(dir.getCanonicalPath), df.collect()) + } + } + + test("Various partition value types") { + val row = + Row( + 100.toByte, + 40000.toShort, + Int.MaxValue, + Long.MaxValue, + 1.5.toFloat, + 4.5, + new java.math.BigDecimal(new BigInteger("212500"), 5), + new java.math.BigDecimal(2.125), + java.sql.Date.valueOf("2015-05-23"), + new Timestamp(0), + "This is a string, /[]?=:", + "This is not a partition column") + + // BooleanType is not supported yet + val partitionColumnTypes = + Seq( + ByteType, + ShortType, + IntegerType, + LongType, + FloatType, + DoubleType, + DecimalType(10, 5), + DecimalType.Unlimited, + DateType, + TimestampType, + 
StringType) + + val partitionColumns = partitionColumnTypes.zipWithIndex.map { + case (t, index) => StructField(s"p_$index", t) + } + + val schema = StructType(partitionColumns :+ StructField(s"i", StringType)) + val df = createDataFrame(sparkContext.parallelize(row :: Nil), schema) + + withTempPath { dir => + df.write.format("parquet").partitionBy(partitionColumns.map(_.name): _*).save(dir.toString) + val fields = schema.map(f => Column(f.name).cast(f.dataType)) + checkAnswer(read.load(dir.toString).select(fields: _*), row) + } + } + + test("SPARK-8037: Ignores files whose name starts with dot") { + withTempPath { dir => + val df = (1 to 3).map(i => (i, i, i, i)).toDF("a", "b", "c", "d") + + df.write + .format("parquet") + .partitionBy("b", "c", "d") + .save(dir.getCanonicalPath) + + Files.touch(new File(s"${dir.getCanonicalPath}/b=1", ".DS_Store")) + Files.createParentDirs(new File(s"${dir.getCanonicalPath}/b=1/c=1/.foo/bar")) + + checkAnswer(read.format("parquet").load(dir.getCanonicalPath), df) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala index b98ba09ccfc2d..304936fb2be8e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.parquet import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.types._ import org.apache.spark.sql.{SQLConf, QueryTest} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.test.TestSQLContext @@ -111,6 +112,18 @@ class ParquetQuerySuiteBase extends QueryTest with ParquetTest { List(Row("same", "run_5", 100))) } } + + test("SPARK-6917 DecimalType should work with non-native types") { + val data = (1 to 10).map(i => Row(Decimal(i, 18, 0), new java.sql.Timestamp(i))) + val schema = StructType(List(StructField("d", DecimalType(18, 0), false), + StructField("time", TimestampType, false)).toArray) + withTempPath { file => + val df = sqlContext.createDataFrame(sparkContext.parallelize(data), schema) + df.write.parquet(file.getCanonicalPath) + val df2 = sqlContext.read.parquet(file.getCanonicalPath) + checkAnswer(df2, df.collect().toSeq) + } + } } class ParquetDataSourceOnQuerySuite extends ParquetQuerySuiteBase with BeforeAndAfterAll { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala similarity index 59% rename from sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala rename to sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala index 9d17516e0ef7d..516ba373f41d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetTest.scala @@ -21,10 +21,9 @@ import java.io.File import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag -import scala.util.Try -import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} -import org.apache.spark.util.Utils +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.{DataFrame, SaveMode} /** * A helper trait that provides convenient facilities for Parquet testing. @@ -33,54 +32,9 @@ import org.apache.spark.util.Utils * convenient to use tuples rather than special case classes when writing test cases/suites. 
* Especially, `Tuple1.apply` can be used to easily wrap a single type/value. */ -private[sql] trait ParquetTest { - val sqlContext: SQLContext - +private[sql] trait ParquetTest extends SQLTestUtils { import sqlContext.implicits.{localSeqToDataFrameHolder, rddToDataFrameHolder} - import sqlContext.{conf, sparkContext} - - protected def configuration = sparkContext.hadoopConfiguration - - /** - * Sets all SQL configurations specified in `pairs`, calls `f`, and then restore all SQL - * configurations. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { - val (keys, values) = pairs.unzip - val currentValues = keys.map(key => Try(conf.getConf(key)).toOption) - (keys, values).zipped.foreach(conf.setConf) - try f finally { - keys.zip(currentValues).foreach { - case (key, Some(value)) => conf.setConf(key, value) - case (key, None) => conf.unsetConf(key) - } - } - } - - /** - * Generates a temporary path without creating the actual file/directory, then pass it to `f`. If - * a file/directory is created there by `f`, it will be delete after `f` returns. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withTempPath(f: File => Unit): Unit = { - val path = Utils.createTempDir() - path.delete() - try f(path) finally Utils.deleteRecursively(path) - } - - /** - * Creates a temporary directory, which is then passed to `f` and will be deleted after `f` - * returns. - * - * @todo Probably this method should be moved to a more general place - */ - protected def withTempDir(f: File => Unit): Unit = { - val dir = Utils.createTempDir().getCanonicalFile - try f(dir) finally Utils.deleteRecursively(dir) - } + import sqlContext.sparkContext /** * Writes `data` to a Parquet file, which is then passed to `f` and will be deleted after `f` @@ -90,7 +44,7 @@ private[sql] trait ParquetTest { (data: Seq[T]) (f: String => Unit): Unit = { withTempPath { file => - sparkContext.parallelize(data).toDF().saveAsParquetFile(file.getCanonicalPath) + sparkContext.parallelize(data).toDF().write.parquet(file.getCanonicalPath) f(file.getCanonicalPath) } } @@ -102,14 +56,7 @@ private[sql] trait ParquetTest { protected def withParquetDataFrame[T <: Product: ClassTag: TypeTag] (data: Seq[T]) (f: DataFrame => Unit): Unit = { - withParquetFile(data)(path => f(sqlContext.parquetFile(path))) - } - - /** - * Drops temporary table `tableName` after calling `f`. 
- */ - protected def withTempTable(tableName: String)(f: => Unit): Unit = { - try f finally sqlContext.dropTempTable(tableName) + withParquetFile(data)(path => f(sqlContext.read.parquet(path))) } /** @@ -128,12 +75,12 @@ private[sql] trait ParquetTest { protected def makeParquetFile[T <: Product: ClassTag: TypeTag]( data: Seq[T], path: File): Unit = { - data.toDF().save(path.getCanonicalPath, "org.apache.spark.sql.parquet", SaveMode.Overwrite) + data.toDF().write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath) } protected def makeParquetFile[T <: Product: ClassTag: TypeTag]( df: DataFrame, path: File): Unit = { - df.save(path.getCanonicalPath, "org.apache.spark.sql.parquet", SaveMode.Overwrite) + df.write.mode(SaveMode.Overwrite).parquet(path.getCanonicalPath) } protected def makePartitionDir( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 20a23b3bd6aa9..d2d1011b8e917 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -17,23 +17,23 @@ package org.apache.spark.sql.sources -import java.io.{IOException, File} +import java.io.{File, IOException} -import org.apache.spark.sql.AnalysisException import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.AnalysisException import org.apache.spark.util.Utils class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var path: File = null override def beforeAll(): Unit = { path = Utils.createTempDir() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - jsonRDD(rdd).registerTempTable("jt") + read.json(rdd).registerTempTable("jt") } override def afterAll(): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala index ca25751b9583d..5c3467158a01b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala @@ -43,7 +43,7 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo StructField("bigintType", LongType, nullable = false), StructField("tinyintType", ByteType, nullable = false), StructField("decimalType", DecimalType.Unlimited, nullable = false), - StructField("fixedDecimalType", DecimalType(5,1), nullable = false), + StructField("fixedDecimalType", DecimalType(5, 1), nullable = false), StructField("binaryType", BinaryType, nullable = false), StructField("booleanType", BooleanType, nullable = false), StructField("smallIntType", ShortType, nullable = false), @@ -51,8 +51,7 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo StructField("mapType", MapType(StringType, StringType)), StructField("arrayType", ArrayType(StringType)), StructField("structType", - StructType(StructField("f1",StringType) :: - (StructField("f2",IntegerType)) :: Nil + StructType(StructField("f1", StringType) :: StructField("f2", IntegerType) :: Nil ) ) )) @@ -64,7 +63,7 @@ case class SimpleDDLScan(from: Int, to: Int, table: String)(@transient val sqlCo } class DDLTestSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ before { sql( @@ -99,4 
+98,10 @@ class DDLTestSuite extends DataSourceTest { Row("arrayType", "array", ""), Row("structType", "struct", "") )) + + test("SPARK-7686 DescribeCommand should have correct physical plan output attributes") { + val attributes = sql("describe ddlPeople").queryExecution.executedPlan.output + assert(attributes.map(_.name) === Seq("col_name", "data_type", "comment")) + assert(attributes.map(_.dataType).toSet === Set(StringType)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala index 33c67355967dd..24ed665c67d2e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceTest.scala @@ -18,25 +18,13 @@ package org.apache.spark.sql.sources import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.analysis.Analyzer +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.test.TestSQLContext import org.scalatest.BeforeAndAfter abstract class DataSourceTest extends QueryTest with BeforeAndAfter { - // Case sensitivity is not configurable yet, but we want to test some edge cases. - // TODO: Remove when it is configurable - implicit val caseInsensisitiveContext = new SQLContext(TestSQLContext.sparkContext) { - @transient - override protected[sql] lazy val analyzer: Analyzer = - new Analyzer(catalog, functionRegistry, caseSensitive = false) { - override val extendedResolutionRules = - PreInsertCastAndRename :: - Nil + // We want to test some edge cases. + implicit val caseInsensitiveContext = new SQLContext(TestSQLContext.sparkContext) - override val extendedCheckRules = Seq( - sources.PreWriteCheck(catalog) - ) - } - } + caseInsensitiveContext.setConf(SQLConf.CASE_SENSITIVE, "false") } - diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala index cb5e5147ff189..db94b1f3e8926 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/FilteredScanSuite.scala @@ -97,7 +97,7 @@ object FiltersPushed { class FilteredScanSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ before { sql( @@ -154,7 +154,7 @@ class FilteredScanSuite extends DataSourceTest { sqlTest( "SELECT a, b FROM oneToTenFiltered WHERE a IN (1,3,5)", - Seq(1,3,5).map(i => Row(i, i * 2))) + Seq(1, 3, 5).map(i => Row(i, i * 2))) sqlTest( "SELECT a, b FROM oneToTenFiltered WHERE A = 1", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 80efe9728fbc2..6f375ef36237d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -21,19 +21,19 @@ import java.io.File import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.{SaveMode, AnalysisException, Row} import org.apache.spark.util.Utils class InsertSuite extends DataSourceTest with BeforeAndAfterAll { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var path: File = null override def beforeAll: Unit = { path = Utils.createTempDir() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, 
"b":"str${i}"}""")) - jsonRDD(rdd).registerTempTable("jt") + read.json(rdd).registerTempTable("jt") sql( s""" |CREATE TEMPORARY TABLE jsonTable (a int, b string) @@ -100,23 +100,48 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { test("INSERT OVERWRITE a JSONRelation multiple times") { sql( s""" - |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt - """.stripMargin) + |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt + """.stripMargin) + checkAnswer( + sql("SELECT a, b FROM jsonTable"), + (1 to 10).map(i => Row(i, s"str$i")) + ) + // Writing the table to less part files. + val rdd1 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""), 5) + read.json(rdd1).registerTempTable("jt1") sql( s""" - |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt - """.stripMargin) + |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt1 + """.stripMargin) + checkAnswer( + sql("SELECT a, b FROM jsonTable"), + (1 to 10).map(i => Row(i, s"str$i")) + ) + // Writing the table to more part files. + val rdd2 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}"""), 10) + read.json(rdd2).registerTempTable("jt2") sql( s""" - |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt - """.stripMargin) - + |INSERT OVERWRITE TABLE jsonTable SELECT a, b FROM jt2 + """.stripMargin) checkAnswer( sql("SELECT a, b FROM jsonTable"), (1 to 10).map(i => Row(i, s"str$i")) ) + + sql( + s""" + |INSERT OVERWRITE TABLE jsonTable SELECT a * 10, b FROM jt1 + """.stripMargin) + checkAnswer( + sql("SELECT a, b FROM jsonTable"), + (1 to 10).map(i => Row(i * 10, s"str$i")) + ) + + dropTempTable("jt1") + dropTempTable("jt2") } test("INSERT INTO not supported for JSONRelation for now") { @@ -128,6 +153,20 @@ class InsertSuite extends DataSourceTest with BeforeAndAfterAll { } } + test("save directly to the path of a JSON table") { + table("jt").selectExpr("a * 5 as a", "b").write.mode(SaveMode.Overwrite).json(path.toString) + checkAnswer( + sql("SELECT a, b FROM jsonTable"), + (1 to 10).map(i => Row(i * 5, s"str$i")) + ) + + table("jt").write.mode(SaveMode.Overwrite).json(path.toString) + checkAnswer( + sql("SELECT a, b FROM jsonTable"), + (1 to 10).map(i => Row(i, s"str$i")) + ) + } + test("it is not allowed to write to a table while querying it.") { val message = intercept[AnalysisException] { sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala index 6a1ddf2f8e98b..c2bc52e2120c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PrunedScanSuite.scala @@ -52,7 +52,7 @@ case class SimplePrunedScan(from: Int, to: Int)(@transient val sqlContext: SQLCo } class PrunedScanSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ before { sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala index cb287ba85c1f8..274c652dd14d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/SaveLoadSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.Utils class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ var originalDefaultSource: String = null 
@@ -42,7 +42,7 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { path.delete() val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - df = jsonRDD(rdd) + df = read.json(rdd) df.registerTempTable("jsonTable") } @@ -57,41 +57,48 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { def checkLoad(): Unit = { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - checkAnswer(load(path.toString), df.collect()) + checkAnswer(read.load(path.toString), df.collect()) // Test if we can pick up the data source name passed in load. conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - checkAnswer(load(path.toString, "org.apache.spark.sql.json"), df.collect()) - checkAnswer(load("org.apache.spark.sql.json", Map("path" -> path.toString)), df.collect()) + checkAnswer(read.format("json").load(path.toString), df.collect()) + checkAnswer(read.format("json").load(path.toString), df.collect()) val schema = StructType(StructField("b", StringType, true) :: Nil) checkAnswer( - load("org.apache.spark.sql.json", schema, Map("path" -> path.toString)), + read.format("json").schema(schema).load(path.toString), sql("SELECT b FROM jsonTable").collect()) } test("save with path and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - df.save(path.toString) + df.write.save(path.toString) + checkLoad() + } + + test("save with string mode and path, and load") { + conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") + path.createNewFile() + df.write.mode("overwrite").save(path.toString) checkLoad() } test("save with path and datasource, and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) checkLoad() } test("save with data source and options, and load") { conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.save("org.apache.spark.sql.json", SaveMode.ErrorIfExists, Map("path" -> path.toString)) + df.write.mode(SaveMode.ErrorIfExists).json(path.toString) checkLoad() } test("save and save again") { - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) var message = intercept[RuntimeException] { - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) }.getMessage assert( @@ -100,14 +107,14 @@ class SaveLoadSuite extends DataSourceTest with BeforeAndAfterAll { if (path.exists()) Utils.deleteRecursively(path) - df.save(path.toString, "org.apache.spark.sql.json") + df.write.json(path.toString) checkLoad() - df.save("org.apache.spark.sql.json", SaveMode.Overwrite, Map("path" -> path.toString)) + df.write.mode(SaveMode.Overwrite).json(path.toString) checkLoad() message = intercept[RuntimeException] { - df.save("org.apache.spark.sql.json", SaveMode.Append, Map("path" -> path.toString)) + df.write.mode(SaveMode.Append).json(path.toString) }.getMessage assert( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index 3b47b8adf313b..77af04a491742 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -88,7 +88,7 @@ case class AllDataTypesScan( } class TableScanSuite extends DataSourceTest { - import caseInsensisitiveContext._ + import caseInsensitiveContext._ 
  var tableWithSchemaExpected = (1 to 10).map { i =>
    Row(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
new file mode 100644
index 0000000000000..17a8b0cca09df
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import java.io.File
+
+import scala.util.Try
+
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.util.Utils
+
+trait SQLTestUtils {
+  val sqlContext: SQLContext
+
+  import sqlContext.{conf, sparkContext}
+
+  protected def configuration = sparkContext.hadoopConfiguration
+
+  /**
+   * Sets all SQL configurations specified in `pairs`, calls `f`, and then restores all SQL
+   * configurations.
+   *
+   * @todo Probably this method should be moved to a more general place
+   */
+  protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
+    val (keys, values) = pairs.unzip
+    val currentValues = keys.map(key => Try(conf.getConf(key)).toOption)
+    (keys, values).zipped.foreach(conf.setConf)
+    try f finally {
+      keys.zip(currentValues).foreach {
+        case (key, Some(value)) => conf.setConf(key, value)
+        case (key, None) => conf.unsetConf(key)
+      }
+    }
+  }
+
+  /**
+   * Generates a temporary path without creating the actual file/directory, then passes it to `f`. If
+   * a file/directory is created there by `f`, it will be deleted after `f` returns.
+   *
+   * @todo Probably this method should be moved to a more general place
+   */
+  protected def withTempPath(f: File => Unit): Unit = {
+    val path = Utils.createTempDir()
+    path.delete()
+    try f(path) finally Utils.deleteRecursively(path)
+  }
+
+  /**
+   * Creates a temporary directory, which is then passed to `f` and will be deleted after `f`
+   * returns.
+   *
+   * @todo Probably this method should be moved to a more general place
+   */
+  protected def withTempDir(f: File => Unit): Unit = {
+    val dir = Utils.createTempDir().getCanonicalFile
+    try f(dir) finally Utils.deleteRecursively(dir)
+  }
+
+  /**
+   * Drops temporary tables `tableNames` after calling `f`.
+   */
+  protected def withTempTable(tableNames: String*)(f: => Unit): Unit = {
+    try f finally tableNames.foreach(sqlContext.dropTempTable)
+  }
+
+  /**
+   * Drops tables `tableNames` after calling `f`.
+ */ + protected def withTable(tableNames: String*)(f: => Unit): Unit = { + try f finally { + tableNames.foreach { name => + sqlContext.sql(s"DROP TABLE IF EXISTS $name") + } + } + } +} diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 437f697d25bf3..0a9d23776ea19 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 0be5a92c2546c..94687eeda4179 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -17,23 +17,23 @@ package org.apache.spark.sql.hive.thriftserver +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + import org.apache.commons.logging.LogFactory import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.thrift.{ThriftBinaryCLIService, ThriftHttpCLIService} import org.apache.hive.service.server.{HiveServer2, ServerOptionsProcessor} -import org.apache.spark.sql.SQLConf -import org.apache.spark.{SparkContext, SparkConf, Logging} import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.sql.hive.HiveContext +import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd, SparkListenerJobStart} +import org.apache.spark.sql.SQLConf import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ -import org.apache.spark.scheduler.{SparkListenerJobStart, SparkListenerApplicationEnd, SparkListener} import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab +import org.apache.spark.sql.hive.{HiveContext, HiveShim} import org.apache.spark.util.Utils - -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer +import org.apache.spark.{Logging, SparkContext} /** * The main entry point for the Spark SQL port of HiveServer2. 
Starts up a `SparkSQLContext` and a @@ -51,6 +51,7 @@ object HiveThriftServer2 extends Logging { @DeveloperApi def startWithContext(sqlContext: HiveContext): Unit = { val server = new HiveThriftServer2(sqlContext) + sqlContext.setConf("spark.sql.hive.version", HiveShim.version) server.init(sqlContext.hiveconf) server.start() listener = new HiveThriftServer2Listener(server, sqlContext.conf) @@ -147,7 +148,7 @@ object HiveThriftServer2 extends Logging { override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { server.stop() } - + var onlineSessionNum: Int = 0 val sessionList = new mutable.LinkedHashMap[String, SessionInfo] val executionList = new mutable.LinkedHashMap[String, ExecutionInfo] val retainedStatements = @@ -170,11 +171,13 @@ object HiveThriftServer2 extends Logging { def onSessionCreated(ip: String, sessionId: String, userName: String = "UNKNOWN"): Unit = { val info = new SessionInfo(sessionId, System.currentTimeMillis, ip, userName) sessionList.put(sessionId, info) + onlineSessionNum += 1 trimSessionIfNecessary() } def onSessionClosed(sessionId: String): Unit = { sessionList(sessionId).finishTimestamp = System.currentTimeMillis + onlineSessionNum -= 1 } def onStatementStart( diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b7b6925aa87f7..14f6f658d9b75 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -37,13 +37,13 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.thrift.transport.TSocket import org.apache.spark.Logging -import org.apache.spark.sql.hive.HiveShim +import org.apache.spark.sql.hive.{HiveContext, HiveShim} import org.apache.spark.util.Utils private[hive] object SparkSQLCLIDriver { private var prompt = "spark-sql" private var continuedPrompt = "".padTo(prompt.length, ' ') - private var transport:TSocket = _ + private var transport: TSocket = _ installSignalHandler() @@ -74,7 +74,12 @@ private[hive] object SparkSQLCLIDriver { System.exit(1) } - val sessionState = new CliSessionState(new HiveConf(classOf[SessionState])) + val cliConf = new HiveConf(classOf[SessionState]) + // Override the location of the metastore since this is only used for local execution. + HiveContext.newTemporaryConfiguration().foreach { + case (key, value) => cliConf.set(key, value) + } + val sessionState = new CliSessionState(cliConf) sessionState.in = System.in try { @@ -91,10 +96,14 @@ private[hive] object SparkSQLCLIDriver { // Set all properties specified via command line. val conf: HiveConf = sessionState.getConf - sessionState.cmdProperties.entrySet().foreach { item: java.util.Map.Entry[Object, Object] => - conf.set(item.getKey.asInstanceOf[String], item.getValue.asInstanceOf[String]) - sessionState.getOverriddenConfigurations.put( - item.getKey.asInstanceOf[String], item.getValue.asInstanceOf[String]) + sessionState.cmdProperties.entrySet().foreach { item => + val key = item.getKey.asInstanceOf[String] + val value = item.getValue.asInstanceOf[String] + // We do not propagate metastore options to the execution copy of hive. 
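+ // (The execution-side HiveConf was pointed at a temporary local metastore above, so forwarding a
+ // user-supplied connection URL here would redirect execution-side Hive calls to a different metastore.)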
+ if (key != "javax.jdo.option.ConnectionURL") { + conf.set(key, value) + sessionState.getOverriddenConfigurations.put(key, value) + } } SessionState.start(sessionState) @@ -138,8 +147,9 @@ private[hive] object SparkSQLCLIDriver { case e: UnsupportedEncodingException => System.exit(3) } - // use the specified database if specified - cli.processSelectDatabase(sessionState); + if (sessionState.database != null) { + SparkSQLEnv.hiveContext.runSqlHive(s"USE ${sessionState.database}") + } // Execute -i init files (always in silent mode) cli.processInitFiles(sessionState) @@ -266,13 +276,13 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { driver.init() val out = sessionState.out - val start:Long = System.currentTimeMillis() + val start: Long = System.currentTimeMillis() if (sessionState.getIsVerbose) { out.println(cmd) } val rc = driver.run(cmd) val end = System.currentTimeMillis() - val timeTaken:Double = (end - start) / 1000.0 + val timeTaken: Double = (end - start) / 1000.0 ret = rc.getResponseCode if (ret != 0) { @@ -300,7 +310,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { res.clear() } } catch { - case e:IOException => + case e: IOException => console.printError( s"""Failed with exception ${e.getClass.getName}: ${e.getMessage} |${org.apache.hadoop.util.StringUtils.stringifyException(e)} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 97b46a01ba5b4..7c0c505e2d61e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.thriftserver +import java.io.PrintStream + import scala.collection.JavaConversions._ import org.apache.spark.scheduler.StatsReportListener @@ -39,7 +41,6 @@ private[hive] object SparkSQLEnv extends Logging { sparkConf .setAppName(s"SparkSQL::${Utils.localHostName()}") - .set("spark.sql.hive.version", HiveShim.version) .set( "spark.serializer", maybeSerializer.getOrElse("org.apache.spark.serializer.KryoSerializer")) @@ -51,6 +52,12 @@ private[hive] object SparkSQLEnv extends Logging { sparkContext.addSparkListener(new StatsReportListener()) hiveContext = new HiveContext(sparkContext) + hiveContext.metadataHive.setOut(new PrintStream(System.out, true, "UTF-8")) + hiveContext.metadataHive.setInfo(new PrintStream(System.err, true, "UTF-8")) + hiveContext.metadataHive.setError(new PrintStream(System.err, true, "UTF-8")) + + hiveContext.setConf("spark.sql.hive.version", HiveShim.version) + if (log.isDebugEnabled) { hiveContext.hiveconf.getAllProperties.toSeq.sorted.foreach { case (k, v) => logDebug(s"HiveConf var: $k=$v") diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 71b16b6bebffb..10c83d8b27a2a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -29,7 +29,7 @@ import org.apache.spark.ui.UIUtils._ import org.apache.spark.ui._ -/** Page for Spark Web UI that shows statistics of a streaming job */ +/** Page for Spark Web UI that 
shows statistics of a thrift server */ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("") with Logging { private val listener = parent.listener @@ -42,20 +42,20 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" generateBasicStats() ++
++

- {listener.sessionList.size} session(s) are online, + {listener.onlineSessionNum} session(s) are online, running {listener.totalRunning} SQL statement(s)

++ generateSessionStatsTable() ++ generateSQLStatsTable() - UIUtils.headerSparkPage("ThriftServer", content, parent, Some(5000)) + UIUtils.headerSparkPage("JDBC/ODBC Server", content, parent, Some(5000)) } - /** Generate basic stats of the streaming program */ + /** Generate basic stats of the thrift server program */ private def generateBasicStats(): Seq[Node] = { val timeSinceStart = System.currentTimeMillis() - startTime.getTime
  • - Started at: {startTime.toString} + Started at: {formatDate(startTime)}
  • Time since start: {formatDurationVerbose(timeSinceStart)} @@ -77,7 +77,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" [{id}] } - val detail = if(info.state == ExecutionState.FAILED) info.detail else info.executePlan + val detail = if (info.state == ExecutionState.FAILED) info.detail else info.executePlan {info.userName} @@ -85,7 +85,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" {info.groupId} {formatDate(info.startTimestamp)} - {if(info.finishTimestamp > 0) formatDate(info.finishTimestamp)} + {if (info.finishTimestamp > 0) formatDate(info.finishTimestamp)} {formatDurationOption(Some(info.totalTime))} {info.statement} {info.state} @@ -143,14 +143,14 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" val headerRow = Seq("User", "IP", "Session ID", "Start Time", "Finish Time", "Duration", "Total Execute") def generateDataRow(session: SessionInfo): Seq[Node] = { - val sessionLink = "%s/ThriftServer/session?id=%s" + val sessionLink = "%s/sql/session?id=%s" .format(UIUtils.prependBaseUri(parent.basePath), session.sessionId) {session.userName} {session.ip} - {session.sessionId} , + {session.sessionId} {formatDate(session.startTimestamp)} - {if(session.finishTimestamp > 0) formatDate(session.finishTimestamp)} + {if (session.finishTimestamp > 0) formatDate(session.finishTimestamp)} {formatDurationOption(Some(session.totalTime))} {session.totalExecution.toString} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala index 33ba038ecce73..3b01afa603cea 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala @@ -55,7 +55,7 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) Total run {sessionStat._2.totalExecution} SQL

++ generateSQLStatsTable(sessionStat._2.sessionId) - UIUtils.headerSparkPage("ThriftServer", content, parent, Some(5000)) + UIUtils.headerSparkPage("JDBC/ODBC Session", content, parent, Some(5000)) } /** Generate basic stats of the streaming program */ @@ -87,7 +87,7 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) [{id}] } - val detail = if(info.state == ExecutionState.FAILED) info.detail else info.executePlan + val detail = if (info.state == ExecutionState.FAILED) info.detail else info.executePlan {info.userName} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala index 343031f10c75c..94fd8a6bb60b9 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala @@ -27,7 +27,9 @@ import org.apache.spark.{SparkContext, Logging, SparkException} * This assumes the given SparkContext has enabled its SparkUI. */ private[thriftserver] class ThriftServerTab(sparkContext: SparkContext) - extends SparkUITab(getSparkUI(sparkContext), "ThriftServer") with Logging { + extends SparkUITab(getSparkUI(sparkContext), "sql") with Logging { + + override val name = "SQL" val parent = getSparkUI(sparkContext) val listener = HiveThriftServer2.listener diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index b070fa8eaa469..cc07db827d359 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -25,11 +25,15 @@ import scala.concurrent.{Await, Promise} import scala.sys.process.{Process, ProcessLogger} import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} +import org.scalatest.{BeforeAndAfter, FunSuite} import org.apache.spark.Logging import org.apache.spark.util.Utils +/** + * A test suite for the `spark-sql` CLI tool. Note that all test cases share the same temporary + * Hive metastore and warehouse. + */ class CliSuite extends FunSuite with BeforeAndAfter with Logging { val warehousePath = Utils.createTempDir() val metastorePath = Utils.createTempDir() @@ -58,13 +62,13 @@ class CliSuite extends FunSuite with BeforeAndAfter with Logging { | --master local | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath - | --driver-class-path ${sys.props("java.class.path")} """.stripMargin.split("\\s+").toSeq ++ extraArgs } var next = 0 val foundAllExpectedAnswers = Promise.apply[Unit]() - val queryStream = new ByteArrayInputStream(queries.mkString("\n").getBytes) + // Explicitly adds ENTER for each statement to make sure they are actually entered into the CLI. 
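+ // (With mkString("\n") the last statement had no trailing newline, so it might never be submitted
+ // before the input stream hit EOF.)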
+ val queryStream = new ByteArrayInputStream(queries.map(_ + "\n").mkString.getBytes) val buffer = new ArrayBuffer[String]() val lock = new Object @@ -124,7 +128,7 @@ class CliSuite extends FunSuite with BeforeAndAfter with Logging { "SELECT COUNT(*) FROM hive_test;" -> "5", "DROP TABLE hive_test;" - -> "Time taken: " + -> "OK" ) } @@ -151,4 +155,33 @@ class CliSuite extends FunSuite with BeforeAndAfter with Logging { -> "hive_test" ) } + + test("Commands using SerDe provided in --jars") { + val jarFile = + "../hive/src/test/resources/hive-hcatalog-core-0.13.1.jar" + .split("/") + .mkString(File.separator) + + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + + runCliWithin(1.minute, Seq("--jars", s"$jarFile"))( + """CREATE TABLE t1(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "OK", + "CREATE TABLE sourceTable (key INT, val STRING);" + -> "OK", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTable;" + -> "OK", + "INSERT INTO TABLE t1 SELECT key, val FROM sourceTable;" + -> "Time taken:", + "SELECT count(key) FROM t1;" + -> "5", + "DROP TABLE t1;" + -> "OK", + "DROP TABLE sourceTable;" + -> "OK" + ) + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 1fadea97fd07f..f9eff41b87281 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -27,6 +27,8 @@ import scala.concurrent.{Await, Promise} import scala.sys.process.{Process, ProcessLogger} import scala.util.{Random, Try} +import com.google.common.base.Charsets.UTF_8 +import com.google.common.io.Files import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.jdbc.HiveDriver import org.apache.hive.service.auth.PlainSaslHelper @@ -54,7 +56,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { override def mode: ServerMode.Value = ServerMode.binary private def withCLIServiceClient(f: ThriftCLIServiceClient => Unit): Unit = { - // Transport creation logics below mimics HiveConnection.createBinaryTransport + // Transport creation logic below mimics HiveConnection.createBinaryTransport val rawTransport = new TSocket("localhost", serverPort) val user = System.getProperty("user.name") val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) @@ -391,10 +393,10 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { val statements = connections.map(_.createStatement()) try { - statements.zip(fs).map { case (s, f) => f(s) } + statements.zip(fs).foreach { case (s, f) => f(s) } } finally { - statements.map(_.close()) - connections.map(_.close()) + statements.foreach(_.close()) + connections.foreach(_.close()) } } @@ -433,15 +435,33 @@ abstract class HiveThriftServer2Test extends FunSuite with BeforeAndAfterAll wit ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } + val driverClassPath = { + // Writes a temporary log4j.properties and prepend it to driver classpath, so that it + // overrides all other potential log4j configurations contained in other dependency jar files. 
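+ // (log4j 1.x picks up the first log4j.properties it finds on the classpath, which is why the temporary
+ // file is prepended rather than appended.)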
+ val tempLog4jConf = Utils.createTempDir().getCanonicalPath + + Files.write( + """log4j.rootCategory=INFO, console + |log4j.appender.console=org.apache.log4j.ConsoleAppender + |log4j.appender.console.target=System.err + |log4j.appender.console.layout=org.apache.log4j.PatternLayout + |log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + """.stripMargin, + new File(s"$tempLog4jConf/log4j.properties"), + UTF_8) + + tempLog4jConf + File.pathSeparator + sys.props("java.class.path") + } + s"""$startScript | --master local - | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port - | --driver-class-path ${sys.props("java.class.path")} + | --driver-class-path $driverClassPath + | --driver-java-options -Dlog4j.debug | --conf spark.ui.enabled=false """.stripMargin.split("\\s+").toSeq } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index 47541015a3611..4c9fab7ef6136 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -17,21 +17,18 @@ package org.apache.spark.sql.hive.thriftserver - - import scala.util.Random +import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver -import org.scalatest.{Matchers, BeforeAndAfterAll} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ +import org.scalatest.{BeforeAndAfterAll, Matchers} -import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.sql.hive.HiveContext - class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { @@ -75,9 +72,9 @@ class UISeleniumSuite """.stripMargin.split("\\s+").toSeq } - test("thrift server ui test") { - withJdbcStatement(statement =>{ - val baseURL = s"http://localhost:${uiPort}" + ignore("thrift server ui test") { + withJdbcStatement { statement => + val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", @@ -86,20 +83,20 @@ class UISeleniumSuite queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (baseURL) - find(cssSelector("""ul li a[href*="ThriftServer"]""")) should not be(None) + go to baseURL + find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { - go to (baseURL + "/ThriftServer") - find(id("sessionstat")) should not be(None) - find(id("sqlstat")) should not be(None) + go to (baseURL + "/sql") + find(id("sessionstat")) should not be None + find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } - }) + } } } diff --git a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala 
b/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala deleted file mode 100644 index b3a79ba1c7d6b..0000000000000 --- a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.thriftserver - -import java.sql.{Date, Timestamp} -import java.util.concurrent.Executors -import java.util.{ArrayList => JArrayList, Map => JMap, UUID} - -import org.apache.commons.logging.Log -import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hive.service.cli.thrift.TProtocolVersion -import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager - -import scala.collection.JavaConversions._ -import scala.collection.mutable.{ArrayBuffer, Map => SMap} - -import org.apache.hadoop.hive.common.`type`.HiveDecimal -import org.apache.hadoop.hive.metastore.api.FieldSchema -import org.apache.hadoop.hive.shims.ShimLoader -import org.apache.hadoop.security.UserGroupInformation -import org.apache.hive.service.cli._ -import org.apache.hive.service.cli.operation.ExecuteStatementOperation -import org.apache.hive.service.cli.session.{SessionManager, HiveSession} - -import org.apache.spark.Logging -import org.apache.spark.sql.{DataFrame, SQLConf, Row => SparkRow} -import org.apache.spark.sql.execution.SetCommand -import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ -import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes} -import org.apache.spark.sql.types._ - -/** - * A compatibility layer for interacting with Hive version 0.12.0. 
- */ -private[thriftserver] object HiveThriftServerShim { - val version = "0.12.0" - - def setServerUserName(sparkServiceUGI: UserGroupInformation, sparkCliService:SparkSQLCLIService) = { - val serverUserName = ShimLoader.getHadoopShims.getShortUserName(sparkServiceUGI) - setSuperField(sparkCliService, "serverUserName", serverUserName) - } -} - -private[hive] class SparkSQLDriver(val _context: HiveContext = SparkSQLEnv.hiveContext) - extends AbstractSparkSQLDriver(_context) { - override def getResults(res: JArrayList[String]): Boolean = { - if (hiveResponse == null) { - false - } else { - res.addAll(hiveResponse) - hiveResponse = null - true - } - } -} - -private[hive] class SparkExecuteStatementOperation( - parentSession: HiveSession, - statement: String, - confOverlay: JMap[String, String])( - hiveContext: HiveContext, - sessionToActivePool: SMap[SessionHandle, String]) - extends ExecuteStatementOperation(parentSession, statement, confOverlay) with Logging { - - private var result: DataFrame = _ - private var iter: Iterator[SparkRow] = _ - private var dataTypes: Array[DataType] = _ - - def close(): Unit = { - // RDDs will be cleaned automatically upon garbage collection. - logDebug("CLOSING") - } - - def getNextRowSet(order: FetchOrientation, maxRowsL: Long): RowSet = { - if (!iter.hasNext) { - new RowSet() - } else { - // maxRowsL here typically maps to java.sql.Statement.getFetchSize, which is an int - val maxRows = maxRowsL.toInt - var curRow = 0 - var rowSet = new ArrayBuffer[Row](maxRows.min(1024)) - - while (curRow < maxRows && iter.hasNext) { - val sparkRow = iter.next() - val row = new Row() - var curCol = 0 - - while (curCol < sparkRow.length) { - if (sparkRow.isNullAt(curCol)) { - addNullColumnValue(sparkRow, row, curCol) - } else { - addNonNullColumnValue(sparkRow, row, curCol) - } - curCol += 1 - } - rowSet += row - curRow += 1 - } - new RowSet(rowSet, 0) - } - } - - def addNonNullColumnValue(from: SparkRow, to: Row, ordinal: Int) { - dataTypes(ordinal) match { - case StringType => - to.addString(from(ordinal).asInstanceOf[String]) - case IntegerType => - to.addColumnValue(ColumnValue.intValue(from.getInt(ordinal))) - case BooleanType => - to.addColumnValue(ColumnValue.booleanValue(from.getBoolean(ordinal))) - case DoubleType => - to.addColumnValue(ColumnValue.doubleValue(from.getDouble(ordinal))) - case FloatType => - to.addColumnValue(ColumnValue.floatValue(from.getFloat(ordinal))) - case DecimalType() => - val hiveDecimal = from.getDecimal(ordinal) - to.addColumnValue(ColumnValue.stringValue(new HiveDecimal(hiveDecimal))) - case LongType => - to.addColumnValue(ColumnValue.longValue(from.getLong(ordinal))) - case ByteType => - to.addColumnValue(ColumnValue.byteValue(from.getByte(ordinal))) - case ShortType => - to.addColumnValue(ColumnValue.shortValue(from.getShort(ordinal))) - case DateType => - to.addColumnValue(ColumnValue.dateValue(from(ordinal).asInstanceOf[Date])) - case TimestampType => - to.addColumnValue( - ColumnValue.timestampValue(from.get(ordinal).asInstanceOf[Timestamp])) - case BinaryType | _: ArrayType | _: StructType | _: MapType => - val hiveString = HiveContext.toHiveString((from.get(ordinal), dataTypes(ordinal))) - to.addColumnValue(ColumnValue.stringValue(hiveString)) - } - } - - def addNullColumnValue(from: SparkRow, to: Row, ordinal: Int) { - dataTypes(ordinal) match { - case StringType => - to.addString(null) - case IntegerType => - to.addColumnValue(ColumnValue.intValue(null)) - case BooleanType => - to.addColumnValue(ColumnValue.booleanValue(null)) 
- case DoubleType => - to.addColumnValue(ColumnValue.doubleValue(null)) - case FloatType => - to.addColumnValue(ColumnValue.floatValue(null)) - case DecimalType() => - to.addColumnValue(ColumnValue.stringValue(null: HiveDecimal)) - case LongType => - to.addColumnValue(ColumnValue.longValue(null)) - case ByteType => - to.addColumnValue(ColumnValue.byteValue(null)) - case ShortType => - to.addColumnValue(ColumnValue.shortValue(null)) - case DateType => - to.addColumnValue(ColumnValue.dateValue(null)) - case TimestampType => - to.addColumnValue(ColumnValue.timestampValue(null)) - case BinaryType | _: ArrayType | _: StructType | _: MapType => - to.addColumnValue(ColumnValue.stringValue(null: String)) - } - } - - def getResultSetSchema: TableSchema = { - logInfo(s"Result Schema: ${result.queryExecution.analyzed.output}") - if (result.queryExecution.analyzed.output.size == 0) { - new TableSchema(new FieldSchema("Result", "string", "") :: Nil) - } else { - val schema = result.queryExecution.analyzed.output.map { attr => - new FieldSchema(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), "") - } - new TableSchema(schema) - } - } - - def run(): Unit = { - val statementId = UUID.randomUUID().toString - logInfo(s"Running query '$statement'") - setState(OperationState.RUNNING) - HiveThriftServer2.listener.onStatementStart( - statementId, parentSession.getSessionHandle.getSessionId.toString, statement, statementId) - hiveContext.sparkContext.setJobGroup(statementId, statement) - sessionToActivePool.get(parentSession.getSessionHandle).foreach { pool => - hiveContext.sparkContext.setLocalProperty("spark.scheduler.pool", pool) - } - try { - result = hiveContext.sql(statement) - logDebug(result.queryExecution.toString()) - result.queryExecution.logical match { - case SetCommand(Some((SQLConf.THRIFTSERVER_POOL, Some(value))), _) => - sessionToActivePool(parentSession.getSessionHandle) = value - logInfo(s"Setting spark.scheduler.pool=$value for future statements in this session.") - case _ => - } - HiveThriftServer2.listener.onStatementParsed(statementId, result.queryExecution.toString()) - iter = { - val useIncrementalCollect = - hiveContext.getConf("spark.sql.thriftServer.incrementalCollect", "false").toBoolean - if (useIncrementalCollect) { - result.rdd.toLocalIterator - } else { - result.collect().iterator - } - } - dataTypes = result.queryExecution.analyzed.output.map(_.dataType).toArray - setHasResultSet(true) - } catch { - // Actually do need to catch Throwable as some failures don't inherit from Exception and - // HiveServer will silently swallow them. 
- case e: Throwable => - setState(OperationState.ERROR) - HiveThriftServer2.listener.onStatementError( - statementId, e.getMessage, e.getStackTraceString) - logError("Error executing query:",e) - throw new HiveSQLException(e.toString) - } - setState(OperationState.FINISHED) - HiveThriftServer2.listener.onStatementFinish(statementId) - } -} - -private[hive] class SparkSQLSessionManager(hiveContext: HiveContext) - extends SessionManager - with ReflectedCompositeService { - - private lazy val sparkSqlOperationManager = new SparkSQLOperationManager(hiveContext) - - override def init(hiveConf: HiveConf) { - setSuperField(this, "hiveConf", hiveConf) - - val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS) - setSuperField(this, "backgroundOperationPool", Executors.newFixedThreadPool(backgroundPoolSize)) - getAncestorField[Log](this, 3, "LOG").info( - s"HiveServer2: Async execution pool size $backgroundPoolSize") - - setSuperField(this, "operationManager", sparkSqlOperationManager) - addService(sparkSqlOperationManager) - - initCompositeService(hiveConf) - } - - override def openSession( - username: String, - passwd: String, - sessionConf: java.util.Map[String, String], - withImpersonation: Boolean, - delegationToken: String): SessionHandle = { - hiveContext.openSession() - val sessionHandle = super.openSession( - username, passwd, sessionConf, withImpersonation, delegationToken) - HiveThriftServer2.listener.onSessionCreated("UNKNOWN", sessionHandle.getSessionId.toString) - sessionHandle - } - - override def closeSession(sessionHandle: SessionHandle) { - HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString) - super.closeSession(sessionHandle) - sparkSqlOperationManager.sessionToActivePool -= sessionHandle - - hiveContext.detachSession() - } -} diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 81ee48ef4152f..0b1917a392901 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -185,7 +185,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Hive does not support buckets. ".*bucket.*", - // No window support yet + // We have our own tests based on these query files. ".*window.*", // Fails in hive with authorization errors. @@ -240,7 +240,20 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // It has a bug and it has been fixed by // https://issues.apache.org/jira/browse/HIVE-7673 (in Hive 0.14 and trunk). - "input46" + "input46", + + // These tests were broken by the hive client isolation PR. + "part_inherit_tbl_props", + "part_inherit_tbl_props_with_star", + + "nullformatCTAS", // SPARK-7411: need to finish CTAS parser + + // The isolated classloader seemed to make some of our test reset mechanisms less robust. + "combine1", // This test changes compression settings in a way that breaks all subsequent tests. + "load_dyn_part14.*", // These work alone but fail when run with other tests... 
+ + // The answer is sensitive to the JDK version + "udf_java_method" ) ++ HiveShim.compatibilityBlackList /** @@ -867,7 +880,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_int", "udf_isnotnull", "udf_isnull", - "udf_java_method", "udf_lcase", "udf_length", "udf_lessthan", diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala new file mode 100644 index 0000000000000..934452fe579a1 --- /dev/null +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala @@ -0,0 +1,845 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution + +import java.io.File +import java.util.{Locale, TimeZone} + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.util.Utils + +/** + * The test suite for window functions. To actually compare results with Hive, + * every test should be created by `createQueryTest`. Because we are reusing tables + * for different tests and there are a few properties needed to let Hive generate golden + * files, every `createQueryTest` call should explicitly set `reset` to `false`.
+ */ +abstract class HiveWindowFunctionQueryBaseSuite extends HiveComparisonTest with BeforeAndAfter { + private val originalTimeZone = TimeZone.getDefault + private val originalLocale = Locale.getDefault + private val testTempDir = Utils.createTempDir() + + override def beforeAll() { + TestHive.cacheTables = true + // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) + TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + // Add Locale setting + Locale.setDefault(Locale.US) + + // Create the table used in windowing.q + sql("DROP TABLE IF EXISTS part") + sql( + """ + |CREATE TABLE part( + | p_partkey INT, + | p_name STRING, + | p_mfgr STRING, + | p_brand STRING, + | p_type STRING, + | p_size INT, + | p_container STRING, + | p_retailprice DOUBLE, + | p_comment STRING) + """.stripMargin) + val testData1 = TestHive.getHiveFile("data/files/part_tiny.txt").getCanonicalPath + sql( + s""" + |LOAD DATA LOCAL INPATH '$testData1' overwrite into table part + """.stripMargin) + + sql("DROP TABLE IF EXISTS over1k") + sql( + """ + |create table over1k( + | t tinyint, + | si smallint, + | i int, + | b bigint, + | f float, + | d double, + | bo boolean, + | s string, + | ts timestamp, + | dec decimal(4,2), + | bin binary) + |row format delimited + |fields terminated by '|' + """.stripMargin) + val testData2 = TestHive.getHiveFile("data/files/over1k").getCanonicalPath + sql( + s""" + |LOAD DATA LOCAL INPATH '$testData2' overwrite into table over1k + """.stripMargin) + + // The following settings are used for generating golden files with Hive. + // We have to use kryo to correctly let Hive serialize plans with window functions. + // This is used to generate golden files. + sql("set hive.plan.serialization.format=kryo") + // Explicitly set fs to local fs. + sql(s"set fs.default.name=file://$testTempDir/") + // Ask Hive to run jobs in-process as a single map and reduce task. + sql("set mapred.job.tracker=local") + } + + override def afterAll() { + TestHive.cacheTables = false + TimeZone.setDefault(originalTimeZone) + Locale.setDefault(originalLocale) + TestHive.reset() + } + + ///////////////////////////////////////////////////////////////////////////// + // Tests based on windowing_multipartitioning.q + // Results of the original query file are not deterministic. + ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing_multipartitioning.q (deterministic) 1", + s""" + |select s, + |rank() over (partition by s order by si) r, + |sum(b) over (partition by s order by si) sum + |from over1k + |order by s, r, sum; + """.stripMargin, reset = false) + + /* timestamp comparison issue with Hive? 
+ createQueryTest("windowing_multipartitioning.q (deterministic) 2", + s""" + |select s, + |rank() over (partition by s order by dec desc) r, + |sum(b) over (partition by s order by ts desc) as sum + |from over1k + |where s = 'tom allen' or s = 'bob steinbeck' + |order by s, r, sum; + """.stripMargin, reset = false) + */ + + createQueryTest("windowing_multipartitioning.q (deterministic) 3", + s""" + |select s, sum(i) over (partition by s), sum(f) over (partition by si) + |from over1k where s = 'tom allen' or s = 'bob steinbeck'; + """.stripMargin, reset = false) + + createQueryTest("windowing_multipartitioning.q (deterministic) 4", + s""" + |select s, rank() over (partition by s order by bo), + |rank() over (partition by si order by bin desc) from over1k + |where s = 'tom allen' or s = 'bob steinbeck'; + """.stripMargin, reset = false) + + createQueryTest("windowing_multipartitioning.q (deterministic) 5", + s""" + |select s, sum(f) over (partition by i), row_number() over (order by f) + |from over1k where s = 'tom allen' or s = 'bob steinbeck'; + """.stripMargin, reset = false) + + createQueryTest("windowing_multipartitioning.q (deterministic) 6", + s""" + |select s, rank() over w1, + |rank() over w2 + |from over1k + |where s = 'tom allen' or s = 'bob steinbeck' + |window + |w1 as (partition by s order by dec), + |w2 as (partition by si order by f) ; + """.stripMargin, reset = false) + + ///////////////////////////////////////////////////////////////////////////// + // Tests based on windowing_navfn.q + // Results of the original query file are not deterministic. + // Also, the original query of + // select i, lead(s) over (partition by bin order by d,i desc) from over1k ; + ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing_navfn.q (deterministic)", + s""" + |select s, row_number() over (partition by d order by dec) rn from over1k + |order by s, rn desc; + |select i, lead(s) over (partition by cast(bin as string) order by d,i desc) as l + |from over1k + |order by i desc, l; + |select i, lag(dec) over (partition by i order by s,i,dec) l from over1k + |order by i, l; + |select s, last_value(t) over (partition by d order by f) l from over1k + |order by s, l; + |select s, first_value(s) over (partition by bo order by s) f from over1k + |order by s, f; + |select t, s, i, last_value(i) over (partition by t order by s) + |from over1k where (s = 'oscar allen' or s = 'oscar carson') and t = 10; + """.stripMargin, reset = false) + + ///////////////////////////////////////////////////////////////////////////// + // Tests based on windowing_ntile.q + // Results of the original query file are not deterministic. + ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing_ntile.q (deterministic)", + s""" + |select i, ntile(10) over (partition by s order by i) n from over1k + |order by i, n; + |select s, ntile(100) over (partition by i order by s) n from over1k + |order by s, n; + |select f, ntile(4) over (partition by d order by f) n from over1k + |order by f, n; + |select d, ntile(1000) over (partition by dec order by d) n from over1k + |order by d, n; + """.stripMargin, reset = false) + + ///////////////////////////////////////////////////////////////////////////// + // Tests based on windowing_udaf.q + // Results of the original query file are not deterministic. 
+ ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing_udaf.q (deterministic)", + s""" + |select s, min(i) over (partition by s) m from over1k + |order by s, m; + |select s, avg(f) over (partition by si order by s) a from over1k + |order by s, a; + |select s, avg(i) over (partition by t, b order by s) a from over1k + |order by s, a; + |select max(i) over w m from over1k + |order by m window w as (partition by f) ; + |select s, avg(d) over (partition by t order by f) a from over1k + |order by s, a; + """.stripMargin, reset = false) + + ///////////////////////////////////////////////////////////////////////////// + // Tests based on windowing_windowspec.q + // Results of the original query file are not deterministic. + ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing_windowspec.q (deterministic)", + s""" + |select s, sum(b) over (partition by i order by s,b rows unbounded preceding) as sum + |from over1k order by s, sum; + |select s, sum(f) over (partition by d order by s,f rows unbounded preceding) as sum + |from over1k order by s, sum; + |select s, sum(f) over + |(partition by ts order by f range between current row and unbounded following) as sum + |from over1k order by s, sum; + |select s, avg(f) + |over (partition by ts order by s,f rows between current row and 5 following) avg + |from over1k order by s, avg; + |select s, avg(d) over + |(partition by t order by s,d desc rows between 5 preceding and 5 following) avg + |from over1k order by s, avg; + |select s, sum(i) over(partition by ts order by s) sum from over1k + |order by s, sum; + |select f, sum(f) over + |(partition by ts order by f range between unbounded preceding and current row) sum + |from over1k order by f, sum; + |select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) avg + |from over1k order by s, i, avg; + |select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) avg + |from over1k + |order by s, i, avg window w1 as (partition by s order by i); + """.stripMargin, reset = false) + + ///////////////////////////////////////////////////////////////////////////// + // Tests based on windowing_rank.q + // Results of the original query file are not deterministic. 
+ ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing_rank.q (deterministic) 1", + s""" + |select s, rank() over (partition by f order by t) r from over1k order by s, r; + |select s, dense_rank() over (partition by ts order by i,s desc) as r from over1k + |order by s desc, r desc; + |select s, cume_dist() over (partition by bo order by b,s) cd from over1k + |order by s, cd; + |select s, percent_rank() over (partition by dec order by f) r from over1k + |order by s desc, r desc; + """.stripMargin, reset = false) + + createQueryTest("windowing_rank.q (deterministic) 2", + s""" + |select ts, dec, rnk + |from + | (select ts, dec, + | rank() over (partition by ts order by dec) as rnk + | from + | (select other.ts, other.dec + | from over1k other + | join over1k on (other.b = over1k.b) + | ) joined + | ) ranked + |where rnk = 1 + |order by ts, dec, rnk; + """.stripMargin, reset = false) + + createQueryTest("windowing_rank.q (deterministic) 3", + s""" + |select ts, dec, rnk + |from + | (select ts, dec, + | rank() over (partition by ts order by dec) as rnk + | from + | (select other.ts, other.dec + | from over1k other + | join over1k on (other.b = over1k.b) + | ) joined + | ) ranked + |where dec = 89.5 + |order by ts, dec, rnk; + """.stripMargin, reset = false) + + createQueryTest("windowing_rank.q (deterministic) 4", + s""" + |select ts, dec, rnk + |from + | (select ts, dec, + | rank() over (partition by ts order by dec) as rnk + | from + | (select other.ts, other.dec + | from over1k other + | join over1k on (other.b = over1k.b) + | where other.t < 10 + | ) joined + | ) ranked + |where rnk = 1 + |order by ts, dec, rnk; + """.stripMargin, reset = false) + + ///////////////////////////////////////////////////////////////////////////// + // Tests from windowing.q + // We port tests in windowing.q to here because this query file contains too + // many tests and the syntax of test "-- 7. testJoinWithWindowingAndPTF" + // is not supported right now. + ///////////////////////////////////////////////////////////////////////////// + createQueryTest("windowing.q -- 1. testWindowing", + s""" + |select p_mfgr, p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |sum(p_retailprice) over + |(distribute by p_mfgr sort by p_name rows between unbounded preceding and current row) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 2. testGroupByWithPartitioning", + s""" + |select p_mfgr, p_name, p_size, + |min(p_retailprice), + |rank() over(distribute by p_mfgr sort by p_name)as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz + |from part + |group by p_mfgr, p_name, p_size + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 3. testGroupByHavingWithSWQ", + s""" + |select p_mfgr, p_name, p_size, min(p_retailprice), + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz + |from part + |group by p_mfgr, p_name, p_size + |having p_size > 0 + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 4. 
testCount", + s""" + |select p_mfgr, p_name, + |count(p_size) over(distribute by p_mfgr sort by p_name) as cd + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 5. testCountWithWindowingUDAF", + s""" + |select p_mfgr, p_name, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |count(p_size) over(distribute by p_mfgr sort by p_name) as cd, + |p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name + | rows between unbounded preceding and current row) as s1, + |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 6. testCountInSubQ", + s""" + |select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz + |from (select p_mfgr, p_name, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |count(p_size) over(distribute by p_mfgr sort by p_name) as cd, + |p_retailprice, sum(p_retailprice) over (distribute by p_mfgr sort by p_name + | rows between unbounded preceding and current row) as s1, + |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz + |from part + |) sub1 + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 8. testMixedCaseAlias", + s""" + |select p_mfgr, p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 9. testHavingWithWindowingNoGBY", + s""" + |select p_mfgr, p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |sum(p_retailprice) over (distribute by p_mfgr sort by p_name + | rows between unbounded preceding and current row) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 10. testHavingWithWindowingCondRankNoGBY", + s""" + |select p_mfgr, p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |sum(p_retailprice) over (distribute by p_mfgr sort by p_name + | rows between unbounded preceding and current row) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 11. testFirstLast", + s""" + |select p_mfgr,p_name, p_size, + |sum(p_size) over (distribute by p_mfgr sort by p_name + |rows between current row and current row) as s2, + |first_value(p_size) over w1 as f, + |last_value(p_size, false) over w1 as l + |from part + |window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 12. testFirstLastWithWhere", + s""" + |select p_mfgr,p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |sum(p_size) over (distribute by p_mfgr sort by p_name + |rows between current row and current row) as s2, + |first_value(p_size) over w1 as f, + |last_value(p_size, false) over w1 as l + |from part + |where p_mfgr = 'Manufacturer#3' + |window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 13. 
testSumWindow", + s""" + |select p_mfgr,p_name, p_size, + |sum(p_size) over w1 as s1, + |sum(p_size) over (distribute by p_mfgr sort by p_name + |rows between current row and current row) as s2 + |from part + |window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 14. testNoSortClause", + s""" + |select p_mfgr,p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr + |from part + |window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 15. testExpressions", + s""" + |select p_mfgr,p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |cume_dist() over(distribute by p_mfgr sort by p_name) as cud, + |percent_rank() over(distribute by p_mfgr sort by p_name) as pr, + |ntile(3) over(distribute by p_mfgr sort by p_name) as nt, + |count(p_size) over(distribute by p_mfgr sort by p_name) as ca, + |avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, + |stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, + |first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, + |last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, + |first_value(p_size) over w1 as fvW1 + |from part + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 16. testMultipleWindows", + s""" + |select p_mfgr,p_name, p_size, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |cume_dist() over(distribute by p_mfgr sort by p_name) as cud, + |sum(p_size) over (distribute by p_mfgr sort by p_name + |range between unbounded preceding and current row) as s1, + |sum(p_size) over (distribute by p_mfgr sort by p_size + |range between 5 preceding and current row) as s2, + |first_value(p_size) over w1 as fv1 + |from part + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + + createQueryTest("windowing.q -- 17. testCountStar", + s""" + |select p_mfgr,p_name, p_size, + |count(*) over(distribute by p_mfgr sort by p_name ) as c, + |count(p_size) over(distribute by p_mfgr sort by p_name) as ca, + |first_value(p_size) over w1 as fvW1 + |from part + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 18. testUDAFs", + s""" + |select p_mfgr,p_name, p_size, + |sum(p_retailprice) over w1 as s, + |min(p_retailprice) over w1 as mi, + |max(p_retailprice) over w1 as ma, + |avg(p_retailprice) over w1 as ag + |from part + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 19. 
testUDAFsWithGBY", + """ + |select p_mfgr,p_name, p_size, p_retailprice, + |sum(p_retailprice) over w1 as s, + |min(p_retailprice) as mi , + |max(p_retailprice) as ma , + |avg(p_retailprice) over w1 as ag + |from part + |group by p_mfgr,p_name, p_size, p_retailprice + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following); + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 20. testSTATs", + """ + |select p_mfgr,p_name, p_size, + |stddev(p_retailprice) over w1 as sdev, + |stddev_pop(p_retailprice) over w1 as sdev_pop, + |collect_set(p_size) over w1 as uniq_size, + |variance(p_retailprice) over w1 as var, + |corr(p_size, p_retailprice) over w1 as cor, + |covar_pop(p_size, p_retailprice) over w1 as covarp + |from part + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 21. testDISTs", + """ + |select p_mfgr,p_name, p_size, + |histogram_numeric(p_retailprice, 5) over w1 as hist, + |percentile(p_partkey, 0.5) over w1 as per, + |row_number() over(distribute by p_mfgr sort by p_name) as rn + |from part + |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 24. testLateralViews", + """ + |select p_mfgr, p_name, + |lv_col, p_size, sum(p_size) over w1 as s + |from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p + |lateral view explode(arr) part_lv as lv_col + |window w1 as (distribute by p_mfgr sort by p_size, lv_col + | rows between 2 preceding and current row) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 26. testGroupByHavingWithSWQAndAlias", + """ + |select p_mfgr, p_name, p_size, min(p_retailprice) as mi, + |rank() over(distribute by p_mfgr sort by p_name) as r, + |dense_rank() over(distribute by p_mfgr sort by p_name) as dr, + |p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz + |from part + |group by p_mfgr, p_name, p_size + |having p_size > 0 + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 27. testMultipleRangeWindows", + """ + |select p_mfgr,p_name, p_size, + |sum(p_size) over (distribute by p_mfgr sort by p_size + |range between 10 preceding and current row) as s2, + |sum(p_size) over (distribute by p_mfgr sort by p_size + |range between current row and 10 following ) as s1 + |from part + |window w1 as (rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 28. testPartOrderInUDAFInvoke", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over (partition by p_mfgr order by p_name + |rows between 2 preceding and 2 following) as s + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 29. testPartOrderInWdwDef", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s + |from part + |window w1 as (partition by p_mfgr order by p_name + | rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 30. 
testDefaultPartitioningSpecRules", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s, + |sum(p_size) over w2 as s2 + |from part + |window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + | w2 as (partition by p_mfgr order by p_name) + """.stripMargin, reset = false) + + /* p_name is not a numeric column. What is Hive's semantic? + createQueryTest("windowing.q -- 31. testWindowCrossReference", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s1, + |sum(p_size) over w2 as s2 + |from part + |window w1 as (partition by p_mfgr order by p_name + | range between 2 preceding and 2 following), + | w2 as w1 + """.stripMargin, reset = false) + */ + /* + createQueryTest("windowing.q -- 32. testWindowInheritance", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s1, + |sum(p_size) over w2 as s2 + |from part + |window w1 as (partition by p_mfgr order by p_name + | range between 2 preceding and 2 following), + | w2 as (w1 rows between unbounded preceding and current row) + """.stripMargin, reset = false) + */ + + /* p_name is not a numeric column. What is Hive's semantic? + createQueryTest("windowing.q -- 33. testWindowForwardReference", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s1, + |sum(p_size) over w2 as s2, + |sum(p_size) over w3 as s3 + |from part + |window w1 as (distribute by p_mfgr sort by p_name + | range between 2 preceding and 2 following), + | w2 as w3, + | w3 as (distribute by p_mfgr sort by p_name + | range between unbounded preceding and current row) + """.stripMargin, reset = false) + */ + /* + createQueryTest("windowing.q -- 34. testWindowDefinitionPropagation", + """ + |select p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s1, + |sum(p_size) over w2 as s2, + |sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 + |from part + |window w1 as (distribute by p_mfgr sort by p_name + | range between 2 preceding and 2 following), + | w2 as w3, + | w3 as (distribute by p_mfgr sort by p_name + | range between unbounded preceding and current row) + """.stripMargin, reset = false) + */ + + /* Seems Hive evaluate SELECT DISTINCT before window functions? + createQueryTest("windowing.q -- 35. testDistinctWithWindowing", + """ + |select DISTINCT p_mfgr, p_name, p_size, + |sum(p_size) over w1 as s + |from part + |window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following) + """.stripMargin, reset = false) + */ + + createQueryTest("windowing.q -- 36. testRankWithPartitioning", + """ + |select p_mfgr, p_name, p_size, + |rank() over (partition by p_mfgr order by p_name ) as r + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 37. testPartitioningVariousForms", + """ + |select p_mfgr, + |round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, + |min(p_retailprice) over (partition by p_mfgr) as s2, + |max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, + |round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, + |count(p_retailprice) over (cluster by p_mfgr ) as s5 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 38. 
testPartitioningVariousForms2", + """ + |select p_mfgr, p_name, p_size, + |sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name + |rows between unbounded preceding and current row) as s1, + |min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name + |rows between unbounded preceding and current row) as s2, + |max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 39. testUDFOnOrderCols", + """ + |select p_mfgr, p_type, substr(p_type, 2) as short_ptype, + |rank() over (partition by p_mfgr order by substr(p_type, 2)) as r + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 40. testNoBetweenForRows", + """ + |select p_mfgr, p_name, p_size, + |sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 41. testNoBetweenForRange", + """ + |select p_mfgr, p_name, p_size, + |sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 42. testUnboundedFollowingForRows", + """ + |select p_mfgr, p_name, p_size, + |sum(p_retailprice) over (distribute by p_mfgr sort by p_name + |rows between current row and unbounded following) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 43. testUnboundedFollowingForRange", + """ + |select p_mfgr, p_name, p_size, + |sum(p_retailprice) over (distribute by p_mfgr sort by p_size + |range between current row and unbounded following) as s1 + |from part + """.stripMargin, reset = false) + + createQueryTest("windowing.q -- 44. testOverNoPartitionSingleAggregate", + """ + |select p_name, p_retailprice, + |round(avg(p_retailprice) over(),2) + |from part + |order by p_name + """.stripMargin, reset = false) +} + +class HiveWindowFunctionQueryWithoutCodeGenSuite extends HiveWindowFunctionQueryBaseSuite { + var originalCodegenEnabled: Boolean = _ + override def beforeAll(): Unit = { + super.beforeAll() + originalCodegenEnabled = conf.codegenEnabled + sql("set spark.sql.codegen=false") + } + + override def afterAll(): Unit = { + sql(s"set spark.sql.codegen=$originalCodegenEnabled") + super.afterAll() + } +} + +abstract class HiveWindowFunctionQueryFileBaseSuite + extends HiveCompatibilitySuite with BeforeAndAfter { + private val originalTimeZone = TimeZone.getDefault + private val originalLocale = Locale.getDefault + private val testTempDir = Utils.createTempDir() + + override def beforeAll() { + TestHive.cacheTables = true + // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) + TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + // Add Locale setting + Locale.setDefault(Locale.US) + + // The following settings are used for generating golden files with Hive. + // We have to use kryo to correctly let Hive serialize plans with window functions. + // This is used to generate golden files. + sql("set hive.plan.serialization.format=kryo") + // Explicitly set fs to local fs. + sql(s"set fs.default.name=file://$testTempDir/") + // Ask Hive to run jobs in-process as a single map and reduce task. 
+ sql("set mapred.job.tracker=local") + } + + override def afterAll() { + TestHive.cacheTables = false + TimeZone.setDefault(originalTimeZone) + Locale.setDefault(originalLocale) + TestHive.reset() + } + + override def blackList: Seq[String] = Seq( + // Partitioned table functions are not supported. + "ptf*", + // tests of windowing.q are in HiveWindowFunctionQueryBaseSuite + "windowing.q", + + // This one failed on the expression of + // sum(lag(p_retailprice,1,0.0)) over w1 + // lag(p_retailprice,1,0.0) is a GenericUDF and the argument inspector of + // p_retailprice created by HiveInspectors is + // PrimitiveObjectInspectorFactory.javaDoubleObjectInspector. + // However, seems Hive assumes it is + // PrimitiveObjectInspectorFactory.writableDoubleObjectInspector, which introduces an error. + "windowing_expressions", + + // Hive's results are not deterministic + "windowing_multipartitioning", + "windowing_navfn", + "windowing_ntile", + "windowing_udaf", + "windowing_windowspec", + "windowing_rank" + ) + + override def whiteList: Seq[String] = Seq( + "windowing_udaf2", + "windowing_columnPruning", + "windowing_adjust_rowcontainer_sz" + ) + + override def testCases: Seq[(String, File)] = super.testCases.filter { + case (name, _) => realWhiteList.contains(name) + } +} + +class HiveWindowFunctionQueryFileWithoutCodeGenSuite extends HiveWindowFunctionQueryFileBaseSuite { + var originalCodegenEnabled: Boolean = _ + override def beforeAll(): Unit = { + super.beforeAll() + originalCodegenEnabled = conf.codegenEnabled + sql("set spark.sql.codegen=false") + } + + override def afterAll(): Unit = { + sql(s"set spark.sql.codegen=$originalCodegenEnabled") + super.afterAll() + } +} diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index e322340094e6f..fef23d05ad2d0 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../../pom.xml @@ -136,16 +136,6 @@ - - hive-0.12.0 - - - com.twitter - parquet-hive-bundle - 1.5.0 - - - diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala index 3f20c6142e59a..7f8449cdc282d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala @@ -29,10 +29,10 @@ import org.apache.spark.sql.hive.execution.{AddJar, AddFile, HiveNativeCommand} private[hive] class ExtendedHiveQlParser extends AbstractSparkSQLParser { // Keyword is a convention with AbstractSparkSQLParser, which will scan all of the `Keyword` // properties via reflection the class in runtime for constructing the SqlLexical object - protected val ADD = Keyword("ADD") - protected val DFS = Keyword("DFS") + protected val ADD = Keyword("ADD") + protected val DFS = Keyword("DFS") protected val FILE = Keyword("FILE") - protected val JAR = Keyword("JAR") + protected val JAR = Keyword("JAR") protected lazy val start: Parser[LogicalPlan] = dfs | addJar | addFile | hiveQl diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 1d8d0b5c322ad..fbf2c7d8cbc06 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -17,13 +17,16 @@ package org.apache.spark.sql.hive -import java.io.{BufferedReader, InputStreamReader, PrintStream} 
+import java.io.{BufferedReader, File, InputStreamReader, PrintStream} +import java.net.{URL, URLClassLoader} import java.sql.Timestamp +import java.util.{ArrayList => JArrayList} import org.apache.hadoop.hive.ql.parse.VariableSubstitution -import org.apache.spark.sql.catalyst.Dialect +import org.apache.spark.sql.catalyst.ParserDialect import scala.collection.JavaConversions._ +import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.language.implicitConversions import org.apache.hadoop.fs.{FileSystem, Path} @@ -35,20 +38,25 @@ import org.apache.hadoop.hive.ql.processors._ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.io.{DateWritable, TimestampWritable} -import org.apache.spark.SparkContext +import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.annotation.Experimental +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, EliminateSubQueries, OverrideCatalog, OverrideFunctionRegistry} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.{ExecutedCommand, ExtractPythonUdfs, QueryExecutionException, SetCommand} +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, HiveNativeCommand} import org.apache.spark.sql.sources.{DDLParser, DataSourceStrategy} +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils + /** * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext */ -private[hive] class HiveQLDialect extends Dialect { +private[hive] class HiveQLDialect extends ParserDialect { override def parse(sqlText: String): LogicalPlan = { HiveQl.parseSql(sqlText) } @@ -57,10 +65,14 @@ private[hive] class HiveQLDialect extends Dialect { /** * An instance of the Spark SQL execution engine that integrates with data stored in Hive. * Configuration for Hive is read from hive-site.xml on the classpath. + * + * @since 1.0.0 */ class HiveContext(sc: SparkContext) extends SQLContext(sc) { self => + import HiveContext._ + /** * When true, enables an experimental feature where metastore tables that use the parquet SerDe * are automatically converted to use the Spark SQL parquet table scan, instead of the Hive @@ -93,9 +105,154 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { protected[sql] def convertCTAS: Boolean = getConf("spark.sql.hive.convertCTAS", "false").toBoolean + /** + * The version of the hive client that will be used to communicate with the metastore. Note that + * this does not necessarily need to be the same version of Hive that is used internally by + * Spark SQL for execution. + */ + protected[hive] def hiveMetastoreVersion: String = + getConf(HIVE_METASTORE_VERSION, hiveExecutionVersion) + + /** + * The location of the jars that should be used to instantiate the HiveMetastoreClient. This + * property can be one of three options: + * - a classpath in the standard format for both hive and hadoop. + * - builtin - attempt to discover the jars that were used to load Spark SQL and use those. This + * option is only valid when using the execution version of Hive. + * - maven - download the correct version of hive on demand from maven. 
+ */ + protected[hive] def hiveMetastoreJars: String = + getConf(HIVE_METASTORE_JARS, "builtin") + + /** + * A comma separated list of class prefixes that should be loaded using the classloader that + * is shared between Spark SQL and a specific version of Hive. An example of classes that should + * be shared is JDBC drivers that are needed to talk to the metastore. Other classes that need + * to be shared are those that interact with classes that are already shared. For example, + * custom appenders that are used by log4j. + */ + protected[hive] def hiveMetastoreSharedPrefixes: Seq[String] = + getConf("spark.sql.hive.metastore.sharedPrefixes", jdbcPrefixes) + .split(",").filterNot(_ == "") + + private def jdbcPrefixes = Seq( + "com.mysql.jdbc", "org.postgresql", "com.microsoft.sqlserver", "oracle.jdbc").mkString(",") + + /** + * A comma separated list of class prefixes that should explicitly be reloaded for each version + * of Hive that Spark SQL is communicating with. For example, Hive UDFs that are declared in a + * prefix that typically would be shared (i.e. org.apache.spark.*) + */ + protected[hive] def hiveMetastoreBarrierPrefixes: Seq[String] = + getConf("spark.sql.hive.metastore.barrierPrefixes", "") + .split(",").filterNot(_ == "") + @transient protected[sql] lazy val substitutor = new VariableSubstitution() + /** + * The copy of the hive client that is used for execution. Currently this must always be + * Hive 13 as this is the version of Hive that is packaged with Spark SQL. This copy of the + * client is used for execution related tasks like registering temporary functions or ensuring + * that the ThreadLocal SessionState is correctly populated. This copy of Hive is *not* used + * for storing persistent metadata, and only point to a dummy metastore in a temporary directory. + */ + @transient + protected[hive] lazy val executionHive: ClientWrapper = { + logInfo(s"Initializing execution hive, version $hiveExecutionVersion") + new ClientWrapper( + version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion), + config = newTemporaryConfiguration()) + } + SessionState.setCurrentSessionState(executionHive.state) + + /** + * The copy of the Hive client that is used to retrieve metadata from the Hive MetaStore. + * The version of the Hive client that is used here must match the metastore that is configured + * in the hive-site.xml file. + */ + @transient + protected[hive] lazy val metadataHive: ClientInterface = { + val metaVersion = IsolatedClientLoader.hiveVersion(hiveMetastoreVersion) + + // We instantiate a HiveConf here to read in the hive-site.xml file and then pass the options + // into the isolated client loader + val metadataConf = new HiveConf() + // `configure` goes second to override other settings. + val allConfig = metadataConf.iterator.map(e => e.getKey -> e.getValue).toMap ++ configure + + val isolatedLoader = if (hiveMetastoreJars == "builtin") { + if (hiveExecutionVersion != hiveMetastoreVersion) { + throw new IllegalArgumentException( + "Builtin jars can only be used when hive execution version == hive metastore version. " + + s"Execution: ${hiveExecutionVersion} != Metastore: ${hiveMetastoreVersion}. " + + "Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " + + s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.") + } + + // We recursively find all jars in the class loader chain, + // starting from the given classLoader. 
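
For reference, a sketch of how a user might exercise the metastore options documented above. The config keys ("spark.sql.hive.metastore.version", "spark.sql.hive.metastore.jars", "spark.sql.hive.metastore.sharedPrefixes") and the "builtin"/"0.13.1" values appear in this patch; the app name and the com.example prefix are made up, and the snippet assumes spark.sql.* entries in SparkConf are picked up by the SQLContext constructor as described in the new setConf comment below.

  import org.apache.spark.{SparkConf, SparkContext}
  import org.apache.spark.sql.hive.HiveContext

  object MetastoreConfigSketch {
    def main(args: Array[String]): Unit = {
      val conf = new SparkConf()
        .setAppName("metastore-config-sketch")
        // Use the Hive 0.13.1 client jars bundled with Spark ("builtin").
        .set("spark.sql.hive.metastore.version", "0.13.1")
        .set("spark.sql.hive.metastore.jars", "builtin")
        // Keep the JDBC drivers shared and add a hypothetical custom package.
        .set("spark.sql.hive.metastore.sharedPrefixes",
          "com.mysql.jdbc,org.postgresql,com.example.log4j.appenders")

      val sc = new SparkContext(conf)
      val hiveContext = new HiveContext(sc)
      hiveContext.sql("SHOW TABLES").collect().foreach(println)
      sc.stop()
    }
  }
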
+ def allJars(classLoader: ClassLoader): Array[URL] = classLoader match { + case null => Array.empty[URL] + case urlClassLoader: URLClassLoader => + urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent) + case other => allJars(other.getParent) + } + + val classLoader = Utils.getContextOrSparkClassLoader + val jars = allJars(classLoader) + if (jars.length == 0) { + throw new IllegalArgumentException( + "Unable to locate hive jars to connect to metastore. " + + "Please set spark.sql.hive.metastore.jars.") + } + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using Spark classes.") + new IsolatedClientLoader( + version = metaVersion, + execJars = jars.toSeq, + config = allConfig, + isolationOn = true, + barrierPrefixes = hiveMetastoreBarrierPrefixes, + sharedPrefixes = hiveMetastoreSharedPrefixes) + } else if (hiveMetastoreJars == "maven") { + // TODO: Support for loading the jars from an already downloaded location. + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.") + IsolatedClientLoader.forVersion(hiveMetastoreVersion, allConfig) + } else { + // Convert to files and expand any directories. + val jars = + hiveMetastoreJars + .split(File.pathSeparator) + .flatMap { + case path if new File(path).getName() == "*" => + val files = new File(path).getParentFile().listFiles() + if (files == null) { + logWarning(s"Hive jar path '$path' does not exist.") + Nil + } else { + files.filter(_.getName().toLowerCase().endsWith(".jar")) + } + case path => + new File(path) :: Nil + } + .map(_.toURI.toURL) + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using $jars") + new IsolatedClientLoader( + version = metaVersion, + execJars = jars.toSeq, + config = allConfig, + isolationOn = true, + barrierPrefixes = hiveMetastoreBarrierPrefixes, + sharedPrefixes = hiveMetastoreSharedPrefixes) + } + isolatedLoader.client + } + protected[sql] override def parseSql(sql: String): LogicalPlan = { super.parseSql(substitutor.substitute(hiveconf, sql)) } @@ -108,6 +265,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * Spark SQL or the external data source library it uses might cache certain metadata about a * table, such as the location of blocks. When those change outside of Spark SQL, users should * call this function to invalidate the cache. + * + * @since 1.3.0 */ def refreshTable(tableName: String): Unit = { // TODO: Database support... @@ -125,6 +284,8 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { * * Right now, it only supports Hive tables and it only updates the size of a Hive table * in the Hive metastore. + * + * @since 1.2.0 */ @Experimental def analyze(tableName: String) { @@ -178,15 +339,10 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { // recorded in the Hive metastore. // This logic is based on org.apache.hadoop.hive.ql.exec.StatsTask.aggregateStats(). if (newTotalSize > 0 && newTotalSize != oldTotalSize) { - tableParameters.put(HiveShim.getStatsSetupConstTotalSize, newTotalSize.toString) - val hiveTTable = relation.hiveQlTable.getTTable - hiveTTable.setParameters(tableParameters) - val tableFullName = - relation.hiveQlTable.getDbName + "." 
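
The "builtin" branch above discovers jars by walking the classloader chain. A self-contained version of that walk, runnable outside Spark, is sketched below; it mirrors allJars() from this patch, and only the object name is invented.

  import java.net.{URL, URLClassLoader}

  object ClassPathWalk {
    // Collect the URLs of every URLClassLoader in the chain, bottom to top.
    def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
      case null => Array.empty[URL]
      case urlClassLoader: URLClassLoader =>
        // Take this loader's jars, then keep walking up the parent chain.
        urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
      case other => allJars(other.getParent)
    }

    def main(args: Array[String]): Unit = {
      val jars = allJars(Thread.currentThread().getContextClassLoader)
      println(s"Found ${jars.length} classpath entries")
      jars.take(5).foreach(println)
    }
  }
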
+ relation.hiveQlTable.getTableName - - catalog.synchronized { - catalog.client.alterTable(tableFullName, new Table(hiveTTable)) - } + catalog.client.alterTable( + relation.table.copy( + properties = relation.table.properties + + (HiveShim.getStatsSetupConstTotalSize -> newTotalSize.toString))) } case otherRelation => throw new UnsupportedOperationException( @@ -194,80 +350,62 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } } - // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. - @transient - protected lazy val outputBuffer = new java.io.OutputStream { - var pos: Int = 0 - var buffer = new Array[Int](10240) - def write(i: Int): Unit = { - buffer(pos) = i - pos = (pos + 1) % buffer.size - } - - override def toString: String = { - val (end, start) = buffer.splitAt(pos) - val input = new java.io.InputStream { - val iterator = (start ++ end).iterator - - def read(): Int = if (iterator.hasNext) iterator.next() else -1 - } - val reader = new BufferedReader(new InputStreamReader(input)) - val stringBuilder = new StringBuilder - var line = reader.readLine() - while(line != null) { - stringBuilder.append(line) - stringBuilder.append("\n") - line = reader.readLine() - } - stringBuilder.toString() - } - } - - protected[hive] def sessionState = tlSession.get().asInstanceOf[this.SQLSession].sessionState - protected[hive] def hiveconf = tlSession.get().asInstanceOf[this.SQLSession].hiveconf override def setConf(key: String, value: String): Unit = { super.setConf(key, value) - runSqlHive(s"SET $key=$value") + executionHive.runSqlHive(s"SET $key=$value") + metadataHive.runSqlHive(s"SET $key=$value") + // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf), + // this setConf will be called in the constructor of the SQLContext. + // Also, calling hiveconf will create a default session containing a HiveConf, which + // will interfer with the creation of executionHive (which is a lazy val). So, + // we put hiveconf.set at the end of this method. + hiveconf.set(key, value) } /* A catalyst metadata catalog that points to the Hive Metastore. */ @transient - override protected[sql] lazy val catalog = new HiveMetastoreCatalog(this) with OverrideCatalog + override protected[sql] lazy val catalog = + new HiveMetastoreCatalog(metadataHive, this) with OverrideCatalog // Note that HiveUDFs will be overridden by functions registered in this context. @transient override protected[sql] lazy val functionRegistry = new HiveFunctionRegistry with OverrideFunctionRegistry { - def caseSensitive: Boolean = false + override def conf: CatalystConf = currentSession().conf } /* An analyzer that uses the Hive metastore. */ @transient - override protected[sql] lazy val analyzer = - new Analyzer(catalog, functionRegistry, caseSensitive = false) { + override protected[sql] lazy val analyzer: Analyzer = + new Analyzer(catalog, functionRegistry, conf) { override val extendedResolutionRules = catalog.ParquetConversions :: catalog.CreateTables :: catalog.PreInsertionCasts :: ExtractPythonUdfs :: + ResolveHiveWindowFunction :: sources.PreInsertCastAndRename :: Nil + + override val extendedCheckRules = Seq( + sources.PreWriteCheck(catalog) + ) } override protected[sql] def createSession(): SQLSession = { new this.SQLSession() } + /** Overridden by child classes that need to set configuration before the client init. 
*/ + protected def configure(): Map[String, String] = Map.empty + protected[hive] class SQLSession extends super.SQLSession { protected[sql] override lazy val conf: SQLConf = new SQLConf { override def dialect: String = getConf(SQLConf.DIALECT, "hiveql") - } - - protected[hive] lazy val hiveconf: HiveConf = { - setConf(sessionState.getConf.getAllProperties) - sessionState.getConf + override def caseSensitiveAnalysis: Boolean = + getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean } /** @@ -284,78 +422,12 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { state = new SessionState(new HiveConf(classOf[SessionState])) SessionState.start(state) } - if (state.out == null) { - state.out = new PrintStream(outputBuffer, true, "UTF-8") - } - if (state.err == null) { - state.err = new PrintStream(outputBuffer, true, "UTF-8") - } state } - } - /** - * Runs the specified SQL query using Hive. - */ - protected[sql] def runSqlHive(sql: String): Seq[String] = { - val maxResults = 100000 - val results = runHive(sql, maxResults) - // It is very confusing when you only get back some of the results... - if (results.size == maxResults) sys.error("RESULTS POSSIBLY TRUNCATED") - results - } - - /** - * Execute the command using Hive and return the results as a sequence. Each element - * in the sequence is one row. - */ - protected def runHive(cmd: String, maxRows: Int = 1000): Seq[String] = synchronized { - try { - val cmd_trimmed: String = cmd.trim() - val tokens: Array[String] = cmd_trimmed.split("\\s+") - val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() - val proc: CommandProcessor = HiveShim.getCommandProcessor(Array(tokens(0)), hiveconf) - - // Makes sure the session represented by the `sessionState` field is activated. This implies - // Spark SQL Hive support uses a single `SessionState` for all Hive operations and breaks - // session isolation under multi-user scenarios (i.e. HiveThriftServer2). - // TODO Fix session isolation - if (SessionState.get() != sessionState) { - SessionState.start(sessionState) - } - - proc match { - case driver: Driver => - val results = HiveShim.createDriverResultsArray - val response: CommandProcessorResponse = driver.run(cmd) - // Throw an exception if there is an error in query processing. 
- if (response.getResponseCode != 0) { - driver.close() - throw new QueryExecutionException(response.getErrorMessage) - } - driver.setMaxRows(maxRows) - driver.getResults(results) - driver.close() - HiveShim.processResults(results) - case _ => - if (sessionState.out != null) { - sessionState.out.println(tokens(0) + " " + cmd_1) - } - Seq(proc.run(cmd_1).getResponseCode.toString) - } - } catch { - case e: Exception => - logError( - s""" - |====================== - |HIVE FAILURE OUTPUT - |====================== - |${outputBuffer.toString} - |====================== - |END HIVE FAILURE OUTPUT - |====================== - """.stripMargin) - throw e + protected[hive] lazy val hiveconf: HiveConf = { + setConf(sessionState.getConf.getAllProperties) + sessionState.getConf } } @@ -390,17 +462,23 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { ) } + protected[hive] def runSqlHive(sql: String): Seq[String] = { + if (sql.toLowerCase.contains("create temporary function")) { + executionHive.runSqlHive(sql) + } else if (sql.trim.toLowerCase.startsWith("set")) { + metadataHive.runSqlHive(sql) + executionHive.runSqlHive(sql) + } else { + metadataHive.runSqlHive(sql) + } + } + @transient override protected[sql] val planner = hivePlanner /** Extends QueryExecution with hive specific features. */ protected[sql] class QueryExecution(logicalPlan: LogicalPlan) extends super.QueryExecution(logicalPlan) { - // Like what we do in runHive, makes sure the session represented by the - // `sessionState` field is activated. - if (SessionState.get() != sessionState) { - SessionState.start(sessionState) - } /** * Returns the result as a hive compatible sequence of strings. For native commands, the @@ -438,7 +516,32 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) { } -private object HiveContext { +private[hive] object HiveContext { + /** The version of hive used internally by Spark SQL. */ + val hiveExecutionVersion: String = "0.13.1" + + val HIVE_METASTORE_VERSION: String = "spark.sql.hive.metastore.version" + val HIVE_METASTORE_JARS: String = "spark.sql.hive.metastore.jars" + + /** Constructs a configuration for hive, where the metastore is located in a temp directory. */ + def newTemporaryConfiguration(): Map[String, String] = { + val tempDir = Utils.createTempDir() + val localMetastore = new File(tempDir, "metastore").getAbsolutePath + val propMap: HashMap[String, String] = HashMap() + // We have to mask all properties in hive-site.xml that relates to metastore data source + // as we used a local metastore here. 
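
The new runSqlHive above routes statements between the execution client and the metastore client. A standalone sketch of that routing rule follows; the enum and function names are invented for illustration, only the string checks mirror the patch.

  object HiveCommandRouting {
    sealed trait Target
    case object ExecutionOnly extends Target  // "create temporary function"
    case object Both extends Target           // SET statements
    case object MetadataOnly extends Target   // everything else

    def route(sql: String): Target = {
      val lower = sql.toLowerCase
      if (lower.contains("create temporary function")) ExecutionOnly
      else if (sql.trim.toLowerCase.startsWith("set")) Both
      else MetadataOnly
    }

    def main(args: Array[String]): Unit = {
      println(route("CREATE TEMPORARY FUNCTION myUdf AS 'com.example.MyUdf'")) // ExecutionOnly
      println(route("  SET hive.exec.dynamic.partition=true"))                 // Both
      println(route("SHOW TABLES"))                                            // MetadataOnly
    }
  }
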
+ HiveConf.ConfVars.values().foreach { confvar => + if (confvar.varname.contains("datanucleus") || confvar.varname.contains("jdo")) { + propMap.put(confvar.varname, confvar.defaultVal) + } + } + propMap.put("javax.jdo.option.ConnectionURL", + s"jdbc:derby:;databaseName=$localMetastore;create=true") + propMap.put("datanucleus.rdbms.datastoreAdapterClassName", + "org.datanucleus.store.rdbms.adapter.DerbyAdapter") + propMap.toMap + } + protected val primitiveTypes = Seq(StringType, IntegerType, LongType, DoubleType, FloatType, BooleanType, ByteType, ShortType, DateType, TimestampType, BinaryType) @@ -450,7 +553,7 @@ private object HiveContext { }.mkString("{", ",", "}") case (seq: Seq[_], ArrayType(typ, _)) => seq.map(v => (v, typ)).map(toHiveStructString).mkString("[", ",", "]") - case (map: Map[_,_], MapType(kType, vType, _)) => + case (map: Map[_, _], MapType(kType, vType, _)) => map.map { case (key, value) => toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 74ae984f34866..24cd335082639 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -18,12 +18,13 @@ package org.apache.spark.sql.hive import org.apache.hadoop.hive.common.`type`.{HiveDecimal, HiveVarchar} -import org.apache.hadoop.hive.serde2.objectinspector._ import org.apache.hadoop.hive.serde2.objectinspector.primitive._ +import org.apache.hadoop.hive.serde2.objectinspector.{StructField => HiveStructField, _} import org.apache.hadoop.hive.serde2.{io => hiveIo} import org.apache.hadoop.{io => hadoopIo} import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.types import org.apache.spark.sql.types._ @@ -121,7 +122,7 @@ import scala.collection.JavaConversions._ * even a normal java object (POJO) * UnionObjectInspector: (tag: Int, object data) (TODO: not supported by SparkSQL yet) * - * 3) ConstantObjectInspector: + * 3) ConstantObjectInspector: * Constant object inspector can be either primitive type or Complex type, and it bundles a * constant value as its property, usually the value is created when the constant object inspector * constructed. 
@@ -132,7 +133,7 @@ import scala.collection.JavaConversions._ } }}} * Hive provides 3 built-in constant object inspectors: - * Primitive Object Inspectors: + * Primitive Object Inspectors: * WritableConstantStringObjectInspector * WritableConstantHiveVarcharObjectInspector * WritableConstantHiveDecimalObjectInspector @@ -146,9 +147,9 @@ import scala.collection.JavaConversions._ * WritableConstantByteObjectInspector * WritableConstantBinaryObjectInspector * WritableConstantDateObjectInspector - * Map Object Inspector: + * Map Object Inspector: * StandardConstantMapObjectInspector - * List Object Inspector: + * List Object Inspector: * StandardConstantListObjectInspector]] * Struct Object Inspector: Hive doesn't provide the built-in constant object inspector for Struct * Union Object Inspector: Hive doesn't provide the built-in constant object inspector for Union @@ -249,9 +250,9 @@ private[hive] trait HiveInspectors { poi.getWritableConstantValue.getHiveDecimal) case poi: WritableConstantTimestampObjectInspector => poi.getWritableConstantValue.getTimestamp.clone() - case poi: WritableConstantIntObjectInspector => + case poi: WritableConstantIntObjectInspector => poi.getWritableConstantValue.get() - case poi: WritableConstantDoubleObjectInspector => + case poi: WritableConstantDoubleObjectInspector => poi.getWritableConstantValue.get() case poi: WritableConstantBooleanObjectInspector => poi.getWritableConstantValue.get() @@ -305,7 +306,7 @@ private[hive] trait HiveInspectors { // In order to keep backward-compatible, we have to copy the // bytes with old apis val bw = x.getPrimitiveWritableObject(data) - val result = new Array[Byte](bw.getLength()) + val result = new Array[Byte](bw.getLength()) System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength()) result case x: DateObjectInspector if x.preferWritable() => @@ -334,7 +335,7 @@ private[hive] trait HiveInspectors { val allRefs = si.getAllStructFieldRefs new GenericRow( allRefs.map(r => - unwrap(si.getStructFieldData(data,r), r.getFieldObjectInspector)).toArray) + unwrap(si.getStructFieldData(data, r), r.getFieldObjectInspector)).toArray) } @@ -393,6 +394,30 @@ private[hive] trait HiveInspectors { identity[Any] } + /** + * Builds specific unwrappers ahead of time according to object inspector + * types to avoid pattern matching and branching costs per row. 
+ */ + def unwrapperFor(field: HiveStructField): (Any, MutableRow, Int) => Unit = + field.getFieldObjectInspector match { + case oi: BooleanObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setBoolean(ordinal, oi.get(value)) + case oi: ByteObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setByte(ordinal, oi.get(value)) + case oi: ShortObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setShort(ordinal, oi.get(value)) + case oi: IntObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setInt(ordinal, oi.get(value)) + case oi: LongObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setLong(ordinal, oi.get(value)) + case oi: FloatObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setFloat(ordinal, oi.get(value)) + case oi: DoubleObjectInspector => + (value: Any, row: MutableRow, ordinal: Int) => row.setDouble(ordinal, oi.get(value)) + case oi => + (value: Any, row: MutableRow, ordinal: Int) => row(ordinal) = unwrap(value, oi) + } + /** * Converts native catalyst types to the types expected by Hive * @param a the value to be wrapped @@ -536,8 +561,8 @@ private[hive] trait HiveInspectors { case DecimalType() => PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector case StructType(fields) => ObjectInspectorFactory.getStandardStructObjectInspector( - java.util.Arrays.asList(fields.map(f => f.name) :_*), - java.util.Arrays.asList(fields.map(f => toInspector(f.dataType)) :_*)) + java.util.Arrays.asList(fields.map(f => f.name) : _*), + java.util.Arrays.asList(fields.map(f => toInspector(f.dataType)) : _*)) } /** @@ -652,8 +677,8 @@ private[hive] trait HiveInspectors { getListTypeInfo(elemType.toTypeInfo) case StructType(fields) => getStructTypeInfo( - java.util.Arrays.asList(fields.map(_.name) :_*), - java.util.Arrays.asList(fields.map(_.dataType.toTypeInfo) :_*)) + java.util.Arrays.asList(fields.map(_.name) : _*), + java.util.Arrays.asList(fields.map(_.dataType.toTypeInfo) : _*)) case MapType(keyType, valueType, _) => getMapTypeInfo(keyType.toTypeInfo, valueType.toTypeInfo) case BinaryType => binaryTypeInfo diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 4d222cf88e5e8..ca1f49b546bd7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -17,41 +17,35 @@ package org.apache.spark.sql.hive -import java.io.IOException -import java.util.{List => JList} - import com.google.common.base.Objects import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} -import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition => TPartition, Table => TTable} -import org.apache.hadoop.hive.metastore.{TableType, Warehouse} +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.metastore.Warehouse +import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.ql.metadata._ -import org.apache.hadoop.hive.ql.plan.CreateTableDesc -import org.apache.hadoop.hive.serde.serdeConstants -import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe -import org.apache.hadoop.hive.serde2.{Deserializer, SerDeException} -import org.apache.hadoop.util.ReflectionUtils +import org.apache.hadoop.hive.serde2.Deserializer import org.apache.spark.Logging -import org.apache.spark.sql.{SaveMode, AnalysisException, 
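
The point of unwrapperFor above is to pay for type dispatch once per column instead of once per row. A generic sketch of that pattern, using stand-in types rather than Hive ObjectInspectors or Spark rows, is below.

  object PrecomputedSetters {
    sealed trait ColumnType
    case object IntCol extends ColumnType
    case object DoubleCol extends ColumnType
    case object StringCol extends ColumnType

    final case class Row(values: Array[Any]) {
      def set(i: Int, v: Any): Unit = values(i) = v
    }

    // One pattern match per column, performed before any rows are processed.
    def setterFor(tpe: ColumnType): (Any, Row, Int) => Unit = tpe match {
      case IntCol    => (v, row, i) => row.set(i, v.asInstanceOf[java.lang.Integer].intValue())
      case DoubleCol => (v, row, i) => row.set(i, v.asInstanceOf[java.lang.Double].doubleValue())
      case StringCol => (v, row, i) => row.set(i, v.toString)
    }

    def main(args: Array[String]): Unit = {
      val schema  = Array[ColumnType](IntCol, DoubleCol, StringCol)
      val setters = schema.map(setterFor)            // built once
      val raw: Array[Any] = Array(Int.box(42), Double.box(3.14), "part#1")
      val row = Row(new Array[Any](schema.length))
      var i = 0
      while (i < schema.length) {                    // hot loop: no type dispatch
        setters(i)(raw(i), row, i)
        i += 1
      }
      println(row.values.mkString(", "))
    }
  }
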
SQLContext} -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NoSuchTableException, Catalog, OverrideCatalog} +import org.apache.spark.sql.catalyst.analysis.{Catalog, MultiInstanceRelation, OverrideCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.parquet.{ParquetRelation2, Partition => ParquetPartition, PartitionSpec} -import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, DDLParser, LogicalRelation, ResolvedDataSource} +import org.apache.spark.sql.hive.client._ +import org.apache.spark.sql.parquet.ParquetRelation2 +import org.apache.spark.sql.sources.{CreateTableUsingAsSelect, LogicalRelation, Partition => ParquetPartition, PartitionSpec, ResolvedDataSource} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, SQLContext, SaveMode, sources} import org.apache.spark.util.Utils /* Implicit conversions */ import scala.collection.JavaConversions._ -private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with Logging { - import org.apache.spark.sql.hive.HiveMetastoreTypes._ +private[hive] class HiveMetastoreCatalog(val client: ClientInterface, hive: HiveContext) + extends Catalog with Logging { - /** Connection to hive metastore. Usages should lock on `this`. */ - protected[hive] val client = Hive.get(hive.hiveconf) + val conf = hive.conf /** Usages should lock on `this`. */ protected[hive] lazy val hiveWarehouse = new Warehouse(hive.hiveconf) @@ -67,18 +61,16 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val cacheLoader = new CacheLoader[QualifiedTableName, LogicalPlan]() { override def load(in: QualifiedTableName): LogicalPlan = { logDebug(s"Creating new cached data source for $in") - val table = HiveMetastoreCatalog.this.synchronized { - client.getTable(in.database, in.name) - } + val table = client.getTable(in.database, in.name) def schemaStringFromParts: Option[String] = { - Option(table.getProperty("spark.sql.sources.schema.numParts")).map { numParts => + table.properties.get("spark.sql.sources.schema.numParts").map { numParts => val parts = (0 until numParts.toInt).map { index => - val part = table.getProperty(s"spark.sql.sources.schema.part.${index}") + val part = table.properties.get(s"spark.sql.sources.schema.part.$index").orNull if (part == null) { throw new AnalysisException( - s"Could not read schema from the metastore because it is corrupted " + - s"(missing part ${index} of the schema).") + "Could not read schema from the metastore because it is corrupted " + + s"(missing part $index of the schema, $numParts parts are expected).") } part @@ -92,20 +84,26 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // After SPARK-6024, we removed this flag. // Although we are not using spark.sql.sources.schema any more, we need to still support. val schemaString = - Option(table.getProperty("spark.sql.sources.schema")).orElse(schemaStringFromParts) + table.properties.get("spark.sql.sources.schema").orElse(schemaStringFromParts) val userSpecifiedSchema = schemaString.map(s => DataType.fromJson(s).asInstanceOf[StructType]) + // We only need names at here since userSpecifiedSchema we loaded from the metastore + // contains partition columns. 
We can always get datatypes of partitioning columns + // from userSpecifiedSchema. + val partitionColumns = table.partitionColumns.map(_.name) + // It does not appear that the ql client for the metastore has a way to enumerate all the // SerDe properties directly... - val options = table.getTTable.getSd.getSerdeInfo.getParameters.toMap + val options = table.serdeProperties val resolvedRelation = ResolvedDataSource( hive, userSpecifiedSchema, - table.getProperty("spark.sql.sources.provider"), + partitionColumns.toArray, + table.properties("spark.sql.sources.provider"), options) LogicalRelation(resolvedRelation.relation) @@ -118,8 +116,8 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with override def refreshTable(databaseName: String, tableName: String): Unit = { // refreshTable does not eagerly reload the cache. It just invalidate the cache. // Next time when we use the table, it will be populated in the cache. - // Since we also cache ParquetRealtions converted from Hive Parquet tables and - // adding converted ParquetRealtions into the cache is not defined in the load function + // Since we also cache ParquetRelations converted from Hive Parquet tables and + // adding converted ParquetRelations into the cache is not defined in the load function // of the cache (instead, we add the cache entry in convertToParquetRelation), // it is better at here to invalidate the cache to avoid confusing waring logs from the // cache loader (e.g. cannot find data source provider, which is only defined for @@ -140,53 +138,83 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with def createDataSourceTable( tableName: String, userSpecifiedSchema: Option[StructType], + partitionColumns: Array[String], provider: String, options: Map[String, String], isExternal: Boolean): Unit = { val (dbName, tblName) = processDatabaseAndTableName("default", tableName) - val tbl = new Table(dbName, tblName) + val tableProperties = new scala.collection.mutable.HashMap[String, String] + tableProperties.put("spark.sql.sources.provider", provider) - tbl.setProperty("spark.sql.sources.provider", provider) + // Saves optional user specified schema. Serialized JSON schema string may be too long to be + // stored into a single metastore SerDe property. In this case, we split the JSON string and + // store each part as a separate SerDe property. if (userSpecifiedSchema.isDefined) { - val threshold = hive.conf.schemaStringLengthThreshold + val threshold = conf.schemaStringLengthThreshold val schemaJsonString = userSpecifiedSchema.get.json // Split the JSON string. 
val parts = schemaJsonString.grouped(threshold).toSeq - tbl.setProperty("spark.sql.sources.schema.numParts", parts.size.toString) + tableProperties.put("spark.sql.sources.schema.numParts", parts.size.toString) parts.zipWithIndex.foreach { case (part, index) => - tbl.setProperty(s"spark.sql.sources.schema.part.${index}", part) + tableProperties.put(s"spark.sql.sources.schema.part.$index", part) } } - options.foreach { case (key, value) => tbl.setSerdeParam(key, value) } - if (isExternal) { - tbl.setProperty("EXTERNAL", "TRUE") - tbl.setTableType(TableType.EXTERNAL_TABLE) - } else { - tbl.setProperty("EXTERNAL", "FALSE") - tbl.setTableType(TableType.MANAGED_TABLE) + val metastorePartitionColumns = userSpecifiedSchema.map { schema => + val fields = partitionColumns.map(col => schema(col)) + fields.map { field => + HiveColumn( + name = field.name, + hiveType = HiveMetastoreTypes.toMetastoreType(field.dataType), + comment = "") + }.toSeq + }.getOrElse { + if (partitionColumns.length > 0) { + // The table does not have a specified schema, which means that the schema will be inferred + // when we load the table. So, we are not expecting partition columns and we will discover + // partitions when we load the table. However, if there are specified partition columns, + // we simplily ignore them and provide a warning message.. + logWarning( + s"The schema and partitions of table $tableName will be inferred when it is loaded. " + + s"Specified partition columns (${partitionColumns.mkString(",")}) will be ignored.") + } + Seq.empty[HiveColumn] } - // create the table - synchronized { - client.createTable(tbl, false) - } + val tableType = if (isExternal) { + tableProperties.put("EXTERNAL", "TRUE") + ExternalTable + } else { + tableProperties.put("EXTERNAL", "FALSE") + ManagedTable + } + + client.createTable( + HiveTable( + specifiedDatabase = Option(dbName), + name = tblName, + schema = Seq.empty, + partitionColumns = metastorePartitionColumns, + tableType = tableType, + properties = tableProperties.toMap, + serdeProperties = options)) } - def hiveDefaultTableFilePath(tableName: String): String = synchronized { - val currentDatabase = client.getDatabase(hive.sessionState.getCurrentDatabase) - - hiveWarehouse.getTablePath(currentDatabase, tableName).toString + def hiveDefaultTableFilePath(tableName: String): String = { + // Code based on: hiveWarehouse.getTablePath(currentDatabase, tableName) + new Path( + new Path(client.getDatabase(client.currentDatabase).location), + tableName.toLowerCase).toString } - def tableExists(tableIdentifier: Seq[String]): Boolean = synchronized { + def tableExists(tableIdentifier: Seq[String]): Boolean = { val tableIdent = processTableIdentifier(tableIdentifier) val databaseName = tableIdent .lift(tableIdent.size - 2) - .getOrElse(hive.sessionState.getCurrentDatabase) + .getOrElse(client.currentDatabase) val tblName = tableIdent.last - client.getTable(databaseName, tblName, false) != null + client.getTableOption(databaseName, tblName).isDefined } def lookupRelation( @@ -194,43 +222,30 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with alias: Option[String]): LogicalPlan = { val tableIdent = processTableIdentifier(tableIdentifier) val databaseName = tableIdent.lift(tableIdent.size - 2).getOrElse( - hive.sessionState.getCurrentDatabase) + client.currentDatabase) val tblName = tableIdent.last - val table = try { - synchronized { - client.getTable(databaseName, tblName) - } - } catch { - case te: 
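
The split above pairs with schemaStringFromParts in the cache loader earlier in this file: the schema JSON is chopped into fixed-size chunks stored as numbered properties and glued back together on read. A sketch of that round trip is below; the threshold value and the plain Map stand in for the real conf and metastore table, while the property names come from the patch.

  object SchemaPartsRoundTrip {
    def split(schemaJson: String, threshold: Int): Map[String, String] = {
      val parts = schemaJson.grouped(threshold).toSeq
      val indexed = parts.zipWithIndex.map { case (part, index) =>
        s"spark.sql.sources.schema.part.$index" -> part
      }
      (indexed :+ ("spark.sql.sources.schema.numParts" -> parts.size.toString)).toMap
    }

    def reassemble(properties: Map[String, String]): Option[String] =
      properties.get("spark.sql.sources.schema.numParts").map { numParts =>
        (0 until numParts.toInt)
          .map(i => properties(s"spark.sql.sources.schema.part.$i"))
          .mkString
      }

    def main(args: Array[String]): Unit = {
      val json  = """{"type":"struct","fields":[{"name":"key","type":"integer"}]}"""
      val props = split(json, threshold = 16)
      assert(reassemble(props) == Some(json))
      println(s"stored as ${props.size - 1} parts")
    }
  }
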
org.apache.hadoop.hive.ql.metadata.InvalidTableException => - throw new NoSuchTableException - } + val table = client.getTable(databaseName, tblName) - if (table.getProperty("spark.sql.sources.provider") != null) { + if (table.properties.get("spark.sql.sources.provider").isDefined) { val dataSourceTable = cachedDataSourceTables(QualifiedTableName(databaseName, tblName).toLowerCase) // Then, if alias is specified, wrap the table with a Subquery using the alias. - // Othersie, wrap the table with a Subquery using the table name. + // Otherwise, wrap the table with a Subquery using the table name. val withAlias = alias.map(a => Subquery(a, dataSourceTable)).getOrElse( Subquery(tableIdent.last, dataSourceTable)) withAlias - } else if (table.isView) { - // if the unresolved relation is from hive view - // parse the text into logic node. - HiveQl.createPlanForView(table, alias) + } else if (table.tableType == VirtualView) { + val viewText = table.viewText.getOrElse(sys.error("Invalid view without text.")) + alias match { + // because hive use things like `_c0` to build the expanded text + // currently we cannot support view from "create view v1(c1) as ..." + case None => Subquery(table.name, HiveQl.createPlan(viewText)) + case Some(aliasText) => Subquery(aliasText, HiveQl.createPlan(viewText)) + } } else { - val partitions: Seq[Partition] = - if (table.isPartitioned) { - synchronized { - HiveShim.getAllPartitionsOf(client, table).toSeq - } - } else { - Nil - } - - MetastoreRelation(databaseName, tblName, alias)( - table.getTTable, partitions.map(part => part.getTPartition))(hive) + MetastoreRelation(databaseName, tblName, alias)(table)(hive) } } @@ -260,7 +275,9 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val useCached = parquetRelation.paths.toSet == pathsInMetastore.toSet && logical.schema.sameType(metastoreSchema) && - parquetRelation.maybePartitionSpec == partitionSpecInMetastore + parquetRelation.partitionSpec == partitionSpecInMetastore.getOrElse { + PartitionSpec(StructType(Nil), Array.empty[sources.Partition]) + } if (useCached) { Some(logical) @@ -272,7 +289,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with case other => logWarning( s"${metastoreRelation.databaseName}.${metastoreRelation.tableName} should be stored " + - s"as Parquet. However, we are getting a ${other} from the metastore cache. " + + s"as Parquet. However, we are getting a $other from the metastore cache. 
" + s"This cached entry will be invalidated.") cachedDataSourceTables.invalidate(tableIdentifier) None @@ -294,8 +311,9 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val cached = getCached(tableIdentifier, paths, metastoreSchema, Some(partitionSpec)) val parquetRelation = cached.getOrElse { - val created = - LogicalRelation(ParquetRelation2(paths, parquetOptions, None, Some(partitionSpec))(hive)) + val created = LogicalRelation( + new ParquetRelation2( + paths.toArray, None, Some(partitionSpec), parquetOptions)(hive)) cachedDataSourceTables.put(tableIdentifier, created) created } @@ -306,8 +324,8 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with val cached = getCached(tableIdentifier, paths, metastoreSchema, None) val parquetRelation = cached.getOrElse { - val created = - LogicalRelation(ParquetRelation2(paths, parquetOptions)(hive)) + val created = LogicalRelation( + new ParquetRelation2(paths.toArray, None, None, parquetOptions)(hive)) cachedDataSourceTables.put(tableIdentifier, created) created } @@ -318,178 +336,10 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with result.newInstance() } - override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = synchronized { - val dbName = if (!caseSensitive) { - if (databaseName.isDefined) Some(databaseName.get.toLowerCase) else None - } else { - databaseName - } - val db = dbName.getOrElse(hive.sessionState.getCurrentDatabase) + override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { + val db = databaseName.getOrElse(client.currentDatabase) - client.getAllTables(db).map(tableName => (tableName, false)) - } - - /** - * Create table with specified database, table name, table description and schema - * @param databaseName Database Name - * @param tableName Table Name - * @param schema Schema of the new table, if not specified, will use the schema - * specified in crtTbl - * @param allowExisting if true, ignore AlreadyExistsException - * @param desc CreateTableDesc object which contains the SerDe info. Currently - * we support most of the features except the bucket. - */ - def createTable( - databaseName: String, - tableName: String, - schema: Seq[Attribute], - allowExisting: Boolean = false, - desc: Option[CreateTableDesc] = None) { - val hconf = hive.hiveconf - - val (dbName, tblName) = processDatabaseAndTableName(databaseName, tableName) - val tbl = new Table(dbName, tblName) - - val crtTbl: CreateTableDesc = desc.getOrElse(null) - - // We should respect the passed in schema, unless it's not set - val hiveSchema: JList[FieldSchema] = if (schema == null || schema.isEmpty) { - crtTbl.getCols - } else { - schema.map(attr => new FieldSchema(attr.name, toMetastoreType(attr.dataType), null)) - } - tbl.setFields(hiveSchema) - - // Most of code are similar with the DDLTask.createTable() of Hive, - if (crtTbl != null && crtTbl.getTblProps() != null) { - tbl.getTTable().getParameters().putAll(crtTbl.getTblProps()) - } - - if (crtTbl != null && crtTbl.getPartCols() != null) { - tbl.setPartCols(crtTbl.getPartCols()) - } - - if (crtTbl != null && crtTbl.getStorageHandler() != null) { - tbl.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE, - crtTbl.getStorageHandler()) - } - - /* - * We use LazySimpleSerDe by default. - * - * If the user didn't specify a SerDe, and any of the columns are not simple - * types, we will have to use DynamicSerDe instead. 
- */ - if (crtTbl == null || crtTbl.getSerName() == null) { - val storageHandler = tbl.getStorageHandler() - if (storageHandler == null) { - logInfo(s"Default to LazySimpleSerDe for table $dbName.$tblName") - tbl.setSerializationLib(classOf[LazySimpleSerDe].getName()) - - import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - import org.apache.hadoop.io.Text - import org.apache.hadoop.mapred.TextInputFormat - - tbl.setInputFormatClass(classOf[TextInputFormat]) - tbl.setOutputFormatClass(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]]) - tbl.setSerializationLib("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - } else { - val serDeClassName = storageHandler.getSerDeClass().getName() - logInfo(s"Use StorageHandler-supplied $serDeClassName for table $dbName.$tblName") - tbl.setSerializationLib(serDeClassName) - } - } else { - // let's validate that the serde exists - val serdeName = crtTbl.getSerName() - try { - val d = ReflectionUtils.newInstance(hconf.getClassByName(serdeName), hconf) - if (d != null) { - logDebug("Found class for $serdeName") - } - } catch { - case e: SerDeException => throw new HiveException("Cannot validate serde: " + serdeName, e) - } - tbl.setSerializationLib(serdeName) - } - - if (crtTbl != null && crtTbl.getFieldDelim() != null) { - tbl.setSerdeParam(serdeConstants.FIELD_DELIM, crtTbl.getFieldDelim()) - tbl.setSerdeParam(serdeConstants.SERIALIZATION_FORMAT, crtTbl.getFieldDelim()) - } - if (crtTbl != null && crtTbl.getFieldEscape() != null) { - tbl.setSerdeParam(serdeConstants.ESCAPE_CHAR, crtTbl.getFieldEscape()) - } - - if (crtTbl != null && crtTbl.getCollItemDelim() != null) { - tbl.setSerdeParam(serdeConstants.COLLECTION_DELIM, crtTbl.getCollItemDelim()) - } - if (crtTbl != null && crtTbl.getMapKeyDelim() != null) { - tbl.setSerdeParam(serdeConstants.MAPKEY_DELIM, crtTbl.getMapKeyDelim()) - } - if (crtTbl != null && crtTbl.getLineDelim() != null) { - tbl.setSerdeParam(serdeConstants.LINE_DELIM, crtTbl.getLineDelim()) - } - HiveShim.setTblNullFormat(crtTbl, tbl) - - if (crtTbl != null && crtTbl.getSerdeProps() != null) { - val iter = crtTbl.getSerdeProps().entrySet().iterator() - while (iter.hasNext()) { - val m = iter.next() - tbl.setSerdeParam(m.getKey(), m.getValue()) - } - } - - if (crtTbl != null && crtTbl.getComment() != null) { - tbl.setProperty("comment", crtTbl.getComment()) - } - - if (crtTbl != null && crtTbl.getLocation() != null) { - HiveShim.setLocation(tbl, crtTbl) - } - - if (crtTbl != null && crtTbl.getSkewedColNames() != null) { - tbl.setSkewedColNames(crtTbl.getSkewedColNames()) - } - if (crtTbl != null && crtTbl.getSkewedColValues() != null) { - tbl.setSkewedColValues(crtTbl.getSkewedColValues()) - } - - if (crtTbl != null) { - tbl.setStoredAsSubDirectories(crtTbl.isStoredAsSubDirectories()) - tbl.setInputFormatClass(crtTbl.getInputFormat()) - tbl.setOutputFormatClass(crtTbl.getOutputFormat()) - } - - tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName()) - tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName()) - - if (crtTbl != null && crtTbl.isExternal()) { - tbl.setProperty("EXTERNAL", "TRUE") - tbl.setTableType(TableType.EXTERNAL_TABLE) - } - - // set owner - try { - tbl.setOwner(hive.hiveconf.getUser) - } catch { - case e: IOException => throw new HiveException("Unable to get current user", e) - } - - // set create time - tbl.setCreateTime((System.currentTimeMillis() / 1000).asInstanceOf[Int]) - - // TODO add bucket support - // TODO set more info if Hive upgrade - - // 
create the table - synchronized { - try client.createTable(tbl, allowExisting) catch { - case e: org.apache.hadoop.hive.metastore.api.AlreadyExistsException - if allowExisting => // Do nothing - case e: Throwable => throw e - } - } + client.listTables(db).map(tableName => (tableName, false)) } protected def processDatabaseAndTableName( @@ -531,7 +381,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // Inserting into partitioned table is not supported in Parquet data source (yet). if !relation.hiveQlTable.isPartitioned && hive.convertMetastoreParquet && - hive.conf.parquetUseDataSourceApi && + conf.parquetUseDataSourceApi && relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") => val parquetRelation = convertToParquetRelation(relation) val attributedRewrites = relation.output.zip(parquetRelation.output) @@ -542,7 +392,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // Inserting into partitioned table is not supported in Parquet data source (yet). if !relation.hiveQlTable.isPartitioned && hive.convertMetastoreParquet && - hive.conf.parquetUseDataSourceApi && + conf.parquetUseDataSourceApi && relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") => val parquetRelation = convertToParquetRelation(relation) val attributedRewrites = relation.output.zip(parquetRelation.output) @@ -551,7 +401,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with // Read path case p @ PhysicalOperation(_, _, relation: MetastoreRelation) if hive.convertMetastoreParquet && - hive.conf.parquetUseDataSourceApi && + conf.parquetUseDataSourceApi && relation.tableDesc.getSerdeClassName.toLowerCase.contains("parquet") => val parquetRelation = convertToParquetRelation(relation) val attributedRewrites = relation.output.zip(parquetRelation.output) @@ -591,100 +441,61 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with * For example, because of a CREATE TABLE X AS statement. */ object CreateTables extends Rule[LogicalPlan] { - import org.apache.hadoop.hive.ql.Context - import org.apache.hadoop.hive.ql.parse.{ASTNode, QB, SemanticAnalyzer} - def apply(plan: LogicalPlan): LogicalPlan = plan transform { // Wait until children are resolved. 
case p: LogicalPlan if !p.childrenResolved => p - - // TODO extra is in type of ASTNode which means the logical plan is not resolved - // Need to think about how to implement the CreateTableAsSelect.resolved - case CreateTableAsSelect(db, tableName, child, allowExisting, Some(extra: ASTNode)) => - val (dbName, tblName) = processDatabaseAndTableName(db, tableName) - val databaseName = dbName.getOrElse(hive.sessionState.getCurrentDatabase) - - // Get the CreateTableDesc from Hive SemanticAnalyzer - val desc: Option[CreateTableDesc] = if (tableExists(Seq(databaseName, tblName))) { - None + case p: LogicalPlan if p.resolved => p + case p @ CreateTableAsSelect(table, child, allowExisting) => + val schema = if (table.schema.size > 0) { + table.schema } else { - val sa = new SemanticAnalyzer(hive.hiveconf) { - override def analyzeInternal(ast: ASTNode) { - // A hack to intercept the SemanticAnalyzer.analyzeInternal, - // to ignore the SELECT clause of the CTAS - val method = classOf[SemanticAnalyzer].getDeclaredMethod( - "analyzeCreateTable", classOf[ASTNode], classOf[QB]) - method.setAccessible(true) - method.invoke(this, ast, this.getQB) - } + child.output.map { + attr => new HiveColumn( + attr.name, + HiveMetastoreTypes.toMetastoreType(attr.dataType), null) } - - sa.analyze(extra, new Context(hive.hiveconf)) - Some(sa.getQB().getTableDesc) } - // Check if the query specifies file format or storage handler. - val hasStorageSpec = desc match { - case Some(crtTbl) => - crtTbl != null && (crtTbl.getSerName != null || crtTbl.getStorageHandler != null) - case None => false - } + val desc = table.copy(schema = schema) - if (hive.convertCTAS && !hasStorageSpec) { + if (hive.convertCTAS && table.serde.isEmpty) { // Do the conversion when spark.sql.hive.convertCTAS is true and the query // does not specify any storage format (file format and storage handler). 
- if (dbName.isDefined) { + if (table.specifiedDatabase.isDefined) { throw new AnalysisException( "Cannot specify database name in a CTAS statement " + - "when spark.sql.hive.convertCTAS is set to true.") + "when spark.sql.hive.convertCTAS is set to true.") } val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTableUsingAsSelect( - tblName, + desc.name, hive.conf.defaultDataSourceName, temporary = false, + Array.empty[String], mode, options = Map.empty[String, String], child ) } else { - execution.CreateTableAsSelect( - databaseName, - tableName, - child, - allowExisting, - desc) - } - - case p: LogicalPlan if p.resolved => p - - case p @ CreateTableAsSelect(db, tableName, child, allowExisting, None) => - val (dbName, tblName) = processDatabaseAndTableName(db, tableName) - if (hive.convertCTAS) { - if (dbName.isDefined) { - throw new AnalysisException( - "Cannot specify database name in a CTAS statement " + - "when spark.sql.hive.convertCTAS is set to true.") + val desc = if (table.serde.isEmpty) { + // add default serde + table.copy( + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } else { + table } - val mode = if (allowExisting) SaveMode.Ignore else SaveMode.ErrorIfExists - CreateTableUsingAsSelect( - tblName, - hive.conf.defaultDataSourceName, - temporary = false, - mode, - options = Map.empty[String, String], - child - ) - } else { - val databaseName = dbName.getOrElse(hive.sessionState.getCurrentDatabase) + val (dbName, tblName) = + processDatabaseAndTableName( + desc.specifiedDatabase.getOrElse(client.currentDatabase), desc.name) + execution.CreateTableAsSelect( - databaseName, - tableName, + desc.copy( + specifiedDatabase = Some(dbName), + name = tblName), child, - allowExisting, - None) + allowExisting) } } } @@ -705,17 +516,19 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with def castChildOutput(p: InsertIntoTable, table: MetastoreRelation, child: LogicalPlan) : LogicalPlan = { val childOutputDataTypes = child.output.map(_.dataType) + val numDynamicPartitions = p.partition.values.count(_.isEmpty) val tableOutputDataTypes = - (table.attributes ++ table.partitionKeys).take(child.output.length).map(_.dataType) + (table.attributes ++ table.partitionKeys.takeRight(numDynamicPartitions)) + .take(child.output.length).map(_.dataType) if (childOutputDataTypes == tableOutputDataTypes) { - p + InsertIntoHiveTable(table, p.partition, p.child, p.overwrite, p.ifNotExists) } else if (childOutputDataTypes.size == tableOutputDataTypes.size && childOutputDataTypes.zip(tableOutputDataTypes) .forall { case (left, right) => left.sameType(right) }) { // If both types ignoring nullability of ArrayType, MapType, StructType are the same, // use InsertIntoHiveTable instead of InsertIntoTable. - InsertIntoHiveTable(p.table, p.partition, p.child, p.overwrite, p.ifNotExists) + InsertIntoHiveTable(table, p.partition, p.child, p.overwrite, p.ifNotExists) } else { // Only do the casting when child output data types differ from table output data types. val castedChildOutput = child.output.zip(table.output).map { @@ -733,13 +546,17 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with * UNIMPLEMENTED: It needs to be decided how we will persist in-memory tables to the metastore. * For now, if this functionality is desired mix in the in-memory [[OverrideCatalog]]. */ - override def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit = ??? 
+ override def registerTable(tableIdentifier: Seq[String], plan: LogicalPlan): Unit = { + throw new UnsupportedOperationException + } /** * UNIMPLEMENTED: It needs to be decided how we will persist in-memory tables to the metastore. * For now, if this functionality is desired mix in the in-memory [[OverrideCatalog]]. */ - override def unregisterTable(tableIdentifier: Seq[String]): Unit = ??? + override def unregisterTable(tableIdentifier: Seq[String]): Unit = { + throw new UnsupportedOperationException + } override def unregisterAllTables(): Unit = {} } @@ -750,7 +567,7 @@ private[hive] class HiveMetastoreCatalog(hive: HiveContext) extends Catalog with * because Hive table doesn't have nullability for ARRAY, MAP, STRUCT types. */ private[hive] case class InsertIntoHiveTable( - table: LogicalPlan, + table: MetastoreRelation, partition: Map[String, Option[String]], child: LogicalPlan, overwrite: Boolean, @@ -760,20 +577,26 @@ private[hive] case class InsertIntoHiveTable( override def children: Seq[LogicalPlan] = child :: Nil override def output: Seq[Attribute] = child.output - override lazy val resolved: Boolean = childrenResolved && child.output.zip(table.output).forall { + val numDynamicPartitions = partition.values.count(_.isEmpty) + + // This is the expected schema of the table prepared to be inserted into, + // including dynamic partition columns. + val tableOutput = table.attributes ++ table.partitionKeys.takeRight(numDynamicPartitions) + + override lazy val resolved: Boolean = childrenResolved && child.output.zip(tableOutput).forall { case (childAttr, tableAttr) => childAttr.dataType.sameType(tableAttr.dataType) } } private[hive] case class MetastoreRelation (databaseName: String, tableName: String, alias: Option[String]) - (val table: TTable, val partitions: Seq[TPartition]) + (val table: HiveTable) (@transient sqlContext: SQLContext) extends LeafNode with MultiInstanceRelation { self: Product => - override def equals(other: scala.Any): Boolean = other match { + override def equals(other: Any): Boolean = other match { case relation: MetastoreRelation => databaseName == relation.databaseName && tableName == relation.tableName && @@ -786,16 +609,63 @@ private[hive] case class MetastoreRelation Objects.hashCode(databaseName, tableName, alias, output) } - // TODO: Can we use org.apache.hadoop.hive.ql.metadata.Table as the type of table and - // use org.apache.hadoop.hive.ql.metadata.Partition as the type of elements of partitions. - // Right now, using org.apache.hadoop.hive.ql.metadata.Table and - // org.apache.hadoop.hive.ql.metadata.Partition will cause a NotSerializableException - // which indicates the SerDe we used is not Serializable. + @transient val hiveQlTable: Table = { + // We start by constructing an API table as Hive performs several important transformations + // internally when converting an API table to a QL table. 
+ val tTable = new org.apache.hadoop.hive.metastore.api.Table() + tTable.setTableName(table.name) + tTable.setDbName(table.database) + + val tableParameters = new java.util.HashMap[String, String]() + tTable.setParameters(tableParameters) + table.properties.foreach { case (k, v) => tableParameters.put(k, v) } + + tTable.setTableType(table.tableType.name) + + val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() + tTable.setSd(sd) + sd.setCols(table.schema.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + tTable.setPartitionKeys( + table.partitionColumns.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + + table.location.foreach(sd.setLocation) + table.inputFormat.foreach(sd.setInputFormat) + table.outputFormat.foreach(sd.setOutputFormat) + + val serdeInfo = new org.apache.hadoop.hive.metastore.api.SerDeInfo + sd.setSerdeInfo(serdeInfo) + table.serde.foreach(serdeInfo.setSerializationLib) + val serdeParameters = new java.util.HashMap[String, String]() + serdeInfo.setParameters(serdeParameters) + table.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } + + new Table(tTable) + } + + @transient val hiveQlPartitions: Seq[Partition] = table.getAllPartitions.map { p => + val tPartition = new org.apache.hadoop.hive.metastore.api.Partition + tPartition.setDbName(databaseName) + tPartition.setTableName(tableName) + tPartition.setValues(p.values) + + val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor() + tPartition.setSd(sd) + sd.setCols(table.schema.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) + + sd.setLocation(p.storage.location) + sd.setInputFormat(p.storage.inputFormat) + sd.setOutputFormat(p.storage.outputFormat) + + val serdeInfo = new org.apache.hadoop.hive.metastore.api.SerDeInfo + sd.setSerdeInfo(serdeInfo) + serdeInfo.setSerializationLib(p.storage.serde) - @transient val hiveQlTable: Table = new Table(table) + val serdeParameters = new java.util.HashMap[String, String]() + serdeInfo.setParameters(serdeParameters) + table.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } + p.storage.serdeProperties.foreach { case (k, v) => serdeParameters.put(k, v) } - @transient val hiveQlPartitions: Seq[Partition] = partitions.map { p => - new Partition(hiveQlTable, p) + new Partition(hiveQlTable, tPartition) } @transient override lazy val statistics: Statistics = Statistics( @@ -841,31 +711,31 @@ private[hive] case class MetastoreRelation hiveQlTable.getMetadata ) - implicit class SchemaAttribute(f: FieldSchema) { + implicit class SchemaAttribute(f: HiveColumn) { def toAttribute: AttributeReference = AttributeReference( - f.getName, - HiveMetastoreTypes.toDataType(f.getType), + f.name, + HiveMetastoreTypes.toDataType(f.hiveType), // Since data can be dumped in randomly with no validation, everything is nullable. nullable = true )(qualifiers = Seq(alias.getOrElse(tableName))) } - // Must be a stable value since new attributes are born here. - val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute) + /** PartitionKey attributes */ + val partitionKeys = table.partitionColumns.map(_.toAttribute) /** Non-partitionKey attributes */ - val attributes = hiveQlTable.getCols.map(_.toAttribute) + val attributes = table.schema.map(_.toAttribute) val output = attributes ++ partitionKeys /** An attribute map that can be used to lookup original attributes based on expression id. 
*/ - val attributeMap = AttributeMap(output.map(o => (o,o))) + val attributeMap = AttributeMap(output.map(o => (o, o))) /** An attribute map for determining the ordinal for non-partition columns. */ val columnOrdinals = AttributeMap(attributes.zipWithIndex) override def newInstance(): MetastoreRelation = { - MetastoreRelation(databaseName, tableName, alias)(table, partitions)(sqlContext) + MetastoreRelation(databaseName, tableName, alias)(table)(sqlContext) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 63a8c05f775b8..a5ca3613c5e00 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -22,14 +22,15 @@ import java.sql.Date import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.ql.Context +import org.apache.hadoop.hive.serde.serdeConstants +import org.apache.hadoop.hive.ql.{ErrorMsg, Context} import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, FunctionInfo} import org.apache.hadoop.hive.ql.lib.Node -import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.hive.ql.parse._ import org.apache.hadoop.hive.ql.plan.PlanUtils -import org.apache.spark.sql.{AnalysisException, SparkSQLParser} +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ @@ -38,6 +39,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.execution.ExplainCommand import org.apache.spark.sql.sources.DescribeCommand +import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{HiveNativeCommand, DropTable, AnalyzeTable, HiveScriptIOSchema} import org.apache.spark.sql.types._ import org.apache.spark.util.random.RandomSampler @@ -50,7 +52,25 @@ import scala.collection.JavaConversions._ * back for Hive to execute natively. Will be replaced with a native command that contains the * cmd string. */ -private[hive] case object NativePlaceholder extends Command +private[hive] case object NativePlaceholder extends LogicalPlan { + override def children: Seq[LogicalPlan] = Seq.empty + override def output: Seq[Attribute] = Seq.empty +} + +private[hive] case class CreateTableAsSelect( + tableDesc: HiveTable, + child: LogicalPlan, + allowExisting: Boolean) extends UnaryNode with Command { + + override def output: Seq[Attribute] = Seq.empty[Attribute] + override lazy val resolved: Boolean = + tableDesc.specifiedDatabase.isDefined && + tableDesc.schema.size > 0 && + tableDesc.serde.isDefined && + tableDesc.inputFormat.isDefined && + tableDesc.outputFormat.isDefined && + childrenResolved +} /** Provides a mapping from HiveQL statements to catalyst logical plans and expression trees. 
*/ private[hive] object HiveQl { @@ -78,16 +98,16 @@ private[hive] object HiveQl { "TOK_ALTERVIEW_DROPPARTS", "TOK_ALTERVIEW_PROPERTIES", "TOK_ALTERVIEW_RENAME", - + "TOK_CREATEDATABASE", "TOK_CREATEFUNCTION", "TOK_CREATEINDEX", "TOK_CREATEROLE", "TOK_CREATEVIEW", - + "TOK_DESCDATABASE", "TOK_DESCFUNCTION", - + "TOK_DROPDATABASE", "TOK_DROPFUNCTION", "TOK_DROPINDEX", @@ -95,22 +115,22 @@ private[hive] object HiveQl { "TOK_DROPTABLE_PROPERTIES", "TOK_DROPVIEW", "TOK_DROPVIEW_PROPERTIES", - + "TOK_EXPORT", - + "TOK_GRANT", "TOK_GRANT_ROLE", - + "TOK_IMPORT", - + "TOK_LOAD", - + "TOK_LOCKTABLE", - + "TOK_MSCK", - + "TOK_REVOKE", - + "TOK_SHOW_COMPACTIONS", "TOK_SHOW_CREATETABLE", "TOK_SHOW_GRANT", @@ -127,9 +147,9 @@ private[hive] object HiveQl { "TOK_SHOWINDEXES", "TOK_SHOWLOCKS", "TOK_SHOWPARTITIONS", - + "TOK_SWITCHDATABASE", - + "TOK_UNLOCKTABLE" ) @@ -140,10 +160,7 @@ private[hive] object HiveQl { "TOK_TRUNCATETABLE" // truncate table" is a NativeCommand, does not need to explain. ) ++ nativeCommands - protected val hqlParser = { - val fallback = new ExtendedHiveQlParser - new SparkSQLParser(fallback.parse(_)) - } + protected val hqlParser = new ExtendedHiveQlParser /** * A set of implicit transformations that allow Hive ASTNodes to be rewritten by transformations @@ -230,12 +247,23 @@ private[hive] object HiveQl { * Otherwise, there will be Null pointer exception, * when retrieving properties form HiveConf. */ - val hContext = new Context(new HiveConf()) + val hContext = new Context(hiveConf) val node = ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, hContext)) hContext.clear() node } + /** + * Returns the HiveConf + */ + private[this] def hiveConf(): HiveConf = { + val ss = SessionState.get() // SessionState is lazy initializaion, it can be null here + if (ss == null) { + new HiveConf() + } else { + ss.getConf + } + } /** Returns a LogicalPlan for a given HiveQL string. */ def parseSql(sql: String): LogicalPlan = hqlParser.parse(sql) @@ -262,6 +290,7 @@ private[hive] object HiveQl { case otherMessage => throw new AnalysisException(otherMessage) } + case e: MatchError => throw e case e: Exception => throw new AnalysisException(e.getMessage) case e: NotImplementedError => @@ -275,14 +304,6 @@ private[hive] object HiveQl { } } - /** Creates LogicalPlan for a given VIEW */ - def createPlanForView(view: Table, alias: Option[String]): Subquery = alias match { - // because hive use things like `_c0` to build the expanded text - // currently we cannot support view from "create view v1(c1) as ..." - case None => Subquery(view.getTableName, createPlan(view.getViewExpandedText)) - case Some(aliasText) => Subquery(aliasText, createPlan(view.getViewExpandedText)) - } - def parseDdl(ddl: String): Seq[Attribute] = { val tree = try { @@ -421,16 +442,16 @@ private[hive] object HiveQl { } /** - * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2)) - * is equivalent to + * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2)) + * is equivalent to * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2 * Check the following link for details. - * + * https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup * * The bitmask denotes the grouping expressions validity for a grouping set, * the bitmask also be called as grouping id (`GROUPING__ID`, the virtual column in Hive) - * e.g. In superset (k1, k2, k3), (bit 0: k1, bit 1: k2, and bit 2: k3), the grouping id of + * e.g. 
In superset (k1, k2, k3), (bit 0: k1, bit 1: k2, and bit 2: k3), the grouping id of * GROUPING SETS (k1, k2) and (k2) should be 3 and 2 respectively. */ protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = { @@ -444,7 +465,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val bitmasks: Seq[Int] = setASTs.map(set => set match { case Token("TOK_GROUPING_SETS_EXPRESSION", null) => 0 - case Token("TOK_GROUPING_SETS_EXPRESSION", children) => + case Token("TOK_GROUPING_SETS_EXPRESSION", children) => children.foldLeft(0)((bitmap, col) => { val colString = col.asInstanceOf[ASTNode].toStringTree() require(keyMap.contains(colString), s"$colString doens't show up in the GROUP BY list") @@ -456,6 +477,14 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C (keys, bitmasks) } + protected def getProperties(node: Node): Seq[(String, String)] = node match { + case Token("TOK_TABLEPROPLIST", list) => + list.map { + case Token("TOK_TABLEPROPERTY", Token(key, Nil) :: Token(value, Nil) :: Nil) => + (unquoteString(key) -> unquoteString(value)) + } + } + protected def nodeToPlan(node: Node): LogicalPlan = node match { // Special drop table that also uncaches. case Token("TOK_DROPTABLE", @@ -465,8 +494,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C DropTable(tableName, ifExists.nonEmpty) // Support "ANALYZE TABLE tableNmae COMPUTE STATISTICS noscan" case Token("TOK_ANALYZE", - Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: - isNoscan) => + Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: + isNoscan) => // Reference: // https://cwiki.apache.org/confluence/display/Hive/StatsDev#StatsDev-ExistingTables if (partitionSpec.nonEmpty) { @@ -536,6 +565,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val ( Some(tableNameParts) :: _ /* likeTable */ :: + externalTable :: Some(query) :: allowExisting +: ignores) = @@ -543,6 +573,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Seq( "TOK_TABNAME", "TOK_LIKETABLE", + "EXTERNAL", "TOK_QUERY", "TOK_IFNOTEXISTS", "TOK_TABLECOMMENT", @@ -565,14 +596,186 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children) val (db, tableName) = extractDbNameTableName(tableNameParts) - CreateTableAsSelect(db, tableName, nodeToPlan(query), allowExisting != None, Some(node)) + // TODO add bucket support + var tableDesc: HiveTable = HiveTable( + specifiedDatabase = db, + name = tableName, + schema = Seq.empty[HiveColumn], + partitionColumns = Seq.empty[HiveColumn], + properties = Map[String, String](), + serdeProperties = Map[String, String](), + tableType = if (externalTable.isDefined) ExternalTable else ManagedTable, + location = None, + inputFormat = None, + outputFormat = None, + serde = None, + viewText = None) + + // default storage type abbriviation (e.g. RCFile, ORC, PARQUET etc.) 
+ val defaultStorageType = hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT) + // handle the default format for the storage type abbriviation + tableDesc = if ("SequenceFile".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.mapred.SequenceFileInputFormat"), + outputFormat = Option("org.apache.hadoop.mapred.SequenceFileOutputFormat")) + } else if ("RCFile".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat"), + serde = Option(hiveConf.getVar(HiveConf.ConfVars.HIVEDEFAULTRCFILESERDE))) + } else if ("ORC".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"), + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } else if ("PARQUET".equalsIgnoreCase(defaultStorageType)) { + tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat"), + serde = + Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + } else { + tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.mapred.TextInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat")) + } + + children.collect { + case list @ Token("TOK_TABCOLLIST", _) => + val cols = BaseSemanticAnalyzer.getColumns(list, true) + if (cols != null) { + tableDesc = tableDesc.copy( + schema = cols.map { field => + HiveColumn(field.getName, field.getType, field.getComment) + }) + } + case Token("TOK_TABLECOMMENT", child :: Nil) => + val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + // TODO support the sql text + tableDesc = tableDesc.copy(viewText = Option(comment)) + case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) => + val cols = BaseSemanticAnalyzer.getColumns(list(0), false) + if (cols != null) { + tableDesc = tableDesc.copy( + partitionColumns = cols.map { field => + HiveColumn(field.getName, field.getType, field.getComment) + }) + } + case Token("TOK_TABLEROWFORMAT", Token("TOK_SERDEPROPS", child :: Nil) :: Nil) => + val serdeParams = new java.util.HashMap[String, String]() + child match { + case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) => + val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString (rowChild1.getText()) + serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim) + serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim) + if (rowChild2.length > 1) { + val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString (rowChild2(0).getText) + serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape) + } + case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) => + val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim) + case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) => + val mapKeyDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim) + case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) => + val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + if (!(lineDelim == "\n") && !(lineDelim == "10")) { + throw new AnalysisException( + 
SemanticAnalyzer.generateErrorMessage( + rowChild, + ErrorMsg.LINES_TERMINATED_BY_NON_NEWLINE.getMsg)) + } + serdeParams.put(serdeConstants.LINE_DELIM, lineDelim) + case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) => + val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + // TODO support the nullFormat + case _ => assert(false) + } + tableDesc = tableDesc.copy( + serdeProperties = tableDesc.serdeProperties ++ serdeParams) + case Token("TOK_TABLELOCATION", child :: Nil) => + var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + location = EximUtil.relativeToAbsolutePath(hiveConf, location) + tableDesc = tableDesc.copy(location = Option(location)) + case Token("TOK_TABLESERIALIZER", child :: Nil) => + tableDesc = tableDesc.copy( + serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText))) + if (child.getChildCount == 2) { + val serdeParams = new java.util.HashMap[String, String]() + BaseSemanticAnalyzer.readProps( + (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams) + tableDesc = tableDesc.copy(serdeProperties = tableDesc.serdeProperties ++ serdeParams) + } + case Token("TOK_FILEFORMAT_GENERIC", child :: Nil) => + throw new SemanticException( + s"Unrecognized file format in STORED AS clause: ${child.getText}") + + case Token("TOK_TBLRCFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + } + + case Token("TOK_TBLORCFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"), + outputFormat = Option("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } + + case Token("TOK_TBLPARQUETFILE", Nil) => + tableDesc = tableDesc.copy( + inputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat"), + outputFormat = + Option("org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat")) + if (tableDesc.serde.isEmpty) { + tableDesc = tableDesc.copy( + serde = Option("org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe")) + } - // If its not a "CREATE TABLE AS" like above then just pass it back to hive as a native command.
+ case Token("TOK_TABLESERIALIZER", + Token("TOK_SERDENAME", Token(serdeName, Nil) :: otherProps) :: Nil) => + tableDesc = tableDesc.copy(serde = Option(unquoteString(serdeName))) + + otherProps match { + case Token("TOK_TABLEPROPERTIES", list :: Nil) :: Nil => + tableDesc = tableDesc.copy( + serdeProperties = tableDesc.serdeProperties ++ getProperties(list)) + case Nil => + } + + case Token("TOK_TABLEPROPERTIES", list :: Nil) => + tableDesc = tableDesc.copy(properties = tableDesc.properties ++ getProperties(list)) + case list @ Token("TOK_TABLEFILEFORMAT", _) => + tableDesc = tableDesc.copy( + inputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), + outputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) + case Token("TOK_STORAGEHANDLER", _) => + throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()) + case _ => // Unsupported features + } + + CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting != None) + + // If it's not a "CTAS" like above then take it as a native command case Token("TOK_CREATETABLE", _) => NativePlaceholder // Support "TRUNCATE TABLE table_name [PARTITION partition_spec]" case Token("TOK_TRUNCATETABLE", - Token("TOK_TABLE_PARTITION",table)::Nil) => NativePlaceholder + Token("TOK_TABLE_PARTITION", table) :: Nil) => NativePlaceholder case Token("TOK_QUERY", queryArgs) if Seq("TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) => @@ -613,7 +816,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C clusterByClause :: distributeByClause :: limitClause :: - lateralViewClause :: Nil) = { + lateralViewClause :: + windowClause :: Nil) = { getClauses( Seq( "TOK_INSERT_INTO", @@ -631,15 +835,16 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C "TOK_CLUSTERBY", "TOK_DISTRIBUTEBY", "TOK_LIMIT", - "TOK_LATERAL_VIEW"), + "TOK_LATERAL_VIEW", + "WINDOW"), singleInsert) } - + val relations = fromClause match { case Some(f) => nodeToRelation(f) case None => OneRowRelation } - + val withWhere = whereClause.map { whereNode => val Seq(whereExpr) = whereNode.getChildren.toSeq Filter(nodeToExpr(whereExpr), relations) @@ -691,7 +896,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val serdeProps = propsClause.map { case Token("TOK_TABLEPROPERTY", Token(name, Nil) :: Token(value, Nil) :: Nil) => (name, value) - } + } (Nil, serdeClass, serdeProps) case Nil => (Nil, "", Nil) @@ -736,7 +941,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C // The projection of the query can either be a normal projection, an aggregation // (if there is a group by) or a script transformation. val withProject: LogicalPlan = transformation.getOrElse { - val selectExpressions = + val selectExpressions = nameExpressions(select.getChildren.flatMap(selExprNodeToExpr).toSeq) Seq( groupByClause.map(e => e match { @@ -760,35 +965,38 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_CUBE_GROUPBY", children) => Cube(children.map(nodeToExpr), withLateralView, selectExpressions) case _ => sys.error("Expect WITH CUBE") - }), + }), Some(Project(selectExpressions, withLateralView))).flatten.head } - val withDistinct = - if (selectDistinctClause.isDefined) Distinct(withProject) else withProject - + // Handle HAVING clause.
val withHaving = havingClause.map { h => val havingExpr = h.getChildren.toSeq match { case Seq(hexpr) => nodeToExpr(hexpr) } // Note that we added a cast to boolean. If the expression itself is already boolean, // the optimizer will get rid of the unnecessary cast. - Filter(Cast(havingExpr, BooleanType), withDistinct) - }.getOrElse(withDistinct) + Filter(Cast(havingExpr, BooleanType), withProject) + }.getOrElse(withProject) + // Handle SELECT DISTINCT + val withDistinct = + if (selectDistinctClause.isDefined) Distinct(withHaving) else withHaving + + // Handle ORDER BY, SORT BY, DISTRIBETU BY, and CLUSTER BY clause. val withSort = (orderByClause, sortByClause, distributeByClause, clusterByClause) match { case (Some(totalOrdering), None, None, None) => - Sort(totalOrdering.getChildren.map(nodeToSortOrder), true, withHaving) + Sort(totalOrdering.getChildren.map(nodeToSortOrder), true, withDistinct) case (None, Some(perPartitionOrdering), None, None) => - Sort(perPartitionOrdering.getChildren.map(nodeToSortOrder), false, withHaving) + Sort(perPartitionOrdering.getChildren.map(nodeToSortOrder), false, withDistinct) case (None, None, Some(partitionExprs), None) => - RepartitionByExpression(partitionExprs.getChildren.map(nodeToExpr), withHaving) + RepartitionByExpression(partitionExprs.getChildren.map(nodeToExpr), withDistinct) case (None, Some(perPartitionOrdering), Some(partitionExprs), None) => Sort(perPartitionOrdering.getChildren.map(nodeToSortOrder), false, - RepartitionByExpression(partitionExprs.getChildren.map(nodeToExpr), withHaving)) + RepartitionByExpression(partitionExprs.getChildren.map(nodeToExpr), withDistinct)) case (None, None, None, Some(clusterExprs)) => Sort(clusterExprs.getChildren.map(nodeToExpr).map(SortOrder(_, Ascending)), false, - RepartitionByExpression(clusterExprs.getChildren.map(nodeToExpr), withHaving)) - case (None, None, None, None) => withHaving + RepartitionByExpression(clusterExprs.getChildren.map(nodeToExpr), withDistinct)) + case (None, None, None, None) => withDistinct case _ => sys.error("Unsupported set of ordering / distribution clauses.") } @@ -797,6 +1005,27 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C .map(Limit(_, withSort)) .getOrElse(withSort) + // Collect all window specifications defined in the WINDOW clause. + val windowDefinitions = windowClause.map(_.getChildren.toSeq.collect { + case Token("TOK_WINDOWDEF", + Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) => + windowName -> nodesToWindowSpecification(spec) + }.toMap) + // Handle cases like + // window w1 as (partition by p_mfgr order by p_name + // range between 2 preceding and 2 following), + // w2 as w1 + val resolvedCrossReference = windowDefinitions.map { + windowDefMap => windowDefMap.map { + case (windowName, WindowSpecReference(other)) => + (windowName, windowDefMap(other).asInstanceOf[WindowSpecDefinition]) + case o => o.asInstanceOf[(String, WindowSpecDefinition)] + } + } + + val withWindowDefinitions = + resolvedCrossReference.map(WithWindowDefinition(_, withLimit)).getOrElse(withLimit) + // TOK_INSERT_INTO means to add files to the table. // TOK_DESTINATION means to overwrite the table. 
val resultDestination = @@ -804,7 +1033,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val overwrite = intoClause.isEmpty nodeToDest( resultDestination, - withLimit, + withWindowDefinitions, overwrite) } @@ -922,7 +1151,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Seq(false, false) => Inner }.toBuffer - val joinedTables = tables.reduceLeft(Join(_,_, Inner, None)) + val joinedTables = tables.reduceLeft(Join(_, _, Inner, None)) // Must be transform down. val joinedResult = joinedTables transform { @@ -942,7 +1171,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C // worth the number of hacks that will be required to implement it. Namely, we need to add // some sort of mapped star expansion that would expand all child output row to be similarly // named output expressions where some aggregate expression has been applied (i.e. First). - ??? // Aggregate(groups, Star(None, First(_)) :: Nil, joinedResult) + // Aggregate(groups, Star(None, First(_)) :: Nil, joinedResult) + throw new UnsupportedOperationException case Token(allJoinTokens(joinToken), relation1 :: @@ -1053,8 +1283,16 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C throw new NotImplementedError(s"No parse rules for:\n ${dumpTree(a).toString} ") } - protected val escapedIdentifier = "`([^`]+)`".r + protected val doubleQuotedString = "\"([^\"]+)\"".r + protected val singleQuotedString = "'([^']+)'".r + + protected def unquoteString(str: String) = str match { + case singleQuotedString(s) => s + case doubleQuotedString(s) => s + case other => other + } + /** Strips backticks from ident if present */ protected def cleanIdentifier(ident: String): String = ident match { case escapedIdentifier(i) => i @@ -1104,7 +1342,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C nodeToExpr(qualifier) match { case UnresolvedAttribute(qualifierName) => UnresolvedAttribute(qualifierName :+ cleanIdentifier(attr)) - case other => UnresolvedGetField(other, attr) + case other => UnresolvedExtractValue(other, Literal(attr)) } /* Stars (*) */ @@ -1224,20 +1462,12 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) => CaseWhen(branches.map(nodeToExpr)) case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) => - val transformed = branches.drop(1).sliding(2, 2).map { - case Seq(condVal, value) => - // FIXME (SPARK-2155): the key will get evaluated for multiple times in CaseWhen's eval(). - // Hence effectful / non-deterministic key expressions are *not* supported at the moment. - // We should consider adding new Expressions to get around this. 
- Seq(EqualTo(nodeToExpr(branches(0)), nodeToExpr(condVal)), - nodeToExpr(value)) - case Seq(elseVal) => Seq(nodeToExpr(elseVal)) - }.toSeq.reduce(_ ++ _) - CaseWhen(transformed) + val keyExpr = nodeToExpr(branches.head) + CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr)) /* Complex datatype manipulation */ case Token("[", child :: ordinal :: Nil) => - GetItem(nodeToExpr(child), nodeToExpr(ordinal)) + UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal)) /* Other functions */ case Token("TOK_FUNCTION", Token(ARRAY(), Nil) :: children) => @@ -1250,6 +1480,25 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Substring(nodeToExpr(string), nodeToExpr(pos), nodeToExpr(length)) case Token("TOK_FUNCTION", Token(COALESCE(), Nil) :: list) => Coalesce(list.map(nodeToExpr)) + /* Window Functions */ + case Token("TOK_FUNCTION", Token(name, Nil) +: args :+ Token("TOK_WINDOWSPEC", spec)) => + val function = UnresolvedWindowFunction(name, args.map(nodeToExpr)) + nodesToWindowSpecification(spec) match { + case reference: WindowSpecReference => + UnresolvedWindowExpression(function, reference) + case definition: WindowSpecDefinition => + WindowExpression(function, definition) + } + case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: Token("TOK_WINDOWSPEC", spec) :: Nil) => + // Safe to use Literal(1)? + val function = UnresolvedWindowFunction(name, Literal(1) :: Nil) + nodesToWindowSpecification(spec) match { + case reference: WindowSpecReference => + UnresolvedWindowExpression(function, reference) + case definition: WindowSpecDefinition => + WindowExpression(function, definition) + } + /* UDFs - Must be last otherwise will preempt built in functions */ case Token("TOK_FUNCTION", Token(name, Nil) :: args) => UnresolvedFunction(name, args.map(nodeToExpr)) @@ -1312,6 +1561,101 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C """.stripMargin) } + /* Case insensitive matches for Window Specification */ + val PRECEDING = "(?i)preceding".r + val FOLLOWING = "(?i)following".r + val CURRENT = "(?i)current".r + def nodesToWindowSpecification(nodes: Seq[ASTNode]): WindowSpec = nodes match { + case Token(windowName, Nil) :: Nil => + // Refer to a window spec defined in the window clause. + WindowSpecReference(windowName) + case Nil => + // OVER() + WindowSpecDefinition( + partitionSpec = Nil, + orderSpec = Nil, + frameSpecification = UnspecifiedFrame) + case spec => + val (partitionClause :: rowFrame :: rangeFrame :: Nil) = + getClauses( + Seq( + "TOK_PARTITIONINGSPEC", + "TOK_WINDOWRANGE", + "TOK_WINDOWVALUES"), + spec) + + // Handle Partition By and Order By. 
+ val (partitionSpec, orderSpec) = partitionClause.map { partitionAndOrdering => + val (partitionByClause :: orderByClause :: sortByClause :: clusterByClause :: Nil) = + getClauses( + Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_CLUSTERBY"), + partitionAndOrdering.getChildren.toSeq.asInstanceOf[Seq[ASTNode]]) + + (partitionByClause, orderByClause.orElse(sortByClause), clusterByClause) match { + case (Some(partitionByExpr), Some(orderByExpr), None) => + (partitionByExpr.getChildren.map(nodeToExpr), + orderByExpr.getChildren.map(nodeToSortOrder)) + case (Some(partitionByExpr), None, None) => + (partitionByExpr.getChildren.map(nodeToExpr), Nil) + case (None, Some(orderByExpr), None) => + (Nil, orderByExpr.getChildren.map(nodeToSortOrder)) + case (None, None, Some(clusterByExpr)) => + val expressions = clusterByExpr.getChildren.map(nodeToExpr) + (expressions, expressions.map(SortOrder(_, Ascending))) + case _ => + throw new NotImplementedError( + s"""No parse rules for Node ${partitionAndOrdering.getName} + """.stripMargin) + } + }.getOrElse { + (Nil, Nil) + } + + // Handle Window Frame + val windowFrame = + if (rowFrame.isEmpty && rangeFrame.isEmpty) { + UnspecifiedFrame + } else { + val frameType = rowFrame.map(_ => RowFrame).getOrElse(RangeFrame) + def nodeToBoundary(node: Node): FrameBoundary = node match { + case Token(PRECEDING(), Token(count, Nil) :: Nil) => + if (count.toLowerCase() == "unbounded") { + UnboundedPreceding + } else { + ValuePreceding(count.toInt) + } + case Token(FOLLOWING(), Token(count, Nil) :: Nil) => + if (count.toLowerCase() == "unbounded") { + UnboundedFollowing + } else { + ValueFollowing(count.toInt) + } + case Token(CURRENT(), Nil) => CurrentRow + case _ => + throw new NotImplementedError( + s"""No parse rules for the Window Frame Boundary based on Node ${node.getName} + """.stripMargin) + } + + rowFrame.orElse(rangeFrame).map { frame => + frame.getChildren.toList match { + case precedingNode :: followingNode :: Nil => + SpecifiedWindowFrame( + frameType, + nodeToBoundary(precedingNode), + nodeToBoundary(followingNode)) + case precedingNode :: Nil => + SpecifiedWindowFrame(frameType, nodeToBoundary(precedingNode), CurrentRow) + case _ => + throw new NotImplementedError( + s"""No parse rules for the Window Frame based on Node ${frame.getName} + """.stripMargin) + } + }.getOrElse(sys.error(s"If you see this, please file a bug report with your query.")) + } + + WindowSpecDefinition(partitionSpec, orderSpec, windowFrame) + } val explode = "(?i)explode".r def nodesToGenerator(nodes: Seq[Node]): (Generator, Seq[String]) = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index be9249a8b1f44..c6b65106452bf 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -140,7 +140,7 @@ private[hive] trait HiveStrategies { PhysicalRDD(plan.output, sparkContext.emptyRDD[Row]) :: Nil } else { hiveContext - .parquetFile(partitionLocations: _*) + .read.parquet(partitionLocations: _*) .addPartitioningAttributes(relation.partitionKeys) .lowerCase .where(unresolvedOtherPredicates) @@ -152,7 +152,7 @@ private[hive] trait HiveStrategies { } else { hiveContext - .parquetFile(relation.hiveQlTable.getDataLocation.toString) + .read.parquet(relation.hiveQlTable.getDataLocation.toString) .lowerCase .where(unresolvedOtherPredicates) .select(unresolvedProjection: _*) 
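The two HiveStrategies hunks above swap the older parquetFile(...) entry point for the DataFrameReader-based read.parquet(...) API. A minimal sketch of the same call pattern from user code, assuming a SQLContext named sqlContext and purely illustrative path and column names:

    // Pre-1.4 style, now deprecated; shown only for comparison.
    // val events = sqlContext.parquetFile("/data/events")

    // 1.4 style: obtain a DataFrameReader via sqlContext.read, then load Parquet.
    val events = sqlContext.read.parquet("/data/events")
    events.where(events("year") === 2015).select("id", "value").show()

The strategy code above makes the same substitution internally before applying its predicates and projection.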
@@ -221,14 +221,14 @@ private[hive] trait HiveStrategies { object HiveDDLStrategy extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case CreateTableUsing( - tableName, userSpecifiedSchema, provider, false, opts, allowExisting, managedIfNoPath) => + tableName, userSpecifiedSchema, provider, false, opts, allowExisting, managedIfNoPath) => ExecutedCommand( CreateMetastoreDataSource( tableName, userSpecifiedSchema, provider, opts, allowExisting, managedIfNoPath)) :: Nil - case CreateTableUsingAsSelect(tableName, provider, false, mode, opts, query) => + case CreateTableUsingAsSelect(tableName, provider, false, partitionCols, mode, opts, query) => val cmd = - CreateMetastoreDataSourceAsSelect(tableName, provider, mode, opts, query) + CreateMetastoreDataSourceAsSelect(tableName, provider, partitionCols, mode, opts, query) ExecutedCommand(cmd) :: Nil case _ => Nil diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index e556c74ffb015..294fc3bd7d5e9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -32,9 +32,10 @@ import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf} import org.apache.spark.SerializableWritable import org.apache.spark.broadcast.Broadcast +import org.apache.spark.Logging import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.DateUtils +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.util.Utils /** @@ -57,7 +58,7 @@ class HadoopTableReader( @transient relation: MetastoreRelation, @transient sc: HiveContext, @transient hiveExtraConf: HiveConf) - extends TableReader { + extends TableReader with Logging { // Hadoop honors "mapred.map.tasks" as hint, but will ignore when mapred.job.tracker is "local". // https://hadoop.apache.org/docs/r1.0.4/mapred-default.html @@ -78,7 +79,7 @@ class HadoopTableReader( makeRDDForTable( hiveTable, Class.forName( - relation.tableDesc.getSerdeClassName, true, sc.sessionState.getConf.getClassLoader) + relation.tableDesc.getSerdeClassName, true, Utils.getContextOrSparkClassLoader) .asInstanceOf[Class[Deserializer]], filterOpt = None) @@ -145,7 +146,7 @@ class HadoopTableReader( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]], filterOpt: Option[PathFilter]): RDD[Row] = { - + // SPARK-5068:get FileStatus and do the filtering locally when the path is not exists def verifyPartitionPath( partitionToDeserializer: Map[HivePartition, Class[_ <: Deserializer]]): @@ -288,7 +289,7 @@ class HadoopTableReader( } } -private[hive] object HadoopTableReader extends HiveInspectors { +private[hive] object HadoopTableReader extends HiveInspectors with Logging { /** * Curried. After given an argument for 'path', the resulting JobConf => Unit closure is used to * instantiate a HadoopRDD. 
@@ -329,6 +330,8 @@ private[hive] object HadoopTableReader extends HiveInspectors { tableDeser.getObjectInspector).asInstanceOf[StructObjectInspector] } + logDebug(soi.toString) + val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) => soi.getStructFieldRef(attr.name) -> ordinal }.unzip diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala index a863aa77cb7e0..0a1d761a52f88 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientInterface.scala @@ -17,30 +17,35 @@ package org.apache.spark.sql.hive.client +import java.io.PrintStream +import java.util.{Map => JMap} + import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException} -case class HiveDatabase( +private[hive] case class HiveDatabase( name: String, location: String) -abstract class TableType { val name: String } -case object ExternalTable extends TableType { override val name = "EXTERNAL_TABLE" } -case object IndexTable extends TableType { override val name = "INDEX_TABLE" } -case object ManagedTable extends TableType { override val name = "MANAGED_TABLE" } -case object VirtualView extends TableType { override val name = "VIRTUAL_VIEW" } +private[hive] abstract class TableType { val name: String } +private[hive] case object ExternalTable extends TableType { override val name = "EXTERNAL_TABLE" } +private[hive] case object IndexTable extends TableType { override val name = "INDEX_TABLE" } +private[hive] case object ManagedTable extends TableType { override val name = "MANAGED_TABLE" } +private[hive] case object VirtualView extends TableType { override val name = "VIRTUAL_VIEW" } -case class HiveStorageDescriptor( +// TODO: Use this for Tables and Partitions +private[hive] case class HiveStorageDescriptor( location: String, inputFormat: String, outputFormat: String, - serde: String) + serde: String, + serdeProperties: Map[String, String]) -case class HivePartition( +private[hive] case class HivePartition( values: Seq[String], storage: HiveStorageDescriptor) -case class HiveColumn(name: String, hiveType: String, comment: String) -case class HiveTable( +private[hive] case class HiveColumn(name: String, hiveType: String, comment: String) +private[hive] case class HiveTable( specifiedDatabase: Option[String], name: String, schema: Seq[HiveColumn], @@ -51,7 +56,8 @@ case class HiveTable( location: Option[String] = None, inputFormat: Option[String] = None, outputFormat: Option[String] = None, - serde: Option[String] = None) { + serde: Option[String] = None, + viewText: Option[String] = None) { @transient private[client] var client: ClientInterface = _ @@ -76,13 +82,17 @@ case class HiveTable( * internal and external classloaders for a given version of Hive and thus must expose only * shared classes. */ -trait ClientInterface { +private[hive] trait ClientInterface { /** * Runs a HiveQL command using Hive, returning the results as a list of strings. Each row will * result in one string. */ def runSqlHive(sql: String): Seq[String] + def setOut(stream: PrintStream): Unit + def setInfo(stream: PrintStream): Unit + def setError(stream: PrintStream): Unit + /** Returns the names of all tables in the given database. */ def listTables(dbName: String): Seq[String] @@ -114,6 +124,11 @@ trait ClientInterface { /** Creates a new database with the given name. 
*/ def createDatabase(database: HiveDatabase): Unit + /** Returns the specified paritition or None if it does not exist. */ + def getPartitionOption( + hTable: HiveTable, + partitionSpec: JMap[String, String]): Option[HivePartition] + /** Returns all partitions for the given table. */ def getAllPartitions(hTable: HiveTable): Seq[HivePartition] diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala index ea52fea037f1f..99aa0f1ded3f8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.client import java.io.{BufferedReader, InputStreamReader, File, PrintStream} import java.net.URI -import java.util.{ArrayList => JArrayList} +import java.util.{ArrayList => JArrayList, Map => JMap, List => JList, Set => JSet} import scala.collection.JavaConversions._ import scala.language.reflectiveCalls @@ -27,6 +27,7 @@ import scala.language.reflectiveCalls import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.metastore.api.Database import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.metastore.TableType import org.apache.hadoop.hive.metastore.api import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hadoop.hive.ql.metadata @@ -54,19 +55,13 @@ import org.apache.spark.sql.execution.QueryExecutionException * @param config a collection of configuration options that will be added to the hive conf before * opening the hive client. */ -class ClientWrapper( +private[hive] class ClientWrapper( version: HiveVersion, config: Map[String, String]) extends ClientInterface with Logging with ReflectionMagic { - private val conf = new HiveConf(classOf[SessionState]) - config.foreach { case (k, v) => - logDebug(s"Hive Config: $k=$v") - conf.set(k, v) - } - // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. private val outputBuffer = new java.io.OutputStream { var pos: Int = 0 @@ -99,17 +94,31 @@ class ClientWrapper( val original = Thread.currentThread().getContextClassLoader Thread.currentThread().setContextClassLoader(getClass.getClassLoader) val ret = try { - val newState = new SessionState(conf) - SessionState.start(newState) - newState.out = new PrintStream(outputBuffer, true, "UTF-8") - newState.err = new PrintStream(outputBuffer, true, "UTF-8") - newState + val oldState = SessionState.get() + if (oldState == null) { + val initialConf = new HiveConf(classOf[SessionState]) + config.foreach { case (k, v) => + logDebug(s"Hive Config: $k=$v") + initialConf.set(k, v) + } + val newState = new SessionState(initialConf) + SessionState.start(newState) + newState.out = new PrintStream(outputBuffer, true, "UTF-8") + newState.err = new PrintStream(outputBuffer, true, "UTF-8") + newState + } else { + oldState + } } finally { Thread.currentThread().setContextClassLoader(original) } ret } + /** Returns the configuration for the current session. 
*/ + def conf: HiveConf = SessionState.get().getConf + + // TODO: should be a def?s private val client = Hive.get(conf) /** @@ -133,6 +142,18 @@ class ClientWrapper( ret } + def setOut(stream: PrintStream): Unit = withHiveState { + state.out = stream + } + + def setInfo(stream: PrintStream): Unit = withHiveState { + state.info = stream + } + + def setError(stream: PrintStream): Unit = withHiveState { + state.err = stream + } + override def currentDatabase: String = withHiveState { state.getCurrentDatabase } @@ -171,14 +192,20 @@ class ClientWrapper( partitionColumns = h.getPartCols.map(f => HiveColumn(f.getName, f.getType, f.getComment)), properties = h.getParameters.toMap, serdeProperties = h.getTTable.getSd.getSerdeInfo.getParameters.toMap, - tableType = ManagedTable, // TODO + tableType = h.getTableType match { + case TableType.MANAGED_TABLE => ManagedTable + case TableType.EXTERNAL_TABLE => ExternalTable + case TableType.VIRTUAL_VIEW => VirtualView + case TableType.INDEX_TABLE => IndexTable + }, location = version match { case hive.v12 => Option(h.call[URI]("getDataLocation")).map(_.toString) case hive.v13 => Option(h.call[Path]("getDataLocation")).map(_.toString) }, inputFormat = Option(h.getInputFormatClass).map(_.getName), outputFormat = Option(h.getOutputFormatClass).map(_.getName), - serde = Option(h.getSerializationLib)).withClient(this) + serde = Option(h.getSerializationLib), + viewText = Option(h.getViewExpandedText)).withClient(this) } converted } @@ -198,6 +225,12 @@ class ClientWrapper( table.partitionColumns.map(c => new FieldSchema(c.name, c.hiveType, c.comment))) table.properties.foreach { case (k, v) => qlTable.setProperty(k, v) } table.serdeProperties.foreach { case (k, v) => qlTable.setSerdeParam(k, v) } + + // set owner + qlTable.setOwner(conf.getUser) + // set create time + qlTable.setCreateTime((System.currentTimeMillis() / 1000).asInstanceOf[Int]) + version match { case hive.v12 => table.location.map(new URI(_)).foreach(u => qlTable.call[URI, Unit]("setDataLocation", u)) @@ -223,27 +256,40 @@ class ClientWrapper( client.alterTable(table.qualifiedName, qlTable) } + private def toHivePartition(partition: metadata.Partition): HivePartition = { + val apiPartition = partition.getTPartition + HivePartition( + values = Option(apiPartition.getValues).map(_.toSeq).getOrElse(Seq.empty), + storage = HiveStorageDescriptor( + location = apiPartition.getSd.getLocation, + inputFormat = apiPartition.getSd.getInputFormat, + outputFormat = apiPartition.getSd.getOutputFormat, + serde = apiPartition.getSd.getSerdeInfo.getSerializationLib, + serdeProperties = apiPartition.getSd.getSerdeInfo.getParameters.toMap)) + } + + override def getPartitionOption( + table: HiveTable, + partitionSpec: JMap[String, String]): Option[HivePartition] = withHiveState { + + val qlTable = toQlTable(table) + val qlPartition = client.getPartition(qlTable, partitionSpec, false) + Option(qlPartition).map(toHivePartition) + } + override def getAllPartitions(hTable: HiveTable): Seq[HivePartition] = withHiveState { val qlTable = toQlTable(hTable) val qlPartitions = version match { case hive.v12 => - client.call[metadata.Table, Set[metadata.Partition]]("getAllPartitionsForPruner", qlTable) + client.call[metadata.Table, JSet[metadata.Partition]]("getAllPartitionsForPruner", qlTable) case hive.v13 => - client.call[metadata.Table, Set[metadata.Partition]]("getAllPartitionsOf", qlTable) + client.call[metadata.Table, JSet[metadata.Partition]]("getAllPartitionsOf", qlTable) } - qlPartitions.map(_.getTPartition).map { p 
=> - HivePartition( - values = Option(p.getValues).map(_.toSeq).getOrElse(Seq.empty), - storage = HiveStorageDescriptor( - location = p.getSd.getLocation, - inputFormat = p.getSd.getInputFormat, - outputFormat = p.getSd.getOutputFormat, - serde = p.getSd.getSerdeInfo.getSerializationLib)) - }.toSeq + qlPartitions.toSeq.map(toHivePartition) } override def listTables(dbName: String): Seq[String] = withHiveState { - client.getAllTables + client.getAllTables(dbName) } /** @@ -267,11 +313,12 @@ class ClientWrapper( try { val cmd_trimmed: String = cmd.trim() val tokens: Array[String] = cmd_trimmed.split("\\s+") + // The remainder of the command. val cmd_1: String = cmd_trimmed.substring(tokens(0).length()).trim() val proc: CommandProcessor = version match { case hive.v12 => classOf[CommandProcessorFactory] - .callStatic[String, HiveConf, CommandProcessor]("get", cmd_1, conf) + .callStatic[String, HiveConf, CommandProcessor]("get", tokens(0), conf) case hive.v13 => classOf[CommandProcessorFactory] .callStatic[Array[String], HiveConf, CommandProcessor]("get", Array(tokens(0)), conf) @@ -294,7 +341,7 @@ class ClientWrapper( res.toSeq case hive.v13 => val res = new JArrayList[Object] - driver.call[JArrayList[Object], Boolean]("getResults", res) + driver.call[JList[Object], Boolean]("getResults", res) res.map { r => r match { case s: String => s diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 710dbca6e3c66..196a3d836cab2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.client import java.io.File -import java.net.URLClassLoader +import java.net.{URL, URLClassLoader} import java.util import scala.language.reflectiveCalls @@ -30,9 +30,10 @@ import org.apache.spark.Logging import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.hive.HiveContext /** Factory for `IsolatedClientLoader` with specific versions of hive. */ -object IsolatedClientLoader { +private[hive] object IsolatedClientLoader { /** * Creates isolated Hive client loaders by downloading the requested version from maven. 
*/ @@ -49,14 +50,13 @@ object IsolatedClientLoader { case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 } - private def downloadVersion(version: HiveVersion): Seq[File] = { + private def downloadVersion(version: HiveVersion): Seq[URL] = { val hiveArtifacts = (Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") ++ (if (version.hasBuiltinsJar) "hive-builtins" :: Nil else Nil)) .map(a => s"org.apache.hive:$a:${version.fullVersion}") :+ "com.google.guava:guava:14.0.1" :+ - "org.apache.hadoop:hadoop-client:2.4.0" :+ - "mysql:mysql-connector-java:5.1.12" + "org.apache.hadoop:hadoop-client:2.4.0" val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -72,10 +72,10 @@ object IsolatedClientLoader { tempDir.mkdir() allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir)) - tempDir.listFiles() + tempDir.listFiles().map(_.toURL) } - private def resolvedVersions = new scala.collection.mutable.HashMap[HiveVersion, Seq[File]] + private def resolvedVersions = new scala.collection.mutable.HashMap[HiveVersion, Seq[URL]] } /** @@ -99,20 +99,22 @@ object IsolatedClientLoader { * @param baseClassLoader The spark classloader that is used to load shared classes. * */ -class IsolatedClientLoader( +private[hive] class IsolatedClientLoader( val version: HiveVersion, - val execJars: Seq[File] = Seq.empty, + val execJars: Seq[URL] = Seq.empty, val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, val rootClassLoader: ClassLoader = ClassLoader.getSystemClassLoader.getParent.getParent, - val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader) + val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader, + val sharedPrefixes: Seq[String] = Seq.empty, + val barrierPrefixes: Seq[String] = Seq.empty) extends Logging { // Check to make sure that the root classloader does not know about Hive. assert(Try(baseClassLoader.loadClass("org.apache.hive.HiveConf")).isFailure) /** All jars used by the hive specific classloader. */ - protected def allJars = execJars.map(_.toURI.toURL).toArray + protected def allJars = execJars.toArray protected def isSharedClass(name: String): Boolean = name.contains("slf4j") || @@ -121,13 +123,14 @@ class IsolatedClientLoader( name.startsWith("scala.") || name.startsWith("com.google") || name.startsWith("java.lang.") || - name.startsWith("java.net") + name.startsWith("java.net") || + sharedPrefixes.exists(name.startsWith) /** True if `name` refers to a spark class that must see specific version of Hive. 
*/ protected def isBarrierClass(name: String): Boolean = - name.startsWith("org.apache.spark.sql.hive.execution.PairSerDe") || name.startsWith(classOf[ClientWrapper].getName) || - name.startsWith(classOf[ReflectionMagic].getName) + name.startsWith(classOf[ReflectionMagic].getName) || + barrierPrefixes.exists(name.startsWith) protected def classToPath(name: String): String = name.replaceAll("\\.", "/") + ".class" @@ -166,6 +169,12 @@ class IsolatedClientLoader( .getConstructors.head .newInstance(version, config) .asInstanceOf[ClientInterface] + } catch { + case ReflectionException(cnf: NoClassDefFoundError) => + throw new ClassNotFoundException( + s"$cnf when creating Hive client using classpath: ${execJars.mkString(", ")}\n" + + "Please make sure that jars for your version of hive and hadoop are included in the " + + s"paths passed to ${HiveContext.HIVE_METASTORE_JARS}.") } finally { Thread.currentThread.setContextClassLoader(baseClassLoader) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala index 90d03049356b5..4d053ae42c2ea 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ReflectionMagic.scala @@ -19,10 +19,18 @@ package org.apache.spark.sql.hive.client import scala.reflect._ +/** Unwraps reflection exceptions. */ +private[client] object ReflectionException { + def unapply(a: Throwable): Option[Throwable] = a match { + case ite: java.lang.reflect.InvocationTargetException => Option(ite.getCause) + case _ => None + } +} + /** * Provides implicit functions on any object for calling methods reflectively. */ -protected trait ReflectionMagic { +private[client] trait ReflectionMagic { /** code for InstanceMagic println( (1 to 22).map { n => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 7db9200d47440..410d9881ac214 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -29,5 +29,5 @@ package object client { case object v13 extends HiveVersion("0.13.1", false) } // scalastyle:on - + } \ No newline at end of file diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala index 76a1965f3cb25..7d3ec12c4eb05 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateTableAsSelect.scala @@ -17,39 +17,51 @@ package org.apache.spark.sql.hive.execution -import org.apache.hadoop.hive.ql.plan.CreateTableDesc - import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.{AnalysisException, SQLContext} import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand -import org.apache.spark.sql.hive.HiveContext -import org.apache.spark.sql.hive.MetastoreRelation +import org.apache.spark.sql.hive.client.{HiveTable, HiveColumn} +import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation, HiveMetastoreTypes} /** * Create table and insert the 
query result into it. - * @param database the database name of the new relation - * @param tableName the table name of the new relation + * @param tableDesc the Table Describe, which may contains serde, storage handler etc. * @param query the query whose result will be insert into the new relation * @param allowExisting allow continue working if it's already exists, otherwise * raise exception - * @param desc the CreateTableDesc, which may contains serde, storage handler etc. - */ private[hive] case class CreateTableAsSelect( - database: String, - tableName: String, + tableDesc: HiveTable, query: LogicalPlan, - allowExisting: Boolean, - desc: Option[CreateTableDesc]) extends RunnableCommand { + allowExisting: Boolean) + extends RunnableCommand { + + def database: String = tableDesc.database + def tableName: String = tableDesc.name override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] lazy val metastoreRelation: MetastoreRelation = { - // Create Hive Table - hiveContext.catalog.createTable(database, tableName, query.output, allowExisting, desc) + import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe + import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + import org.apache.hadoop.io.Text + import org.apache.hadoop.mapred.TextInputFormat + + val withSchema = + tableDesc.copy( + schema = + query.output.map(c => + HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)), + inputFormat = + tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)), + outputFormat = + tableDesc.outputFormat + .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)), + serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName()))) + hiveContext.catalog.client.createTable(withSchema) // Get the Metastore Relation hiveContext.catalog.lookupRelation(Seq(database, tableName), None) match { @@ -63,8 +75,7 @@ case class CreateTableAsSelect( if (allowExisting) { // table already exists, will do nothing, to keep consistent with Hive } else { - throw - new org.apache.hadoop.hive.metastore.api.AlreadyExistsException(s"$database.$tableName") + throw new AnalysisException(s"$database.$tableName already exists.") } } else { hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index 0a5f19eee7105..11ee5503146b9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -63,7 +63,7 @@ case class HiveTableScan( BindReferences.bindReference(pred, relation.partitionKeys) } - // Create a local copy of hiveconf,so that scan specific modifications should not impact + // Create a local copy of hiveconf,so that scan specific modifications should not impact // other queries @transient private[this] val hiveExtraConf = new HiveConf(context.hiveconf) @@ -72,7 +72,7 @@ case class HiveTableScan( addColumnMetadataToConf(hiveExtraConf) @transient - private[this] val hadoopReader = + private[this] val hadoopReader = new HadoopTableReader(attributes, relation, context, hiveExtraConf) private[this] def castFromString(value: String, dataType: DataType) = { @@ -129,7 +129,7 @@ case class HiveTableScan( } } - override def execute(): RDD[Row] = if (!relation.hiveQlTable.isPartitioned) { + 
protected override def doExecute(): RDD[Row] = if (!relation.hiveQlTable.isPartitioned) { hadoopReader.makeRDDForTable(relation.hiveQlTable) } else { hadoopReader.makeRDDForPartitionedTable(prunePartitions(relation.hiveQlPartitions)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 89995a91b1a92..8613332186f28 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -106,7 +106,7 @@ case class InsertIntoHiveTable( } writerContainer - .getLocalFileWriter(row) + .getLocalFileWriter(row, table.schema) .write(serializer.serialize(outputData, standardOI)) } @@ -194,15 +194,12 @@ case class InsertIntoHiveTable( if (partition.nonEmpty) { // loadPartition call orders directories created on the iteration order of the this map - val orderedPartitionSpec = new util.LinkedHashMap[String,String]() - table.hiveQlTable.getPartCols().foreach{ - entry=> - orderedPartitionSpec.put(entry.getName,partitionSpec.get(entry.getName).getOrElse("")) + val orderedPartitionSpec = new util.LinkedHashMap[String, String]() + table.hiveQlTable.getPartCols().foreach { entry => + orderedPartitionSpec.put(entry.getName, partitionSpec.get(entry.getName).getOrElse("")) } val partVals = MetaStoreUtils.getPvals(table.hiveQlTable.getPartCols, partitionSpec) - catalog.synchronized { - catalog.client.validatePartitionNameCharacters(partVals) - } + // inheritTableSpecs is set to true. It should be set to false for a IMPORT query // which is currently considered as a Hive native command. val inheritTableSpecs = true @@ -211,7 +208,7 @@ case class InsertIntoHiveTable( if (numDynamicPartitions > 0) { catalog.synchronized { catalog.client.loadDynamicPartitions( - outputPath, + outputPath.toString, qualifiedTableName, orderedPartitionSpec, overwrite, @@ -224,31 +221,28 @@ case class InsertIntoHiveTable( // ifNotExists is only valid with static partition, refer to // https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DML#LanguageManualDML-InsertingdataintoHiveTablesfromqueries // scalastyle:on - val oldPart = catalog.synchronized { - catalog.client.getPartition( - catalog.client.getTable(qualifiedTableName), partitionSpec, false) - } - if (oldPart == null || !ifNotExists) { - catalog.synchronized { + val oldPart = + catalog.client.getPartitionOption( + catalog.client.getTable(table.databaseName, table.tableName), + partitionSpec) + + if (oldPart.isEmpty || !ifNotExists) { catalog.client.loadPartition( - outputPath, + outputPath.toString, qualifiedTableName, orderedPartitionSpec, overwrite, holdDDLTime, inheritTableSpecs, isSkewedStoreAsSubdir) - } } } } else { - catalog.synchronized { - catalog.client.loadTable( - outputPath, - qualifiedTableName, - overwrite, - holdDDLTime) - } + catalog.client.loadTable( + outputPath.toString, // TODO: URI + qualifiedTableName, + overwrite, + holdDDLTime) } // Invalidate the cache. 
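[Illustration, not part of the patch] The hunk above replaces the null-returning getPartition call and its catalog.synchronized wrapper with the Option-based ClientInterface.getPartitionOption introduced in ClientWrapper. A minimal consuming sketch, assuming code that lives inside the org.apache.spark.sql.hive package where the private[hive] client API is visible:

    import java.util.{HashMap => JHashMap}
    import org.apache.spark.sql.hive.client.ClientInterface

    // Option-based lookup: None means the partition does not exist, so no null check is needed.
    def partitionExists(
        client: ClientInterface,
        db: String,
        table: String,
        spec: JHashMap[String, String]): Boolean = {
      val hiveTable = client.getTable(db, table)
      client.getPartitionOption(hiveTable, spec).isDefined
    }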
@@ -263,5 +257,7 @@ case class InsertIntoHiveTable( override def executeCollect(): Array[Row] = sideEffectResult.toArray - override def execute(): RDD[Row] = sqlContext.sparkContext.parallelize(sideEffectResult, 1) + protected override def doExecute(): RDD[Row] = { + sqlContext.sparkContext.parallelize(sideEffectResult, 1) + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala index 3eddda3b28c66..fd623370cc407 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala @@ -54,7 +54,7 @@ case class ScriptTransformation( override def otherCopyArgs: Seq[HiveContext] = sc :: Nil - def execute(): RDD[Row] = { + protected override def doExecute(): RDD[Row] = { child.execute().mapPartitions { iter => val cmd = List("/bin/bash", "-c", script) val builder = new ProcessBuilder(cmd) @@ -62,7 +62,7 @@ case class ScriptTransformation( val inputStream = proc.getInputStream val outputStream = proc.getOutputStream val reader = new BufferedReader(new InputStreamReader(inputStream)) - + val (outputSerde, outputSoi) = ioschema.initOutputSerDe(output) val iterator: Iterator[Row] = new Iterator[Row] with HiveInspectors { @@ -95,7 +95,7 @@ case class ScriptTransformation( val raw = outputSerde.deserialize(writable) val dataList = outputSoi.getStructFieldsDataAsList(raw) val fieldList = outputSoi.getAllStructFieldRefs() - + var i = 0 dataList.foreach( element => { if (element == null) { @@ -117,7 +117,7 @@ case class ScriptTransformation( if (!hasNext) { throw new NoSuchElementException } - + if (outputSerde == null) { val prevLine = curLine curLine = reader.readLine() @@ -192,7 +192,7 @@ case class HiveScriptIOSchema ( val inputRowFormatMap = inputRowFormat.toMap.withDefault((k) => defaultFormat(k)) val outputRowFormatMap = outputRowFormat.toMap.withDefault((k) => defaultFormat(k)) - + def initInputSerDe(input: Seq[Expression]): (AbstractSerDe, ObjectInspector) = { val (columns, columnTypes) = parseAttrs(input) val serde = initSerDe(inputSerdeClass, columns, columnTypes, inputSerdeProps) @@ -206,22 +206,22 @@ case class HiveScriptIOSchema ( } def parseAttrs(attrs: Seq[Expression]): (Seq[String], Seq[DataType]) = { - + val columns = attrs.map { case aref: AttributeReference => aref.name case e: NamedExpression => e.name case _ => null } - + val columnTypes = attrs.map { case aref: AttributeReference => aref.dataType case e: NamedExpression => e.dataType - case _ => null + case _ => null } (columns, columnTypes) } - + def initSerDe(serdeClassName: String, columns: Seq[String], columnTypes: Seq[DataType], serdeProps: Seq[(String, String)]): AbstractSerDe = { @@ -240,7 +240,7 @@ case class HiveScriptIOSchema ( (kv._1.split("'")(1), kv._2.split("'")(1)) }).toMap + (serdeConstants.LIST_COLUMNS -> columns.mkString(",")) propsMap = propsMap + (serdeConstants.LIST_COLUMN_TYPES -> columnTypesNames) - + val properties = new Properties() properties.putAll(propsMap) serde.initialize(null, properties) @@ -261,7 +261,7 @@ case class HiveScriptIOSchema ( null } } - + def initOutputputSoi(outputSerde: AbstractSerDe): StructObjectInspector = { if (outputSerde != null) { outputSerde.getObjectInspector().asInstanceOf[StructObjectInspector] diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala index a40a1e53117cd..0ba94d7b7c649 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Analyzes the given table in the current database to generate statistics, which will be @@ -84,8 +85,20 @@ case class AddJar(path: String) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] + val currentClassLoader = Utils.getContextOrSparkClassLoader + + // Add jar to current context + val jarURL = new java.io.File(path).toURL + val newClassLoader = new java.net.URLClassLoader(Array(jarURL), currentClassLoader) + Thread.currentThread.setContextClassLoader(newClassLoader) + org.apache.hadoop.hive.ql.metadata.Hive.get().getConf().setClassLoader(newClassLoader) + + // Add jar to isolated hive classloader hiveContext.runSqlHive(s"ADD JAR $path") + + // Add jar to executors hiveContext.sparkContext.addJar(path) + Seq(Row(0)) } } @@ -133,6 +146,7 @@ case class CreateMetastoreDataSource( hiveContext.catalog.createDataSourceTable( tableName, userSpecifiedSchema, + Array.empty[String], provider, optionsWithPath, isExternal) @@ -145,6 +159,7 @@ private[hive] case class CreateMetastoreDataSourceAsSelect( tableName: String, provider: String, + partitionColumns: Array[String], mode: SaveMode, options: Map[String, String], query: LogicalPlan) extends RunnableCommand { @@ -176,12 +191,12 @@ case class CreateMetastoreDataSourceAsSelect( return Seq.empty[Row] case SaveMode.Append => // Check if the specified data source match the data source of the existing table. - val resolved = - ResolvedDataSource(sqlContext, Some(query.schema), provider, optionsWithPath) + val resolved = ResolvedDataSource( + sqlContext, Some(query.schema.asNullable), partitionColumns, provider, optionsWithPath) val createdRelation = LogicalRelation(resolved.relation) EliminateSubQueries(sqlContext.table(tableName).logicalPlan) match { - case l @ LogicalRelation(i: InsertableRelation) => - if (i != createdRelation.relation) { + case l @ LogicalRelation(_: InsertableRelation | _: HadoopFsRelation) => + if (l.relation != createdRelation.relation) { val errorDescription = s"Cannot append to table $tableName because the resolved relation does not " + s"match the existing relation of $tableName. " + @@ -189,14 +204,13 @@ case class CreateMetastoreDataSourceAsSelect( s"table $tableName and using its data source and options." val errorMessage = s""" - |$errorDescription - |== Relations == - |${sideBySide( - s"== Expected Relation ==" :: - l.toString :: Nil, - s"== Actual Relation ==" :: - createdRelation.toString :: Nil).mkString("\n")} - """.stripMargin + |$errorDescription + |== Relations == + |${sideBySide( + s"== Expected Relation ==" :: l.toString :: Nil, + s"== Actual Relation ==" :: createdRelation.toString :: Nil + ).mkString("\n")} + """.stripMargin throw new AnalysisException(errorMessage) } existingSchema = Some(l.schema) @@ -221,7 +235,8 @@ case class CreateMetastoreDataSourceAsSelect( } // Create the relation based on the data of df. 
- val resolved = ResolvedDataSource(sqlContext, provider, mode, optionsWithPath, df) + val resolved = + ResolvedDataSource(sqlContext, provider, partitionColumns, mode, optionsWithPath, df) if (createMetastoreTable) { // We will use the schema of resolved.relation as the schema of the table (instead of @@ -230,6 +245,7 @@ case class CreateMetastoreDataSourceAsSelect( hiveContext.catalog.createDataSourceTable( tableName, Some(resolved.relation.schema), + partitionColumns, provider, optionsWithPath, isExternal) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala index 4b6f0ad75f54f..7b67bcf63e62a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala @@ -17,27 +17,27 @@ package org.apache.spark.sql.hive +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper +import org.apache.spark.sql.AnalysisException import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ConstantObjectInspector} import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory -import org.apache.hadoop.hive.ql.exec.{UDF, UDAF} -import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry} +import org.apache.hadoop.hive.ql.exec._ import org.apache.hadoop.hive.ql.udf.{UDFType => HiveUDFType} import org.apache.hadoop.hive.ql.udf.generic._ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF._ import org.apache.spark.Logging import org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.{Generate, Project, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types._ -import org.apache.spark.sql.catalyst.analysis.MultiAlias -import org.apache.spark.sql.catalyst.errors.TreeNodeException /* Implicit conversions */ import scala.collection.JavaConversions._ @@ -78,6 +78,8 @@ private[hive] case class HiveSimpleUdf(funcWrapper: HiveFunctionWrapper, childre type EvaluatedType = Any type UDFType = UDF + override def deterministic: Boolean = isUDFDeterministic + override def nullable: Boolean = true @transient @@ -141,6 +143,8 @@ private[hive] case class HiveGenericUdf(funcWrapper: HiveFunctionWrapper, childr type UDFType = GenericUDF type EvaluatedType = Any + override def deterministic: Boolean = isUDFDeterministic + override def nullable: Boolean = true @transient @@ -189,6 +193,219 @@ private[hive] case class HiveGenericUdf(funcWrapper: HiveFunctionWrapper, childr } } +/** + * Resolves [[UnresolvedWindowFunction]] to [[HiveWindowFunction]]. + */ +private[spark] object ResolveHiveWindowFunction extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + case p: LogicalPlan if !p.childrenResolved => p + + // We are resolving WindowExpressions at here. When we get here, we have already + // replaced those WindowSpecReferences. 
+ case p: LogicalPlan => + p transformExpressions { + case WindowExpression( + UnresolvedWindowFunction(name, children), + windowSpec: WindowSpecDefinition) => + // First, let's find the window function info. + val windowFunctionInfo: WindowFunctionInfo = + Option(FunctionRegistry.getWindowFunctionInfo(name.toLowerCase)).getOrElse( + throw new AnalysisException(s"Couldn't find window function $name")) + + // Get the class of this function. + // In Hive 0.12, there is no windowFunctionInfo.getFunctionClass. So, we use + // windowFunctionInfo.getfInfo().getFunctionClass for both Hive 0.13 and Hive 0.13.1. + val functionClass = windowFunctionInfo.getfInfo().getFunctionClass + val newChildren = + // Rank(), DENSE_RANK(), CUME_DIST(), and PERCENT_RANK() do not take explicit + // input parameters and requires implicit parameters, which + // are expressions in Order By clause. + if (classOf[GenericUDAFRank].isAssignableFrom(functionClass)) { + if (children.nonEmpty) { + throw new AnalysisException(s"$name does not take input parameters.") + } + windowSpec.orderSpec.map(_.child) + } else { + children + } + + // If the class is UDAF, we need to use UDAFBridge. + val isUDAFBridgeRequired = + if (classOf[UDAF].isAssignableFrom(functionClass)) { + true + } else { + false + } + + // Create the HiveWindowFunction. For the meaning of isPivotResult, see the doc of + // HiveWindowFunction. + val windowFunction = + HiveWindowFunction( + new HiveFunctionWrapper(functionClass.getName), + windowFunctionInfo.isPivotResult, + isUDAFBridgeRequired, + newChildren) + + // Second, check if the specified window function can accept window definition. + windowSpec.frameSpecification match { + case frame: SpecifiedWindowFrame if !windowFunctionInfo.isSupportsWindow => + // This Hive window function does not support user-speficied window frame. + throw new AnalysisException( + s"Window function $name does not take a frame specification.") + case frame: SpecifiedWindowFrame if windowFunctionInfo.isSupportsWindow && + windowFunctionInfo.isPivotResult => + // These two should not be true at the same time when a window frame is defined. + // If so, throw an exception. + throw new AnalysisException(s"Could not handle Hive window function $name because " + + s"it supports both a user specified window frame and pivot result.") + case _ => // OK + } + // Resolve those UnspecifiedWindowFrame because the physical Window operator still needs + // a window frame specification to work. + val newWindowSpec = windowSpec.frameSpecification match { + case UnspecifiedFrame => + val newWindowFrame = + SpecifiedWindowFrame.defaultWindowFrame( + windowSpec.orderSpec.nonEmpty, + windowFunctionInfo.isSupportsWindow) + WindowSpecDefinition(windowSpec.partitionSpec, windowSpec.orderSpec, newWindowFrame) + case _ => windowSpec + } + + // Finally, we create a WindowExpression with the resolved window function and + // specified window spec. + WindowExpression(windowFunction, newWindowSpec) + } + } +} + +/** + * A [[WindowFunction]] implementation wrapping Hive's window function. + * @param funcWrapper The wrapper for the Hive Window Function. + * @param pivotResult If it is true, the Hive function will return a list of values representing + * the values of the added columns. Otherwise, a single value is returned for + * current row. + * @param isUDAFBridgeRequired If it is true, the function returned by functionWrapper's + * createFunction is UDAF, we need to use GenericUDAFBridge to wrap + * it as a GenericUDAFResolver2. 
+ * @param children Input parameters. + */ +private[hive] case class HiveWindowFunction( + funcWrapper: HiveFunctionWrapper, + pivotResult: Boolean, + isUDAFBridgeRequired: Boolean, + children: Seq[Expression]) extends WindowFunction + with HiveInspectors { + + // Hive window functions are based on GenericUDAFResolver2. + type UDFType = GenericUDAFResolver2 + + @transient + protected lazy val resolver: GenericUDAFResolver2 = + if (isUDAFBridgeRequired) { + new GenericUDAFBridge(funcWrapper.createFunction[UDAF]()) + } else { + funcWrapper.createFunction[GenericUDAFResolver2]() + } + + @transient + protected lazy val inputInspectors = children.map(toInspector).toArray + + // The GenericUDAFEvaluator used to evaluate the window function. + @transient + protected lazy val evaluator: GenericUDAFEvaluator = { + val parameterInfo = new SimpleGenericUDAFParameterInfo(inputInspectors, false, false) + resolver.getEvaluator(parameterInfo) + } + + // The object inspector of values returned from the Hive window function. + @transient + protected lazy val returnInspector = { + evaluator.init(GenericUDAFEvaluator.Mode.COMPLETE, inputInspectors) + } + + def dataType: DataType = + if (!pivotResult) { + inspectorToDataType(returnInspector) + } else { + // If pivotResult is true, we should take the element type out as the data type of this + // function. + inspectorToDataType(returnInspector) match { + case ArrayType(dt, _) => dt + case _ => + sys.error( + s"error resolve the data type of window function ${funcWrapper.functionClassName}") + } + } + + def nullable: Boolean = true + + override type EvaluatedType = Any + + override def eval(input: Row): Any = + throw new TreeNodeException(this, s"No function to evaluate expression. type: ${this.nodeName}") + + @transient + lazy val inputProjection = new InterpretedProjection(children) + + @transient + private var hiveEvaluatorBuffer: AggregationBuffer = _ + // Output buffer. + private var outputBuffer: Any = _ + + override def init(): Unit = { + evaluator.init(GenericUDAFEvaluator.Mode.COMPLETE, inputInspectors) + } + + // Reset the hiveEvaluatorBuffer and outputPosition + override def reset(): Unit = { + // We create a new aggregation buffer to workaround the bug in GenericUDAFRowNumber. + // Basically, GenericUDAFRowNumberEvaluator.reset calls RowNumberBuffer.init. + // However, RowNumberBuffer.init does not really reset this buffer. + hiveEvaluatorBuffer = evaluator.getNewAggregationBuffer + evaluator.reset(hiveEvaluatorBuffer) + } + + override def prepareInputParameters(input: Row): AnyRef = { + wrap(inputProjection(input), inputInspectors, new Array[AnyRef](children.length)) + } + // Add input parameters for a single row. + override def update(input: AnyRef): Unit = { + evaluator.iterate(hiveEvaluatorBuffer, input.asInstanceOf[Array[AnyRef]]) + } + + override def batchUpdate(inputs: Array[AnyRef]): Unit = { + var i = 0 + while (i < inputs.length) { + evaluator.iterate(hiveEvaluatorBuffer, inputs(i).asInstanceOf[Array[AnyRef]]) + i += 1 + } + } + + override def evaluate(): Unit = { + outputBuffer = unwrap(evaluator.evaluate(hiveEvaluatorBuffer), returnInspector) + } + + override def get(index: Int): Any = { + if (!pivotResult) { + // if pivotResult is false, we will get a single value for all rows in the frame. + outputBuffer + } else { + // if pivotResult is true, we will get a Seq having the same size with the size + // of the window frame. At here, we will return the result at the position of + // index in the output buffer. 
+ outputBuffer.asInstanceOf[Seq[Any]].get(index) + } + } + + override def toString: String = { + s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" + } + + override def newInstance: WindowFunction = + new HiveWindowFunction(funcWrapper, pivotResult, isUDAFBridgeRequired, children) +} + private[hive] case class HiveGenericUdaf( funcWrapper: HiveFunctionWrapper, children: Seq[Expression]) extends AggregateExpression @@ -200,7 +417,7 @@ private[hive] case class HiveGenericUdaf( protected lazy val resolver: AbstractGenericUDAFResolver = funcWrapper.createFunction() @transient - protected lazy val objectInspector = { + protected lazy val objectInspector = { val parameterInfo = new SimpleGenericUDAFParameterInfo(inspectors.toArray, false, false) resolver.getEvaluator(parameterInfo) .init(GenericUDAFEvaluator.Mode.COMPLETE, inspectors.toArray) @@ -233,7 +450,7 @@ private[hive] case class HiveUdaf( new GenericUDAFBridge(funcWrapper.createFunction()) @transient - protected lazy val objectInspector = { + protected lazy val objectInspector = { val parameterInfo = new SimpleGenericUDAFParameterInfo(inspectors.toArray, false, false) resolver.getEvaluator(parameterInfo) .init(GenericUDAFEvaluator.Mode.COMPLETE, inspectors.toArray) @@ -270,7 +487,11 @@ private[hive] case class HiveGenericUdtf( extends Generator with HiveInspectors { @transient - protected lazy val function: GenericUDTF = funcWrapper.createFunction() + protected lazy val function: GenericUDTF = { + val fun: GenericUDTF = funcWrapper.createFunction() + fun.setCollector(collector) + fun + } @transient protected lazy val inputInspectors = children.map(toInspector) @@ -281,6 +502,9 @@ private[hive] case class HiveGenericUdtf( @transient protected lazy val udtInput = new Array[AnyRef](children.length) + @transient + protected lazy val collector = new UDTFCollector + lazy val elementTypes = outputInspector.getAllStructFieldRefs.map { field => (inspectorToDataType(field.getFieldObjectInspector), true) } @@ -289,8 +513,7 @@ private[hive] case class HiveGenericUdtf( outputInspector // Make sure initialized. val inputProjection = new InterpretedProjection(children) - val collector = new UDTFCollector - function.setCollector(collector) + function.process(wrap(inputProjection(input), inputInspectors, udtInput)) collector.collectRows() } @@ -312,6 +535,12 @@ private[hive] case class HiveGenericUdtf( } } + override def terminate(): TraversableOnce[Row] = { + outputInspector // Make sure initialized. 
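    // Illustrative note (not in the original patch): Hive's GenericUDTF.close() may forward its
    // last buffered rows to the registered collector, which is why terminate() calls close() and
    // then drains collector.collectRows() once more instead of returning an empty sequence.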
+ function.close() + collector.collectRows() + } + override def toString: String = { s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})" } @@ -333,12 +562,12 @@ private[hive] case class HiveUdafFunction( } else { funcWrapper.createFunction[AbstractGenericUDAFResolver]() } - + private val inspectors = exprs.map(toInspector).toArray - - private val function = { + + private val function = { val parameterInfo = new SimpleGenericUDAFParameterInfo(inspectors, false, false) - resolver.getEvaluator(parameterInfo) + resolver.getEvaluator(parameterInfo) } private val returnInspector = function.init(GenericUDAFEvaluator.Mode.COMPLETE, inspectors) @@ -353,7 +582,7 @@ private[hive] case class HiveUdafFunction( @transient protected lazy val cached = new Array[AnyRef](exprs.length) - + def update(input: Row): Unit = { val inputs = inputProjection(input) function.iterate(buffer, wrap(inputs, inspectors, cached)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index 8398da268174d..2bb526b14be34 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -34,8 +34,10 @@ import org.apache.hadoop.hive.common.FileUtils import org.apache.spark.mapred.SparkHadoopMapRedUtil import org.apache.spark.sql.Row import org.apache.spark.{Logging, SerializableWritable, SparkHadoopWriter} +import org.apache.spark.sql.catalyst.util.DateUtils import org.apache.spark.sql.hive.{ShimFileSinkDesc => FileSinkDesc} import org.apache.spark.sql.hive.HiveShim._ +import org.apache.spark.sql.types._ /** * Internal helper class that saves an RDD using a Hive OutputFormat. @@ -69,7 +71,7 @@ private[hive] class SparkHiveWriterContainer( @transient protected lazy val jobContext = newJobContext(conf.value, jID.value) @transient private lazy val taskContext = newTaskAttemptContext(conf.value, taID.value) @transient private lazy val outputFormat = - conf.value.getOutputFormat.asInstanceOf[HiveOutputFormat[AnyRef,Writable]] + conf.value.getOutputFormat.asInstanceOf[HiveOutputFormat[AnyRef, Writable]] def driverSideSetup() { setIDs(0, 0, 0) @@ -92,7 +94,7 @@ private[hive] class SparkHiveWriterContainer( "part-" + numberFormat.format(splitID) + extension } - def getLocalFileWriter(row: Row): FileSinkOperator.RecordWriter = writer + def getLocalFileWriter(row: Row, schema: StructType): FileSinkOperator.RecordWriter = writer def close() { // Seems the boolean value passed into close does not matter. 
@@ -195,16 +197,25 @@ private[spark] class SparkHiveDynamicPartitionWriterContainer( jobConf.setBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, oldMarker) } - override def getLocalFileWriter(row: Row): FileSinkOperator.RecordWriter = { + override def getLocalFileWriter(row: Row, schema: StructType): FileSinkOperator.RecordWriter = { + def convertToHiveRawString(col: String, value: Any): String = { + val raw = String.valueOf(value) + schema(col).dataType match { + case DateType => DateUtils.toString(raw.toInt) + case _: DecimalType => BigDecimal(raw).toString() + case _ => raw + } + } + val dynamicPartPath = dynamicPartColNames .zip(row.toSeq.takeRight(dynamicPartColNames.length)) .map { case (col, rawVal) => - val string = if (rawVal == null) null else String.valueOf(rawVal) + val string = if (rawVal == null) null else convertToHiveRawString(col, rawVal) val colString = if (string == null || string.isEmpty) { defaultPartName } else { - FileUtils.escapePathName(string) + FileUtils.escapePathName(string, defaultPartName) } s"/$col=$colString" }.mkString diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala new file mode 100644 index 0000000000000..1e51173a19882 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.io.orc.{OrcFile, Reader} +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector + +import org.apache.spark.Logging +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.hive.HiveMetastoreTypes +import org.apache.spark.sql.types.StructType + +private[orc] object OrcFileOperator extends Logging{ + def getFileReader(pathStr: String, config: Option[Configuration] = None ): Reader = { + val conf = config.getOrElse(new Configuration) + val fspath = new Path(pathStr) + val fs = fspath.getFileSystem(conf) + val orcFiles = listOrcFiles(pathStr, conf) + + // TODO Need to consider all files when schema evolution is taken into account. 
+ OrcFile.createReader(fs, orcFiles.head) + } + + def readSchema(path: String, conf: Option[Configuration]): StructType = { + val reader = getFileReader(path, conf) + val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector] + val schema = readerInspector.getTypeName + HiveMetastoreTypes.toDataType(schema).asInstanceOf[StructType] + } + + def getObjectInspector(path: String, conf: Option[Configuration]): StructObjectInspector = { + getFileReader(path, conf).getObjectInspector.asInstanceOf[StructObjectInspector] + } + + def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = { + val origPath = new Path(pathStr) + val fs = origPath.getFileSystem(conf) + val path = origPath.makeQualified(fs) + val paths = SparkHadoopUtil.get.listLeafStatuses(fs, origPath) + .filterNot(_.isDir) + .map(_.getPath) + .filterNot(_.getName.startsWith("_")) + .filterNot(_.getName.startsWith(".")) + + if (paths == null || paths.size == 0) { + throw new IllegalArgumentException( + s"orcFileOperator: path $path does not have valid orc files matching the pattern") + } + + paths + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala new file mode 100644 index 0000000000000..250e73a4dba92 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import org.apache.hadoop.hive.common.`type`.{HiveChar, HiveDecimal, HiveVarchar} +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder +import org.apache.hadoop.hive.serde2.io.DateWritable + +import org.apache.spark.Logging +import org.apache.spark.sql.sources._ + +/** + * It may be optimized by push down partial filters. But we are conservative here. + * Because if some filters fail to be parsed, the tree may be corrupted, + * and cannot be used anymore. + */ +private[orc] object OrcFilters extends Logging { + def createFilter(expr: Array[Filter]): Option[SearchArgument] = { + expr.reduceOption(And).flatMap { conjunction => + val builder = SearchArgument.FACTORY.newBuilder() + buildSearchArgument(conjunction, builder).map(_.build()) + } + } + + private def buildSearchArgument(expression: Filter, builder: Builder): Option[Builder] = { + def newBuilder = SearchArgument.FACTORY.newBuilder() + + def isSearchableLiteral(value: Any) = value match { + // These are types recognized by the `SearchArgumentImpl.BuilderImpl.boxLiteral()` method. 
+ case _: String | _: Long | _: Double | _: DateWritable | _: HiveDecimal | _: HiveChar | + _: HiveVarchar | _: Byte | _: Short | _: Integer | _: Float => true + case _ => false + } + + // lian: I probably missed something here, and had to end up with a pretty weird double-checking + // pattern when converting `And`/`Or`/`Not` filters. + // + // The annoying part is that, `SearchArgument` builder methods like `startAnd()` `startOr()`, + // and `startNot()` mutate internal state of the builder instance. This forces us to translate + // all convertible filters with a single builder instance. However, before actually converting a + // filter, we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible + // filter is found, we may already end up with a builder whose internal state is inconsistent. + // + // For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and + // then try to convert its children. Say we convert `left` child successfully, but find that + // `right` child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is + // inconsistent now. + // + // The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their + // children with brand new builders, and only do the actual conversion with the right builder + // instance when the children are proven to be convertible. + // + // P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. + // Usage of builder methods mentioned above can only be found in test code, where all tested + // filters are known to be convertible. + + expression match { + case And(left, right) => + val tryLeft = buildSearchArgument(left, newBuilder) + val tryRight = buildSearchArgument(right, newBuilder) + + val conjunction = for { + _ <- tryLeft + _ <- tryRight + lhs <- buildSearchArgument(left, builder.startAnd()) + rhs <- buildSearchArgument(right, lhs) + } yield rhs.end() + + // For filter `left AND right`, we can still push down `left` even if `right` is not + // convertible, and vice versa. 
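      // Worked example (illustrative, not part of the original patch): for
      // And(EqualTo("a", 1), someUnsupportedFilter), the trial conversion of the right child
      // against a throwaway builder yields None, so only the left child is re-converted against
      // `builder` and pushed down to ORC; the unsupported half is still evaluated by Spark on
      // the rows returned by the scan.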
+ conjunction + .orElse(tryLeft.flatMap(_ => buildSearchArgument(left, builder))) + .orElse(tryRight.flatMap(_ => buildSearchArgument(right, builder))) + + case Or(left, right) => + for { + _ <- buildSearchArgument(left, newBuilder) + _ <- buildSearchArgument(right, newBuilder) + lhs <- buildSearchArgument(left, builder.startOr()) + rhs <- buildSearchArgument(right, lhs) + } yield rhs.end() + + case Not(child) => + for { + _ <- buildSearchArgument(child, newBuilder) + negate <- buildSearchArgument(child, builder.startNot()) + } yield negate.end() + + case EqualTo(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.equals(attribute, _)) + + case LessThan(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.lessThan(attribute, _)) + + case LessThanOrEqual(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.lessThanEquals(attribute, _)) + + case GreaterThan(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.startNot().lessThanEquals(attribute, _).end()) + + case GreaterThanOrEqual(attribute, value) => + Option(value) + .filter(isSearchableLiteral) + .map(builder.startNot().lessThan(attribute, _).end()) + + case IsNull(attribute) => + Some(builder.isNull(attribute)) + + case IsNotNull(attribute) => + Some(builder.startNot().isNull(attribute).end()) + + case In(attribute, values) => + Option(values) + .filter(_.forall(isSearchableLiteral)) + .map(builder.in(attribute, _)) + + case _ => None + } + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala new file mode 100644 index 0000000000000..f03c4cd54e7e6 --- /dev/null +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.util.Properties + +import com.google.common.base.Objects +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSerde, OrcSplit} +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils +import org.apache.hadoop.io.{NullWritable, Writable} +import org.apache.hadoop.mapred.{InputFormat => MapRedInputFormat, JobConf, RecordWriter, Reporter} +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mapred.SparkHadoopMapRedUtil +import org.apache.spark.rdd.{HadoopRDD, RDD} +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.hive.{HiveContext, HiveInspectors, HiveMetastoreTypes, HiveShim} +import org.apache.spark.sql.sources.{Filter, _} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.{Logging, SerializableWritable} + +/* Implicit conversions */ +import scala.collection.JavaConversions._ + +private[sql] class DefaultSource extends HadoopFsRelationProvider { + def createRelation( + sqlContext: SQLContext, + paths: Array[String], + dataSchema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): HadoopFsRelation = { + assert( + sqlContext.isInstanceOf[HiveContext], + "The ORC data source can only be used with HiveContext.") + + new OrcRelation(paths, dataSchema, None, partitionColumns, parameters)(sqlContext) + } +} + +private[orc] class OrcOutputWriter( + path: String, + dataSchema: StructType, + context: TaskAttemptContext) + extends OutputWriter with SparkHadoopMapRedUtil with HiveInspectors { + + private val serializer = { + val table = new Properties() + table.setProperty("columns", dataSchema.fieldNames.mkString(",")) + table.setProperty("columns.types", dataSchema.map { f => + HiveMetastoreTypes.toMetastoreType(f.dataType) + }.mkString(":")) + + val serde = new OrcSerde + serde.initialize(context.getConfiguration, table) + serde + } + + // Object inspector converted from the schema of the relation to be written. + private val structOI = { + val typeInfo = + TypeInfoUtils.getTypeInfoFromTypeString( + HiveMetastoreTypes.toMetastoreType(dataSchema)) + + TypeInfoUtils + .getStandardJavaObjectInspectorFromTypeInfo(typeInfo) + .asInstanceOf[StructObjectInspector] + } + + // Used to hold temporary `Writable` fields of the next row to be written. + private val reusableOutputBuffer = new Array[Any](dataSchema.length) + + // Used to convert Catalyst values into Hadoop `Writable`s. + private val wrappers = structOI.getAllStructFieldRefs.map { ref => + wrapperFor(ref.getFieldObjectInspector) + }.toArray + + // `OrcRecordWriter.close()` creates an empty file if no rows are written at all. We use this + // flag to decide whether `OrcRecordWriter.close()` needs to be called. 
+ private var recordWriterInstantiated = false + + private lazy val recordWriter: RecordWriter[NullWritable, Writable] = { + recordWriterInstantiated = true + + val conf = context.getConfiguration + val partition = context.getTaskAttemptID.getTaskID.getId + val filename = f"part-r-$partition%05d-${System.currentTimeMillis}%015d.orc" + + new OrcOutputFormat().getRecordWriter( + new Path(path, filename).getFileSystem(conf), + conf.asInstanceOf[JobConf], + new Path(path, filename).toUri.getPath, + Reporter.NULL + ).asInstanceOf[RecordWriter[NullWritable, Writable]] + } + + override def write(row: Row): Unit = { + var i = 0 + while (i < row.length) { + reusableOutputBuffer(i) = wrappers(i)(row(i)) + i += 1 + } + + recordWriter.write( + NullWritable.get(), + serializer.serialize(reusableOutputBuffer, structOI)) + } + + override def close(): Unit = { + if (recordWriterInstantiated) { + recordWriter.close(Reporter.NULL) + } + } +} + +@DeveloperApi +private[sql] class OrcRelation( + override val paths: Array[String], + maybeDataSchema: Option[StructType], + maybePartitionSpec: Option[PartitionSpec], + override val userDefinedPartitionColumns: Option[StructType], + parameters: Map[String, String])( + @transient val sqlContext: SQLContext) + extends HadoopFsRelation(maybePartitionSpec) + with Logging { + + private[sql] def this( + paths: Array[String], + maybeDataSchema: Option[StructType], + maybePartitionSpec: Option[PartitionSpec], + parameters: Map[String, String])( + sqlContext: SQLContext) = { + this( + paths, + maybeDataSchema, + maybePartitionSpec, + maybePartitionSpec.map(_.partitionColumns), + parameters)(sqlContext) + } + + override val dataSchema: StructType = maybeDataSchema.getOrElse { + OrcFileOperator.readSchema( + paths.head, Some(sqlContext.sparkContext.hadoopConfiguration)) + } + + override def needConversion: Boolean = false + + override def equals(other: Any): Boolean = other match { + case that: OrcRelation => + paths.toSet == that.paths.toSet && + dataSchema == that.dataSchema && + schema == that.schema && + partitionColumns == that.partitionColumns + case _ => false + } + + override def hashCode(): Int = { + Objects.hashCode( + paths.toSet, + dataSchema, + schema, + partitionColumns) + } + + override def buildScan( + requiredColumns: Array[String], + filters: Array[Filter], + inputPaths: Array[FileStatus]): RDD[Row] = { + val output = StructType(requiredColumns.map(dataSchema(_))).toAttributes + OrcTableScan(output, this, filters, inputPaths).execute() + } + + override def prepareJobForWrite(job: Job): OutputWriterFactory = { + new OutputWriterFactory { + override def newInstance( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): OutputWriter = { + new OrcOutputWriter(path, dataSchema, context) + } + } + } +} + +private[orc] case class OrcTableScan( + attributes: Seq[Attribute], + @transient relation: OrcRelation, + filters: Array[Filter], + @transient inputPaths: Array[FileStatus]) + extends Logging + with HiveInspectors { + + @transient private val sqlContext = relation.sqlContext + + private def addColumnIds( + output: Seq[Attribute], + relation: OrcRelation, + conf: Configuration): Unit = { + val ids = output.map(a => relation.dataSchema.fieldIndex(a.name): Integer) + val (sortedIds, sortedNames) = ids.zip(attributes.map(_.name)).sorted.unzip + HiveShim.appendReadColumns(conf, sortedIds, sortedNames) + } + + // Transform all given raw `Writable`s into `Row`s. 
+ private def fillObject( + path: String, + conf: Configuration, + iterator: Iterator[Writable], + nonPartitionKeyAttrs: Seq[(Attribute, Int)], + mutableRow: MutableRow): Iterator[Row] = { + val deserializer = new OrcSerde + val soi = OrcFileOperator.getObjectInspector(path, Some(conf)) + val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { + case (attr, ordinal) => + soi.getStructFieldRef(attr.name.toLowerCase) -> ordinal + }.unzip + val unwrappers = fieldRefs.map(unwrapperFor) + // Map each tuple to a row object + iterator.map { value => + val raw = deserializer.deserialize(value) + var i = 0 + while (i < fieldRefs.length) { + val fieldValue = soi.getStructFieldData(raw, fieldRefs(i)) + if (fieldValue == null) { + mutableRow.setNullAt(fieldOrdinals(i)) + } else { + unwrappers(i)(fieldValue, mutableRow, fieldOrdinals(i)) + } + i += 1 + } + mutableRow: Row + } + } + + def execute(): RDD[Row] = { + val job = new Job(sqlContext.sparkContext.hadoopConfiguration) + val conf = job.getConfiguration + + // Tries to push down filters if ORC filter push-down is enabled + if (sqlContext.conf.orcFilterPushDown) { + OrcFilters.createFilter(filters).foreach { f => + conf.set(OrcTableScan.SARG_PUSHDOWN, f.toKryo) + conf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, true) + } + } + + // Sets requested columns + addColumnIds(attributes, relation, conf) + + if (inputPaths.nonEmpty) { + FileInputFormat.setInputPaths(job, inputPaths.map(_.getPath): _*) + } + + val inputFormatClass = + classOf[OrcInputFormat] + .asInstanceOf[Class[_ <: MapRedInputFormat[NullWritable, Writable]]] + + val rdd = sqlContext.sparkContext.hadoopRDD( + conf.asInstanceOf[JobConf], + inputFormatClass, + classOf[NullWritable], + classOf[Writable] + ).asInstanceOf[HadoopRDD[NullWritable, Writable]] + + val wrappedConf = new SerializableWritable(conf) + + rdd.mapPartitionsWithInputSplit { case (split: OrcSplit, iterator) => + val mutableRow = new SpecificMutableRow(attributes.map(_.dataType)) + fillObject( + split.getPath.toString, + wrappedConf.value, + iterator.map(_._2), + attributes.zipWithIndex, + mutableRow) + } + } +} + +private[orc] object OrcTableScan { + // This constant duplicates `OrcInputFormat.SARG_PUSHDOWN`, which is unfortunately not public. 
+ private[orc] val SARG_PUSHDOWN = "sarg.pushdown" +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index edeab5158df62..7c7afc824d7a6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -20,20 +20,22 @@ package org.apache.spark.sql.hive.test import java.io.File import java.util.{Set => JavaSet} +import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.exec.FunctionRegistry import org.apache.hadoop.hive.ql.io.avro.{AvroContainerInputFormat, AvroContainerOutputFormat} import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.hive.ql.parse.VariableSubstitution import org.apache.hadoop.hive.ql.processors._ -import org.apache.hadoop.hive.serde2.RegexSerDe import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.hive.serde2.avro.AvroSerDe -import org.apache.spark.sql.SQLConf + +import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.CacheTableCommand import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.execution.HiveNativeCommand +import org.apache.spark.sql.SQLConf import org.apache.spark.util.Utils import org.apache.spark.{SparkConf, SparkContext} @@ -46,7 +48,14 @@ import scala.collection.JavaConversions._ // SPARK-3729: Test key required to check for initialization errors with config. object TestHive extends TestHiveContext( - new SparkContext("local[2]", "TestSQLContext", new SparkConf().set("spark.sql.test", ""))) + new SparkContext( + "local[2]", + "TestSQLContext", + new SparkConf() + .set("spark.sql.test", "") + .set( + "spark.sql.hive.metastore.barrierPrefixes", + "org.apache.spark.sql.hive.execution.PairSerDe"))) /** * A locally running test instance of Spark's Hive execution engine. @@ -62,6 +71,8 @@ object TestHive class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { self => + import HiveContext._ + // By clearing the port we force Spark to pick a new one. This allows us to rerun tests // without restarting the JVM. System.clearProperty("spark.hostPort") @@ -70,31 +81,26 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("hive.plan.serialization.format", "javaXML") lazy val warehousePath = Utils.createTempDir() - lazy val metastorePath = Utils.createTempDir() + + private lazy val temporaryConfig = newTemporaryConfiguration() /** Sets up the system initially or after a RESET command */ - protected def configure(): Unit = { - warehousePath.delete() - metastorePath.delete() - setConf("javax.jdo.option.ConnectionURL", - s"jdbc:derby:;databaseName=$metastorePath;create=true") - setConf("hive.metastore.warehouse.dir", warehousePath.toString) - } + protected override def configure(): Map[String, String] = + temporaryConfig ++ Map("hive.metastore.warehouse.dir" -> warehousePath.toString) val testTempDir = Utils.createTempDir() // For some hive test case which contain ${system:test.tmp.dir} System.setProperty("test.tmp.dir", testTempDir.getCanonicalPath) - configure() // Must be called before initializing the catalog below. - /** The location of the compiled hive distribution */ lazy val hiveHome = envVarToFile("HIVE_HOME") /** The location of the hive source code. 
*/ lazy val hiveDevHome = envVarToFile("HIVE_DEV_HOME") // Override so we can intercept relative paths and rewrite them to point at hive. - override def runSqlHive(sql: String): Seq[String] = super.runSqlHive(rewritePaths(sql)) + override def runSqlHive(sql: String): Seq[String] = + super.runSqlHive(rewritePaths(substitutor.substitute(this.hiveconf, sql))) override def executePlan(plan: LogicalPlan): this.QueryExecution = new this.QueryExecution(plan) @@ -107,10 +113,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { /** Fewer partitions to speed up testing. */ protected[sql] override lazy val conf: SQLConf = new SQLConf { override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, "5").toInt - // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared. // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql" override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql") + override def caseSensitiveAnalysis: Boolean = + getConf(SQLConf.CASE_SENSITIVE, "false").toBoolean } } @@ -157,22 +164,12 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { val describedTable = "DESCRIBE (\\w+)".r - val vs = new VariableSubstitution() - - // we should substitute variables in hql to pass the text to parseSql() as a parameter. - // Hive parser need substituted text. HiveContext.sql() does this but return a DataFrame, - // while we need a logicalPlan so we cannot reuse that. - protected[hive] class HiveQLQueryExecution(hql: String) - extends this.QueryExecution(HiveQl.parseSql(vs.substitute(hiveconf, hql))) { - def hiveExec(): Seq[String] = runSqlHive(hql) - override def toString: String = hql + "\n" + super.toString - } - /** * Override QueryExecution with special debug workflow. */ class QueryExecution(logicalPlan: LogicalPlan) extends super.QueryExecution(logicalPlan) { + def this(sql: String) = this(parseSql(sql)) override lazy val analyzed = { val describedTables = logical match { case HiveNativeCommand(describedTable(tbl)) => tbl :: Nil @@ -192,11 +189,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { } } - case class TestTable(name: String, commands: (()=>Unit)*) + case class TestTable(name: String, commands: (() => Unit)*) protected[hive] implicit class SqlCmd(sql: String) { def cmd: () => Unit = { - () => new HiveQLQueryExecution(sql).stringResult(): Unit + () => new QueryExecution(sql).stringResult(): Unit } } @@ -204,6 +201,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { * A list of test tables and the DDL required to initialize them. A test table is loaded on * demand when a query are run against it. */ + @transient lazy val testTables = new mutable.HashMap[String, TestTable]() def registerTestTable(testTable: TestTable): Unit = { @@ -213,6 +211,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { // The test tables that are defined in the Hive QTestUtil. 
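The SqlCmd implicit above turns a SQL string into a deferred command, and the new QueryExecution(sql) constructor replaces the removed HiveQLQueryExecution. A minimal sketch of registering a lazily loaded test table through that mechanism follows (not part of the patch; the table name and data file are illustrative, mirroring the getHiveFile pattern used by the tables below, and the snippet assumes it runs inside a TestHiveContext).

    // Sketch only: register a test table whose DDL runs the first time a query touches it.
    registerTestTable(TestTable("demo_kv",
      "CREATE TABLE demo_kv (key INT, value STRING)".cmd,
      s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/kv1.txt")}' INTO TABLE demo_kv".cmd))

    // The commands execute lazily: the first analyzed query that references the table
    // triggers loadTestTable("demo_kv").
    sql("SELECT count(*) FROM demo_kv").collect()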
// /itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java // https://github.com/apache/hive/blob/branch-0.13/data/scripts/q_test_init.sql + @transient val hiveQTestUtilTables = Seq( TestTable("src", "CREATE TABLE src (key INT, value STRING)".cmd, @@ -245,16 +244,18 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { import org.apache.hadoop.mapred.{SequenceFileInputFormat, SequenceFileOutputFormat} import org.apache.thrift.protocol.TBinaryProtocol - val srcThrift = new Table("default", "src_thrift") - srcThrift.setFields(Nil) - srcThrift.setInputFormatClass(classOf[SequenceFileInputFormat[_,_]].getName) - // In Hive, SequenceFileOutputFormat will be substituted by HiveSequenceFileOutputFormat. - srcThrift.setOutputFormatClass(classOf[SequenceFileOutputFormat[_,_]].getName) - srcThrift.setSerializationLib(classOf[ThriftDeserializer].getName) - srcThrift.setSerdeParam("serialization.class", classOf[Complex].getName) - srcThrift.setSerdeParam("serialization.format", classOf[TBinaryProtocol].getName) - catalog.client.createTable(srcThrift) - + runSqlHive( + s""" + |CREATE TABLE src_thrift(fake INT) + |ROW FORMAT SERDE '${classOf[ThriftDeserializer].getName}' + |WITH SERDEPROPERTIES( + | 'serialization.class'='${classOf[Complex].getName}', + | 'serialization.format'='${classOf[TBinaryProtocol].getName}' + |) + |STORED AS + |INPUTFORMAT '${classOf[SequenceFileInputFormat[_, _]].getName}' + |OUTPUTFORMAT '${classOf[SequenceFileOutputFormat[_, _]].getName}' + """.stripMargin) runSqlHive( s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/complex.seq")}' INTO TABLE src_thrift") @@ -376,7 +377,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { if (!(loadedTables contains name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. loadedTables += name - logInfo(s"Loading test table $name") + logDebug(s"Loading test table $name") val createCmds = testTables.get(name).map(_.commands).getOrElse(sys.error(s"Unknown test table $name")) createCmds.foreach(_()) @@ -393,9 +394,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { */ protected val originalUdfs: JavaSet[String] = FunctionRegistry.getFunctionNames - // Database default may not exist in 0.13.1, create it if not exist - HiveShim.createDefaultDBIfNeeded(this) - /** * Resets the test instance by deleting any tables that have been created. * TODO: also clear out UDFs, views, etc. 
@@ -410,24 +408,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { cacheManager.clearCache() loadedTables.clear() catalog.cachedDataSourceTables.invalidateAll() - catalog.client.getAllTables("default").foreach { t => - logDebug(s"Deleting table $t") - val table = catalog.client.getTable("default", t) - - catalog.client.getIndexes("default", t, 255).foreach { index => - catalog.client.dropIndex("default", t, index.getIndexName, true) - } - - if (!table.isIndexTable) { - catalog.client.dropTable("default", t) - } - } - - catalog.client.getAllDatabases.filterNot(_ == "default").foreach { db => - logDebug(s"Dropping Database: $db") - catalog.client.dropDatabase(db, true, false, true) - } - + catalog.client.reset() catalog.unregisterAllTables() FunctionRegistry.getFunctionNames.filterNot(originalUdfs.contains(_)).foreach { udfName => @@ -438,7 +419,8 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { hiveconf.set("fs.default.name", new File(".").toURI.toString) // It is important that we RESET first as broken hooks that might have been set could break // other sql exec here. - runSqlHive("RESET") + executionHive.runSqlHive("RESET") + metadataHive.runSqlHive("RESET") // For some reason, RESET does not reset the following variables... // https://issues.apache.org/jira/browse/HIVE-9004 runSqlHive("set hive.table.parameters.default=") @@ -446,7 +428,11 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { runSqlHive("set datanucleus.cache.collections.lazy=true") // Lots of tests fail if we do not change the partition whitelist from the default. runSqlHive("set hive.metastore.partition.name.whitelist.pattern=.*") - configure() + + configure().foreach { + case (k, v) => + metadataHive.runSqlHive(s"SET $k=$v") + } runSqlHive("USE default") diff --git a/sql/hive/src/test/java/test/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/JavaDataFrameSuite.java new file mode 100644 index 0000000000000..c4828c4717643 --- /dev/null +++ b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/JavaDataFrameSuite.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test.org.apache.spark.sql.hive; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.*; +import org.apache.spark.sql.expressions.Window; +import org.apache.spark.sql.hive.HiveContext; +import org.apache.spark.sql.hive.test.TestHive$; + +public class JavaDataFrameSuite { + private transient JavaSparkContext sc; + private transient HiveContext hc; + + DataFrame df; + + private void checkAnswer(DataFrame actual, List<Row> expected) { + String errorMessage = QueryTest$.MODULE$.checkAnswer(actual, expected); + if (errorMessage != null) { + Assert.fail(errorMessage); + } + } + + @Before + public void setUp() throws IOException { + hc = TestHive$.MODULE$; + sc = new JavaSparkContext(hc.sparkContext()); + + List<String> jsonObjects = new ArrayList<String>(10); + for (int i = 0; i < 10; i++) { + jsonObjects.add("{\"key\":" + i + ", \"value\":\"str" + i + "\"}"); + } + df = hc.jsonRDD(sc.parallelize(jsonObjects)); + df.registerTempTable("window_table"); + } + + @After + public void tearDown() throws IOException { + // Clean up tables. + hc.sql("DROP TABLE IF EXISTS window_table"); + } + + @Test + public void saveTableAndQueryIt() { + checkAnswer( + df.select(functions.avg("key").over( + Window.partitionBy("value").orderBy("key").rowsBetween(-1, 1))), + hc.sql("SELECT avg(key) " + + "OVER (PARTITION BY value " + + " ORDER BY key " + + " ROWS BETWEEN 1 preceding and 1 following) " + + "FROM window_table").collectAsList()); + } +} diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java similarity index 89% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java rename to sql/hive/src/test/java/test/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java index 53ddecf57958b..64d1ce92931eb 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java +++ b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/JavaMetastoreDataSourcesSuite.java @@ -14,7 +14,8 @@ * See the License for the specific language governing permissions and * limitations under the License.
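As a rough Scala counterpart (not part of the patch) to the two APIs the Java suites exercise, namely the window-function aggregation tested in JavaDataFrameSuite above and the builder-style DataFrameWriter that replaces the old saveAsTable(name, source, mode, options) overload in the metastore suite that follows: in the sketch below, df is assumed to be a DataFrame with key/value columns as in the test, and the path option is illustrative.

    import org.apache.spark.sql.SaveMode
    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions.avg

    // Sliding average over the previous, current and next row in each value partition,
    // i.e. the ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING frame in the Java test.
    val w = Window.partitionBy("value").orderBy("key").rowsBetween(-1, 1)
    df.select(avg("key").over(w)).show()

    // Builder-style writer replacing df.saveAsTable(name, source, mode, options).
    df.write
      .format("org.apache.spark.sql.json")
      .mode(SaveMode.Append)
      .option("path", "/tmp/javaSavedTable") // illustrative path
      .saveAsTable("savedTable")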
*/ -package org.apache.spark.sql.hive; + +package test.org.apache.spark.sql.hive; import java.io.File; import java.io.IOException; @@ -36,6 +37,7 @@ import org.apache.spark.sql.DataFrame; import org.apache.spark.sql.QueryTest$; import org.apache.spark.sql.Row; +import org.apache.spark.sql.hive.HiveContext; import org.apache.spark.sql.hive.test.TestHive$; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructField; @@ -81,7 +83,7 @@ public void setUp() throws IOException { jsonObjects.add("{\"a\":" + i + ", \"b\":\"str" + i + "\"}"); } JavaRDD<String> rdd = sc.parallelize(jsonObjects); - df = sqlContext.jsonRDD(rdd); + df = sqlContext.read().json(rdd); df.registerTempTable("jsonTable"); } @@ -96,7 +98,11 @@ public void tearDown() throws IOException { public void saveExternalTableAndQueryIt() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.saveAsTable("javaSavedTable", "org.apache.spark.sql.json", SaveMode.Append, options); + df.write() + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .options(options) + .saveAsTable("javaSavedTable"); checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), @@ -115,7 +121,11 @@ public void saveExternalTableAndQueryIt() { public void saveExternalTableWithSchemaAndQueryIt() { Map<String, String> options = new HashMap<String, String>(); options.put("path", path.toString()); - df.saveAsTable("javaSavedTable", "org.apache.spark.sql.json", SaveMode.Append, options); + df.write() + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .options(options) + .saveAsTable("javaSavedTable"); checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), @@ -138,7 +148,11 @@ public void saveExternalTableWithSchemaAndQueryIt() { @Test public void saveTableAndQueryIt() { Map<String, String> options = new HashMap<String, String>(); - df.saveAsTable("javaSavedTable", "org.apache.spark.sql.json", SaveMode.Append, options); + df.write() + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .options(options) + .saveAsTable("javaSavedTable"); checkAnswer( sqlContext.sql("SELECT * FROM javaSavedTable"), diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFIntegerToString.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFIntegerToString.java similarity index 100% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFIntegerToString.java rename to sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFIntegerToString.java diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFListListInt.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFListListInt.java similarity index 100% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFListListInt.java rename to sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFListListInt.java diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFListString.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFListString.java similarity index 100% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFListString.java rename to sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFListString.java diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFStringString.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFStringString.java similarity index 100% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFStringString.java rename to
sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFStringString.java diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFTwoListList.java b/sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFTwoListList.java similarity index 100% rename from sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFTwoListList.java rename to sql/hive/src/test/java/test/org/apache/spark/sql/hive/execution/UDFTwoListList.java diff --git a/sql/hive/src/test/resources/TestUDTF.jar b/sql/hive/src/test/resources/TestUDTF.jar new file mode 100644 index 0000000000000..514f2d5d26fd3 Binary files /dev/null and b/sql/hive/src/test/resources/TestUDTF.jar differ diff --git a/sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 b/sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 new file mode 100644 index 0000000000000..946e72fc87c2e --- /dev/null +++ b/sql/hive/src/test/resources/golden/Test UDTF.close in Lateral Views-0-ac5c96224a534f07b49462ad76620678 @@ -0,0 +1,2 @@ +97 500 +97 500 diff --git a/sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 b/sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 new file mode 100644 index 0000000000000..a5c8806279fa7 --- /dev/null +++ b/sql/hive/src/test/resources/golden/Test UDTF.close in SELECT-0-517f834fef35b896ec64399f42b2a151 @@ -0,0 +1,2 @@ +3 +3 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 1. testWindowing-0-327a8cd39fe30255ff492ee86f660522 b/sql/hive/src/test/resources/golden/windowing.q -- 1. testWindowing-0-327a8cd39fe30255ff492ee86f660522 new file mode 100644 index 0000000000000..850c41c8115d6 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 1. 
testWindowing-0-327a8cd39fe30255ff492ee86f660522 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 10. testHavingWithWindowingCondRankNoGBY-0-fef4bf638d52a9a601845347010602fd b/sql/hive/src/test/resources/golden/windowing.q -- 10. testHavingWithWindowingCondRankNoGBY-0-fef4bf638d52a9a601845347010602fd new file mode 100644 index 0000000000000..850c41c8115d6 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 10. 
testHavingWithWindowingCondRankNoGBY-0-fef4bf638d52a9a601845347010602fd @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 11. testFirstLast-0-86bb9c97d92fdcd941bcb5143513e2e6 b/sql/hive/src/test/resources/golden/windowing.q -- 11. testFirstLast-0-86bb9c97d92fdcd941bcb5143513e2e6 new file mode 100644 index 0000000000000..921679cdcf569 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 11. 
testFirstLast-0-86bb9c97d92fdcd941bcb5143513e2e6 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 2 2 34 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 6 +Manufacturer#1 almond antique chartreuse lavender yellow 34 34 2 28 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6 2 42 +Manufacturer#1 almond aquamarine burnished black steel 28 28 34 42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 42 6 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 14 2 +Manufacturer#2 almond antique violet turquoise frosted 40 40 14 25 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 14 18 +Manufacturer#2 almond aquamarine rose maroon antique 25 25 40 18 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 18 2 18 +Manufacturer#3 almond antique chartreuse khaki white 17 17 17 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 17 1 +Manufacturer#3 almond antique metallic orange dim 19 19 17 45 +Manufacturer#3 almond antique misty red olive 1 1 14 45 +Manufacturer#3 almond antique olive coral navajo 45 45 19 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 10 10 27 +Manufacturer#4 almond antique violet mint lemon 39 39 10 7 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 10 12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 39 12 +Manufacturer#4 almond azure aquamarine papaya violet 12 12 27 12 +Manufacturer#5 almond antique blue firebrick mint 31 31 31 2 +Manufacturer#5 almond antique medium spring khaki 6 6 31 46 +Manufacturer#5 almond antique sky peru orange 2 2 31 23 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 6 23 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 2 23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 12. testFirstLastWithWhere-0-84345a9f685ba63b87caa4bb16b122b5 b/sql/hive/src/test/resources/golden/windowing.q -- 12. testFirstLastWithWhere-0-84345a9f685ba63b87caa4bb16b122b5 new file mode 100644 index 0000000000000..09e30c7c57349 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 12. testFirstLastWithWhere-0-84345a9f685ba63b87caa4bb16b122b5 @@ -0,0 +1,5 @@ +Manufacturer#3 almond antique chartreuse khaki white 17 1 17 17 19 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 14 17 1 +Manufacturer#3 almond antique metallic orange dim 19 3 19 17 45 +Manufacturer#3 almond antique misty red olive 1 4 1 14 45 +Manufacturer#3 almond antique olive coral navajo 45 5 45 19 45 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 13. testSumWindow-0-6cfc8840d3a4469b0fe11d63182cb59f b/sql/hive/src/test/resources/golden/windowing.q -- 13. testSumWindow-0-6cfc8840d3a4469b0fe11d63182cb59f new file mode 100644 index 0000000000000..01ee88ff23302 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 13. 
testSumWindow-0-6cfc8840d3a4469b0fe11d63182cb59f @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 38 2 +Manufacturer#1 almond antique burnished rose metallic 2 44 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 6 +Manufacturer#1 almond aquamarine burnished black steel 28 110 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 +Manufacturer#2 almond antique violet turquoise frosted 40 81 40 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 2 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 18 +Manufacturer#3 almond antique chartreuse khaki white 17 50 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 14 +Manufacturer#3 almond antique metallic orange dim 19 96 19 +Manufacturer#3 almond antique misty red olive 1 79 1 +Manufacturer#3 almond antique olive coral navajo 45 65 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 10 +Manufacturer#4 almond antique violet mint lemon 39 83 39 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 27 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 7 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 12 +Manufacturer#5 almond antique blue firebrick mint 31 39 31 +Manufacturer#5 almond antique medium spring khaki 6 85 6 +Manufacturer#5 almond antique sky peru orange 2 108 2 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 14. testNoSortClause-0-88d96a526d3cae6ed8168c5b228974d1 b/sql/hive/src/test/resources/golden/windowing.q -- 14. testNoSortClause-0-88d96a526d3cae6ed8168c5b228974d1 new file mode 100644 index 0000000000000..c78eb640c9c27 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 14. 
testNoSortClause-0-88d96a526d3cae6ed8168c5b228974d1 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 +Manufacturer#3 almond antique metallic orange dim 19 3 3 +Manufacturer#3 almond antique misty red olive 1 4 4 +Manufacturer#3 almond antique olive coral navajo 45 5 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 +Manufacturer#4 almond antique violet mint lemon 39 2 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 +Manufacturer#5 almond antique medium spring khaki 6 2 2 +Manufacturer#5 almond antique sky peru orange 2 3 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 15. testExpressions-0-11f6c13cf2710ce7054654cca136e73e b/sql/hive/src/test/resources/golden/windowing.q -- 15. testExpressions-0-11f6c13cf2710ce7054654cca136e73e new file mode 100644 index 0000000000000..050138ccf04ce --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 15. 
testExpressions-0-11f6c13cf2710ce7054654cca136e73e @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 0.0 1 2 2.0 0.0 2 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 0.4 2 3 12.666666666666666 15.084944665313014 2 34 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 0.6 2 4 11.0 13.379088160259652 2 6 2 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 0.8 3 5 14.4 13.763720427268202 2 28 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 1.0 3 6 19.0 16.237815945091466 2 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 0.0 1 1 14.0 0.0 4 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 0.25 1 2 27.0 13.0 4 40 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 0.5 2 3 18.666666666666668 15.86050300449376 4 2 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 0.75 2 4 20.25 14.00669482783144 4 25 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 1.0 3 5 19.8 12.560254774486067 4 18 2 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 0.0 1 1 17.0 0.0 2 17 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 0.25 1 2 15.5 1.5 2 14 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 0.5 2 3 16.666666666666668 2.0548046676563256 2 19 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0.8 0.75 2 4 12.75 7.013380069552769 2 1 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 1.0 3 5 19.2 14.344336861632886 2 45 19 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 0.0 1 1 10.0 0.0 0 10 10 +Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 0.25 1 2 24.5 14.5 0 39 10 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 0.5 2 3 25.333333333333332 11.897712198383164 0 27 10 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 0.75 2 4 20.75 13.007209539328564 0 7 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 1.0 3 5 19.0 12.149074038789951 0 12 27 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 0.0 1 1 31.0 0.0 1 31 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 0.25 1 2 18.5 12.5 1 6 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 0.5 2 3 13.0 12.832251036613439 1 2 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 0.75 2 4 21.25 18.102140757380052 1 46 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1.0 1.0 3 5 21.6 16.206171663906314 1 23 2 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 16. testMultipleWindows-0-efd1476255eeb1b1961149144f574b7a b/sql/hive/src/test/resources/golden/windowing.q -- 16. testMultipleWindows-0-efd1476255eeb1b1961149144f574b7a new file mode 100644 index 0000000000000..c10888852b504 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 16. 
testMultipleWindows-0-efd1476255eeb1b1961149144f574b7a @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 0.3333333333333333 4 4 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 0.5 38 34 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 0.6666666666666666 44 10 2 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 0.8333333333333334 72 28 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 1.0 114 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 0.2 14 14 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 0.4 54 40 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 0.6 56 2 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 0.8 81 25 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 1.0 99 32 2 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 0.2 17 31 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 0.4 31 14 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 0.6 50 50 17 +Manufacturer#3 almond antique misty red olive 1 4 4 0.8 51 1 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 1.0 96 45 19 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 0.2 10 17 10 +Manufacturer#4 almond antique violet mint lemon 39 2 2 0.4 49 39 10 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 0.6 76 27 10 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 0.8 83 7 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 1.0 95 29 27 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 0.2 31 31 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 0.4 37 8 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 0.6 39 2 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 0.8 85 46 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 1.0 108 23 2 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 17. testCountStar-0-1b1fc185c8fddf68e58e92f29052ab2d b/sql/hive/src/test/resources/golden/windowing.q -- 17. testCountStar-0-1b1fc185c8fddf68e58e92f29052ab2d new file mode 100644 index 0000000000000..b1309a497d68e --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 17. 
testCountStar-0-1b1fc185c8fddf68e58e92f29052ab2d @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 +Manufacturer#1 almond antique burnished rose metallic 2 2 2 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 3 2 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 4 2 +Manufacturer#1 almond aquamarine burnished black steel 28 5 5 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 6 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 14 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 14 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 14 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 40 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 2 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 17 +Manufacturer#3 almond antique metallic orange dim 19 3 3 17 +Manufacturer#3 almond antique misty red olive 1 4 4 14 +Manufacturer#3 almond antique olive coral navajo 45 5 5 19 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 10 +Manufacturer#4 almond antique violet mint lemon 39 2 2 10 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 10 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 39 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 27 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 31 +Manufacturer#5 almond antique medium spring khaki 6 2 2 31 +Manufacturer#5 almond antique sky peru orange 2 3 3 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 2 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 18. testUDAFs-0-6974e5959e41a661e09db18547fef58a b/sql/hive/src/test/resources/golden/windowing.q -- 18. testUDAFs-0-6974e5959e41a661e09db18547fef58a new file mode 100644 index 0000000000000..52d2ee8d0cd3f --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 18. 
testUDAFs-0-6974e5959e41a661e09db18547fef58a @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 4100.06 1173.15 1753.76 1366.6866666666667 +Manufacturer#1 almond antique burnished rose metallic 2 5702.650000000001 1173.15 1753.76 1425.6625000000001 +Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.070000000001 1173.15 1753.76 1423.4140000000002 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 7576.58 1173.15 1753.76 1515.316 +Manufacturer#1 almond aquamarine burnished black steel 28 6403.43 1414.42 1753.76 1600.8575 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 4649.67 1414.42 1632.66 1549.89 +Manufacturer#2 almond antique violet chocolate turquoise 14 5523.360000000001 1690.68 2031.98 1841.1200000000001 +Manufacturer#2 almond antique violet turquoise frosted 40 7222.02 1690.68 2031.98 1805.505 +Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 1690.68 2031.98 1784.7240000000002 +Manufacturer#2 almond aquamarine rose maroon antique 25 7232.9400000000005 1698.66 2031.98 1808.2350000000001 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5432.24 1698.66 2031.98 1810.7466666666667 +Manufacturer#3 almond antique chartreuse khaki white 17 4272.34 1190.27 1671.68 1424.1133333333335 +Manufacturer#3 almond antique forest lavender goldenrod 14 6195.32 1190.27 1922.98 1548.83 +Manufacturer#3 almond antique metallic orange dim 19 7532.61 1190.27 1922.98 1506.522 +Manufacturer#3 almond antique misty red olive 1 5860.929999999999 1190.27 1922.98 1465.2324999999998 +Manufacturer#3 almond antique olive coral navajo 45 4670.66 1337.29 1922.98 1556.8866666666665 +Manufacturer#4 almond antique gainsboro frosted violet 10 4202.35 1206.26 1620.67 1400.7833333333335 +Manufacturer#4 almond antique violet mint lemon 39 6047.27 1206.26 1844.92 1511.8175 +Manufacturer#4 almond aquamarine floral ivory bisque 27 7337.620000000001 1206.26 1844.92 1467.5240000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 7 5716.950000000001 1206.26 1844.92 1429.2375000000002 +Manufacturer#4 almond azure aquamarine papaya violet 12 4341.530000000001 1206.26 1844.92 1447.176666666667 +Manufacturer#5 almond antique blue firebrick mint 31 5190.08 1611.66 1789.69 1730.0266666666666 +Manufacturer#5 almond antique medium spring khaki 6 6208.18 1018.1 1789.69 1552.045 +Manufacturer#5 almond antique sky peru orange 2 7672.66 1018.1 1789.69 1534.532 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 5882.970000000001 1018.1 1788.73 1470.7425000000003 +Manufacturer#5 almond azure blanched chiffon midnight 23 4271.3099999999995 1018.1 1788.73 1423.7699999999998 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 19. testUDAFsWithGBY-0-67d15ee5915ac64a738fd4b60d75eb35 b/sql/hive/src/test/resources/golden/windowing.q -- 19. testUDAFsWithGBY-0-67d15ee5915ac64a738fd4b60d75eb35 new file mode 100644 index 0000000000000..6461642d34a21 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 19. 
testUDAFsWithGBY-0-67d15ee5915ac64a738fd4b60d75eb35 @@ -0,0 +1,25 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 4529.5 1173.15 1173.15 1509.8333333333333 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 5943.92 1753.76 1753.76 1485.98 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 7576.58 1602.59 1602.59 1515.316 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 6403.43 1414.42 1414.42 1600.8575 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 4649.67 1632.66 1632.66 1549.89 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 5523.360000000001 1690.68 1690.68 1841.1200000000001 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 7222.02 1800.7 1800.7 1805.505 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 8923.62 2031.98 2031.98 1784.7240000000002 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 7232.9400000000005 1698.66 1698.66 1808.2350000000001 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5432.24 1701.6 1701.6 1810.7466666666667 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 4272.34 1671.68 1671.68 1424.1133333333335 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 6195.32 1190.27 1190.27 1548.83 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 7532.61 1410.39 1410.39 1506.522 +Manufacturer#3 almond antique misty red olive 1 1922.98 5860.929999999999 1922.98 1922.98 1465.2324999999998 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 4670.66 1337.29 1337.29 1556.8866666666665 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 4202.35 1620.67 1620.67 1400.7833333333335 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 6047.27 1375.42 1375.42 1511.8175 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 7337.620000000001 1206.26 1206.26 1467.5240000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 5716.950000000001 1844.92 1844.92 1429.2375000000002 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 4341.530000000001 1290.35 1290.35 1447.176666666667 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 5190.08 1789.69 1789.69 1730.0266666666666 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 6208.18 1611.66 1611.66 1552.045 +Manufacturer#5 almond antique sky peru orange 2 1788.73 7672.66 1788.73 1788.73 1534.532 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 5882.970000000001 1018.1 1018.1 1470.7425000000003 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 4271.3099999999995 1464.48 1464.48 1423.7699999999998 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 2. testGroupByWithPartitioning-0-cb5618b1e626f3a9d4a030b508b5d251 b/sql/hive/src/test/resources/golden/windowing.q -- 2. testGroupByWithPartitioning-0-cb5618b1e626f3a9d4a030b508b5d251 new file mode 100644 index 0000000000000..2c30e652aa26d --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 2. 
testGroupByWithPartitioning-0-cb5618b1e626f3a9d4a030b508b5d251 @@ -0,0 +1,25 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 20. testSTATs-0-da0e0cca69e42118a96b8609b8fa5838 b/sql/hive/src/test/resources/golden/windowing.q -- 20. testSTATs-0-da0e0cca69e42118a96b8609b8fa5838 new file mode 100644 index 0000000000000..1f7e8a5d67036 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 20. 
testSTATs-0-da0e0cca69e42118a96b8609b8fa5838 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 273.70217881648074 273.70217881648074 [34,2] 74912.8826888888 1.0 4128.782222222221 +Manufacturer#1 almond antique burnished rose metallic 2 258.10677784349235 258.10677784349235 [34,2,6] 66619.10876874991 0.811328754177887 2801.7074999999995 +Manufacturer#1 almond antique chartreuse lavender yellow 34 230.90151585470358 230.90151585470358 [34,2,6,28] 53315.51002399992 0.695639377397664 2210.7864 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 202.73109328368946 202.73109328368946 [34,2,6,42,28] 41099.896184 0.630785977101214 2009.9536000000007 +Manufacturer#1 almond aquamarine burnished black steel 28 121.6064517973862 121.6064517973862 [34,6,42,28] 14788.129118750014 0.2036684720435979 331.1337500000004 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 96.5751586416853 96.5751586416853 [6,42,28] 9326.761266666683 -1.4442181184933883E-4 -0.20666666666708502 +Manufacturer#2 almond antique violet chocolate turquoise 14 142.2363169751898 142.2363169751898 [2,40,14] 20231.169866666663 -0.49369526554523185 -1113.7466666666658 +Manufacturer#2 almond antique violet turquoise frosted 40 137.76306498840682 137.76306498840682 [2,25,40,14] 18978.662075 -0.5205630897335946 -1004.4812499999995 +Manufacturer#2 almond aquamarine midnight light salmon 2 130.03972279269132 130.03972279269132 [2,18,25,40,14] 16910.329504000005 -0.46908967495720255 -766.1791999999995 +Manufacturer#2 almond aquamarine rose maroon antique 25 135.55100986344584 135.55100986344584 [2,18,25,40] 18374.07627499999 -0.6091405874714462 -1128.1787499999987 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 156.44019460768044 156.44019460768044 [2,18,25] 24473.534488888927 -0.9571686373491608 -1441.4466666666676 +Manufacturer#3 almond antique chartreuse khaki white 17 196.7742266885805 196.7742266885805 [17,19,14] 38720.09628888887 0.5557168646224995 224.6944444444446 +Manufacturer#3 almond antique forest lavender goldenrod 14 275.14144189852607 275.14144189852607 [17,1,19,14] 75702.81305 -0.6720833036576083 -1296.9000000000003 +Manufacturer#3 almond antique metallic orange dim 19 260.23473614412046 260.23473614412046 [17,1,19,14,45] 67722.117896 -0.5703526513979519 -2129.0664 +Manufacturer#3 almond antique misty red olive 1 275.9139962356932 275.9139962356932 [1,19,14,45] 76128.53331875012 -0.577476899644802 -2547.7868749999993 +Manufacturer#3 almond antique olive coral navajo 45 260.5815918713796 260.5815918713796 [1,19,45] 67902.76602222225 -0.8710736366736884 -4099.731111111111 +Manufacturer#4 almond antique gainsboro frosted violet 10 170.13011889596618 170.13011889596618 [39,27,10] 28944.25735555559 -0.6656975320098423 -1347.4777777777779 +Manufacturer#4 almond antique violet mint lemon 39 242.26834609323197 242.26834609323197 [39,7,27,10] 58693.95151875002 -0.8051852719193339 -2537.328125 +Manufacturer#4 almond aquamarine floral ivory bisque 27 234.10001662537326 234.10001662537326 [39,7,27,10,12] 54802.817784000035 -0.6046935574240581 -1719.8079999999995 +Manufacturer#4 almond aquamarine yellow dodger mint 7 247.3342714197732 247.3342714197732 [39,7,27,12] 61174.24181875003 -0.5508665654707869 -1719.0368749999975 +Manufacturer#4 almond azure aquamarine papaya violet 12 283.3344330566893 283.3344330566893 [7,27,12] 80278.40095555557 -0.7755740084632333 -1867.4888888888881 +Manufacturer#5 almond antique blue firebrick mint 31 83.69879024746363 83.69879024746363 [2,6,31] 
7005.487488888913 0.39004303087285047 418.9233333333353 +Manufacturer#5 almond antique medium spring khaki 6 316.68049612345885 316.68049612345885 [2,6,46,31] 100286.53662500004 -0.713612911776183 -4090.853749999999 +Manufacturer#5 almond antique sky peru orange 2 285.40506298242155 285.40506298242155 [2,23,6,46,31] 81456.04997600002 -0.712858514567818 -3297.2011999999986 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 285.43749038756283 285.43749038756283 [2,23,6,46] 81474.56091875004 -0.984128787153391 -4871.028125000002 +Manufacturer#5 almond azure blanched chiffon midnight 23 315.9225931564038 315.9225931564038 [2,23,46] 99807.08486666664 -0.9978877469246936 -5664.856666666666 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 21. testDISTs-0-672d4cb385b7ced2e446f132474293ad b/sql/hive/src/test/resources/golden/windowing.q -- 21. testDISTs-0-672d4cb385b7ced2e446f132474293ad new file mode 100644 index 0000000000000..e7c39f454fb37 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 21. testDISTs-0-672d4cb385b7ced2e446f132474293ad @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1753.76,"y":1.0}] 121152.0 1 +Manufacturer#1 almond antique burnished rose metallic 2 [{"x":1173.15,"y":2.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 115872.0 2 +Manufacturer#1 almond antique chartreuse lavender yellow 34 [{"x":1173.15,"y":2.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1753.76,"y":1.0}] 110592.0 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 [{"x":1173.15,"y":1.0},{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86428.0 4 +Manufacturer#1 almond aquamarine burnished black steel 28 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0},{"x":1753.76,"y":1.0}] 86098.0 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 [{"x":1414.42,"y":1.0},{"x":1602.59,"y":1.0},{"x":1632.66,"y":1.0}] 86428.0 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 [{"x":1690.68,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 1 +Manufacturer#2 almond antique violet turquoise frosted 40 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 139825.5 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 [{"x":1690.68,"y":1.0},{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":1800.7,"y":1.0},{"x":2031.98,"y":1.0}] 169347.0 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 [{"x":1698.66,"y":1.0},{"x":1701.6,"y":1.0},{"x":2031.98,"y":1.0}] 146985.0 5 +Manufacturer#3 almond antique chartreuse khaki white 17 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0}] 90681.0 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 [{"x":1190.27,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 65831.5 2 +Manufacturer#3 almond antique metallic orange dim 19 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1671.68,"y":1.0},{"x":1922.98,"y":1.0}] 90681.0 3 +Manufacturer#3 almond antique misty red olive 1 [{"x":1190.27,"y":1.0},{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 76690.0 4 +Manufacturer#3 almond antique olive coral navajo 45 [{"x":1337.29,"y":1.0},{"x":1410.39,"y":1.0},{"x":1922.98,"y":1.0}] 112398.0 5 +Manufacturer#4 almond antique 
gainsboro frosted violet 10 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0}] 48427.0 1 +Manufacturer#4 almond antique violet mint lemon 39 [{"x":1206.26,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 46844.0 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1620.67,"y":1.0},{"x":1844.92,"y":1.0}] 45261.0 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1375.42,"y":1.0},{"x":1844.92,"y":1.0}] 39309.0 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 [{"x":1206.26,"y":1.0},{"x":1290.35,"y":1.0},{"x":1844.92,"y":1.0}] 33357.0 5 +Manufacturer#5 almond antique blue firebrick mint 31 [{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 155733.0 1 +Manufacturer#5 almond antique medium spring khaki 6 [{"x":1018.1,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 99201.0 2 +Manufacturer#5 almond antique sky peru orange 2 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0},{"x":1789.69,"y":1.0}] 78486.0 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1611.66,"y":1.0},{"x":1788.73,"y":1.0}] 60577.5 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 [{"x":1018.1,"y":1.0},{"x":1464.48,"y":1.0},{"x":1788.73,"y":1.0}] 78486.0 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 24. testLateralViews-0-dea06072f0a64fe4537fae854944ed5a b/sql/hive/src/test/resources/golden/windowing.q -- 24. testLateralViews-0-dea06072f0a64fe4537fae854944ed5a new file mode 100644 index 0000000000000..dc83c9fffe932 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 24. 
testLateralViews-0-dea06072f0a64fe4537fae854944ed5a @@ -0,0 +1,78 @@ +Manufacturer#1 almond antique burnished rose metallic 1 2 2 +Manufacturer#1 almond antique burnished rose metallic 1 2 4 +Manufacturer#1 almond antique burnished rose metallic 2 2 6 +Manufacturer#1 almond antique burnished rose metallic 2 2 6 +Manufacturer#1 almond antique burnished rose metallic 3 2 6 +Manufacturer#1 almond antique burnished rose metallic 3 2 6 +Manufacturer#1 almond antique salmon chartreuse burlywood 1 6 10 +Manufacturer#1 almond antique salmon chartreuse burlywood 2 6 14 +Manufacturer#1 almond antique salmon chartreuse burlywood 3 6 18 +Manufacturer#1 almond aquamarine burnished black steel 1 28 40 +Manufacturer#1 almond aquamarine burnished black steel 2 28 62 +Manufacturer#1 almond aquamarine burnished black steel 3 28 84 +Manufacturer#1 almond antique chartreuse lavender yellow 1 34 90 +Manufacturer#1 almond antique chartreuse lavender yellow 2 34 96 +Manufacturer#1 almond antique chartreuse lavender yellow 3 34 102 +Manufacturer#1 almond aquamarine pink moccasin thistle 1 42 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 2 42 118 +Manufacturer#1 almond aquamarine pink moccasin thistle 3 42 126 +Manufacturer#2 almond aquamarine midnight light salmon 1 2 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 4 +Manufacturer#2 almond aquamarine midnight light salmon 3 2 6 +Manufacturer#2 almond antique violet chocolate turquoise 1 14 18 +Manufacturer#2 almond antique violet chocolate turquoise 2 14 30 +Manufacturer#2 almond antique violet chocolate turquoise 3 14 42 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 1 18 46 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 2 18 50 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 3 18 54 +Manufacturer#2 almond aquamarine rose maroon antique 1 25 61 +Manufacturer#2 almond aquamarine rose maroon antique 2 25 68 +Manufacturer#2 almond aquamarine rose maroon antique 3 25 75 +Manufacturer#2 almond antique violet turquoise frosted 1 40 90 +Manufacturer#2 almond antique violet turquoise frosted 2 40 105 +Manufacturer#2 almond antique violet turquoise frosted 3 40 120 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique misty red olive 2 1 2 +Manufacturer#3 almond antique misty red olive 3 1 3 +Manufacturer#3 almond antique forest lavender goldenrod 1 14 16 +Manufacturer#3 almond antique forest lavender goldenrod 2 14 29 +Manufacturer#3 almond antique forest lavender goldenrod 3 14 42 +Manufacturer#3 almond antique chartreuse khaki white 1 17 45 +Manufacturer#3 almond antique chartreuse khaki white 2 17 48 +Manufacturer#3 almond antique chartreuse khaki white 3 17 51 +Manufacturer#3 almond antique metallic orange dim 1 19 53 +Manufacturer#3 almond antique metallic orange dim 2 19 55 +Manufacturer#3 almond antique metallic orange dim 3 19 57 +Manufacturer#3 almond antique olive coral navajo 1 45 83 +Manufacturer#3 almond antique olive coral navajo 2 45 109 +Manufacturer#3 almond antique olive coral navajo 3 45 135 +Manufacturer#4 almond aquamarine yellow dodger mint 1 7 7 +Manufacturer#4 almond aquamarine yellow dodger mint 2 7 14 +Manufacturer#4 almond aquamarine yellow dodger mint 3 7 21 +Manufacturer#4 almond antique gainsboro frosted violet 1 10 24 +Manufacturer#4 almond antique gainsboro frosted violet 2 10 27 +Manufacturer#4 almond antique gainsboro frosted violet 3 10 30 +Manufacturer#4 almond azure aquamarine papaya violet 1 12 32 +Manufacturer#4 almond azure aquamarine papaya violet 2 
12 34 +Manufacturer#4 almond azure aquamarine papaya violet 3 12 36 +Manufacturer#4 almond aquamarine floral ivory bisque 1 27 51 +Manufacturer#4 almond aquamarine floral ivory bisque 2 27 66 +Manufacturer#4 almond aquamarine floral ivory bisque 3 27 81 +Manufacturer#4 almond antique violet mint lemon 1 39 93 +Manufacturer#4 almond antique violet mint lemon 2 39 105 +Manufacturer#4 almond antique violet mint lemon 3 39 117 +Manufacturer#5 almond antique sky peru orange 1 2 2 +Manufacturer#5 almond antique sky peru orange 2 2 4 +Manufacturer#5 almond antique sky peru orange 3 2 6 +Manufacturer#5 almond antique medium spring khaki 1 6 10 +Manufacturer#5 almond antique medium spring khaki 2 6 14 +Manufacturer#5 almond antique medium spring khaki 3 6 18 +Manufacturer#5 almond azure blanched chiffon midnight 1 23 35 +Manufacturer#5 almond azure blanched chiffon midnight 2 23 52 +Manufacturer#5 almond azure blanched chiffon midnight 3 23 69 +Manufacturer#5 almond antique blue firebrick mint 1 31 77 +Manufacturer#5 almond antique blue firebrick mint 2 31 85 +Manufacturer#5 almond antique blue firebrick mint 3 31 93 +Manufacturer#5 almond aquamarine dodger light gainsboro 1 46 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 2 46 123 +Manufacturer#5 almond aquamarine dodger light gainsboro 3 46 138 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 26. testGroupByHavingWithSWQAndAlias-0-b996a664b06e5741c08079d5c38241bc b/sql/hive/src/test/resources/golden/windowing.q -- 26. testGroupByHavingWithSWQAndAlias-0-b996a664b06e5741c08079d5c38241bc new file mode 100644 index 0000000000000..2c30e652aa26d --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 26. testGroupByHavingWithSWQAndAlias-0-b996a664b06e5741c08079d5c38241bc @@ -0,0 +1,25 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light 
gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 27. testMultipleRangeWindows-0-227e080e337d734dd88ff814b3b412e4 b/sql/hive/src/test/resources/golden/windowing.q -- 27. testMultipleRangeWindows-0-227e080e337d734dd88ff814b3b412e4 new file mode 100644 index 0000000000000..b2a91ba727a72 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 27. testMultipleRangeWindows-0-227e080e337d734dd88ff814b3b412e4 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 4 10 +Manufacturer#1 almond antique burnished rose metallic 2 4 10 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 10 6 +Manufacturer#1 almond aquamarine burnished black steel 28 28 62 +Manufacturer#1 almond antique chartreuse lavender yellow 34 62 76 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 42 +Manufacturer#2 almond aquamarine midnight light salmon 2 2 2 +Manufacturer#2 almond antique violet chocolate turquoise 14 14 32 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 32 43 +Manufacturer#2 almond aquamarine rose maroon antique 25 43 25 +Manufacturer#2 almond antique violet turquoise frosted 40 40 40 +Manufacturer#3 almond antique misty red olive 1 1 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 14 50 +Manufacturer#3 almond antique chartreuse khaki white 17 31 36 +Manufacturer#3 almond antique metallic orange dim 19 50 19 +Manufacturer#3 almond antique olive coral navajo 45 45 45 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7 29 +Manufacturer#4 almond antique gainsboro frosted violet 10 17 22 +Manufacturer#4 almond azure aquamarine papaya violet 12 29 12 +Manufacturer#4 almond aquamarine floral ivory bisque 27 27 27 +Manufacturer#4 almond antique violet mint lemon 39 39 39 +Manufacturer#5 almond antique sky peru orange 2 2 8 +Manufacturer#5 almond antique medium spring khaki 6 8 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 23 54 +Manufacturer#5 almond antique blue firebrick mint 31 54 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 46 46 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 28. testPartOrderInUDAFInvoke-0-25912ae7d18c91cc09e17e57968fb5db b/sql/hive/src/test/resources/golden/windowing.q -- 28. testPartOrderInUDAFInvoke-0-25912ae7d18c91cc09e17e57968fb5db new file mode 100644 index 0000000000000..5bcb0fa941d65 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 28. 
testPartOrderInUDAFInvoke-0-25912ae7d18c91cc09e17e57968fb5db @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 38 +Manufacturer#1 almond antique burnished rose metallic 2 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 +Manufacturer#4 almond antique violet mint lemon 39 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#5 almond antique blue firebrick mint 31 39 +Manufacturer#5 almond antique medium spring khaki 6 85 +Manufacturer#5 almond antique sky peru orange 2 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 29. testPartOrderInWdwDef-0-88945892370ccbc1125a927a3d55342a b/sql/hive/src/test/resources/golden/windowing.q -- 29. testPartOrderInWdwDef-0-88945892370ccbc1125a927a3d55342a new file mode 100644 index 0000000000000..5bcb0fa941d65 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 29. 
testPartOrderInWdwDef-0-88945892370ccbc1125a927a3d55342a @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 38 +Manufacturer#1 almond antique burnished rose metallic 2 44 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 +Manufacturer#1 almond aquamarine burnished black steel 28 110 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 +Manufacturer#2 almond antique violet turquoise frosted 40 81 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 +Manufacturer#3 almond antique chartreuse khaki white 17 50 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 +Manufacturer#3 almond antique metallic orange dim 19 96 +Manufacturer#3 almond antique misty red olive 1 79 +Manufacturer#3 almond antique olive coral navajo 45 65 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 +Manufacturer#4 almond antique violet mint lemon 39 83 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 +Manufacturer#5 almond antique blue firebrick mint 31 39 +Manufacturer#5 almond antique medium spring khaki 6 85 +Manufacturer#5 almond antique sky peru orange 2 108 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 3. testGroupByHavingWithSWQ-0-a5a5339330a6a6660d32ccb0cc5d7100 b/sql/hive/src/test/resources/golden/windowing.q -- 3. testGroupByHavingWithSWQ-0-a5a5339330a6a6660d32ccb0cc5d7100 new file mode 100644 index 0000000000000..2c30e652aa26d --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 3. 
testGroupByHavingWithSWQ-0-a5a5339330a6a6660d32ccb0cc5d7100 @@ -0,0 +1,25 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1 1 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 2 2 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 3 3 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 4 4 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 5 5 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1 1 14 0 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 2 2 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 3 3 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 4 4 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 5 5 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1 1 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 2 2 14 -3 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 3 3 19 5 +Manufacturer#3 almond antique misty red olive 1 1922.98 4 4 1 -18 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 5 5 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1 1 10 0 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 2 2 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 3 3 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 4 4 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 5 5 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1 1 31 0 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 2 2 6 -25 +Manufacturer#5 almond antique sky peru orange 2 1788.73 3 3 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 4 4 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 5 5 23 -23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 30. testDefaultPartitioningSpecRules-0-fa80b09c99e3c1487de48ea71a88dada b/sql/hive/src/test/resources/golden/windowing.q -- 30. testDefaultPartitioningSpecRules-0-fa80b09c99e3c1487de48ea71a88dada new file mode 100644 index 0000000000000..698a44349d2a6 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 30. 
testDefaultPartitioningSpecRules-0-fa80b09c99e3c1487de48ea71a88dada @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 38 4 +Manufacturer#1 almond antique burnished rose metallic 2 44 4 +Manufacturer#1 almond antique chartreuse lavender yellow 34 72 38 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 112 44 +Manufacturer#1 almond aquamarine burnished black steel 28 110 72 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 76 114 +Manufacturer#2 almond antique violet chocolate turquoise 14 56 14 +Manufacturer#2 almond antique violet turquoise frosted 40 81 54 +Manufacturer#2 almond aquamarine midnight light salmon 2 99 56 +Manufacturer#2 almond aquamarine rose maroon antique 25 85 81 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 45 99 +Manufacturer#3 almond antique chartreuse khaki white 17 50 17 +Manufacturer#3 almond antique forest lavender goldenrod 14 51 31 +Manufacturer#3 almond antique metallic orange dim 19 96 50 +Manufacturer#3 almond antique misty red olive 1 79 51 +Manufacturer#3 almond antique olive coral navajo 45 65 96 +Manufacturer#4 almond antique gainsboro frosted violet 10 76 10 +Manufacturer#4 almond antique violet mint lemon 39 83 49 +Manufacturer#4 almond aquamarine floral ivory bisque 27 95 76 +Manufacturer#4 almond aquamarine yellow dodger mint 7 85 83 +Manufacturer#4 almond azure aquamarine papaya violet 12 46 95 +Manufacturer#5 almond antique blue firebrick mint 31 39 31 +Manufacturer#5 almond antique medium spring khaki 6 85 37 +Manufacturer#5 almond antique sky peru orange 2 108 39 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 77 85 +Manufacturer#5 almond azure blanched chiffon midnight 23 71 108 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 36. testRankWithPartitioning-0-45ccbaf0ee083858f7661c66b11d4768 b/sql/hive/src/test/resources/golden/windowing.q -- 36. testRankWithPartitioning-0-45ccbaf0ee083858f7661c66b11d4768 new file mode 100644 index 0000000000000..e35257d98382e --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 36. 
testRankWithPartitioning-0-45ccbaf0ee083858f7661c66b11d4768 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 37. testPartitioningVariousForms-0-3436e50214f9afdec84334e10faa931a b/sql/hive/src/test/resources/golden/windowing.q -- 37. testPartitioningVariousForms-0-3436e50214f9afdec84334e10faa931a new file mode 100644 index 0000000000000..9c0ca6c7a00ba --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 37. testPartitioningVariousForms-0-3436e50214f9afdec84334e10faa931a @@ -0,0 +1,26 @@ +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#1 8749.73 1173.15 1753.76 1458.29 6 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#2 8923.62 1690.68 2031.98 1784.72 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#3 7532.61 1190.27 1922.98 1506.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#4 7337.62 1206.26 1844.92 1467.52 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 +Manufacturer#5 7672.66 1018.1 1789.69 1534.53 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 38. testPartitioningVariousForms2-0-cba9d84a6b1bb5e36595338d4602377e b/sql/hive/src/test/resources/golden/windowing.q -- 38. 
testPartitioningVariousForms2-0-cba9d84a6b1bb5e36595338d4602377e new file mode 100644 index 0000000000000..fc27df2f2b648 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 38. testPartitioningVariousForms2-0-cba9d84a6b1bb5e36595338d4602377e @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 1173.15 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 1173.15 1173.15 +Manufacturer#1 almond antique chartreuse lavender yellow 34 1753.76 1753.76 1753.76 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 1602.59 1602.59 1602.59 +Manufacturer#1 almond aquamarine burnished black steel 28 1414.42 1414.42 1414.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 1632.66 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 1690.68 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 1800.7 1800.7 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 2031.98 2031.98 +Manufacturer#2 almond aquamarine rose maroon antique 25 1698.66 1698.66 1698.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 1701.6 1701.6 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 1671.68 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 1190.27 1190.27 1190.27 +Manufacturer#3 almond antique metallic orange dim 19 1410.39 1410.39 1410.39 +Manufacturer#3 almond antique misty red olive 1 1922.98 1922.98 1922.98 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 1337.29 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 1620.67 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 1375.42 1375.42 +Manufacturer#4 almond aquamarine floral ivory bisque 27 1206.26 1206.26 1206.26 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 1844.92 1844.92 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 1290.35 1290.35 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 1789.69 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 1611.66 1611.66 1611.66 +Manufacturer#5 almond antique sky peru orange 2 1788.73 1788.73 1788.73 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 1018.1 1018.1 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 1464.48 1464.48 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 39. testUDFOnOrderCols-0-7647562850dd367ef1e6c63117805423 b/sql/hive/src/test/resources/golden/windowing.q -- 39. testUDFOnOrderCols-0-7647562850dd367ef1e6c63117805423 new file mode 100644 index 0000000000000..e5a541f56f6f3 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 39. 
testUDFOnOrderCols-0-7647562850dd367ef1e6c63117805423 @@ -0,0 +1,26 @@ +Manufacturer#1 LARGE BRUSHED STEEL ARGE BRUSHED STEEL 1 +Manufacturer#1 LARGE BURNISHED STEEL ARGE BURNISHED STEEL 2 +Manufacturer#1 PROMO BURNISHED NICKEL ROMO BURNISHED NICKEL 3 +Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 +Manufacturer#1 PROMO PLATED TIN ROMO PLATED TIN 4 +Manufacturer#1 STANDARD ANODIZED STEEL TANDARD ANODIZED STEEL 6 +Manufacturer#2 ECONOMY POLISHED STEEL CONOMY POLISHED STEEL 1 +Manufacturer#2 MEDIUM ANODIZED COPPER EDIUM ANODIZED COPPER 2 +Manufacturer#2 MEDIUM BURNISHED COPPER EDIUM BURNISHED COPPER 3 +Manufacturer#2 SMALL POLISHED NICKEL MALL POLISHED NICKEL 4 +Manufacturer#2 STANDARD PLATED TIN TANDARD PLATED TIN 5 +Manufacturer#3 ECONOMY PLATED COPPER CONOMY PLATED COPPER 1 +Manufacturer#3 MEDIUM BURNISHED BRASS EDIUM BURNISHED BRASS 2 +Manufacturer#3 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#3 PROMO ANODIZED TIN ROMO ANODIZED TIN 4 +Manufacturer#3 STANDARD POLISHED STEEL TANDARD POLISHED STEEL 5 +Manufacturer#4 ECONOMY BRUSHED COPPER CONOMY BRUSHED COPPER 1 +Manufacturer#4 SMALL BRUSHED BRASS MALL BRUSHED BRASS 2 +Manufacturer#4 SMALL PLATED STEEL MALL PLATED STEEL 3 +Manufacturer#4 PROMO POLISHED STEEL ROMO POLISHED STEEL 4 +Manufacturer#4 STANDARD ANODIZED TIN TANDARD ANODIZED TIN 5 +Manufacturer#5 LARGE BRUSHED BRASS ARGE BRUSHED BRASS 1 +Manufacturer#5 ECONOMY BURNISHED STEEL CONOMY BURNISHED STEEL 2 +Manufacturer#5 MEDIUM BURNISHED TIN EDIUM BURNISHED TIN 3 +Manufacturer#5 SMALL PLATED BRASS MALL PLATED BRASS 4 +Manufacturer#5 STANDARD BURNISHED TIN TANDARD BURNISHED TIN 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 4. testCount-0-e6e97e884327df86f16b870527ec026c b/sql/hive/src/test/resources/golden/windowing.q -- 4. testCount-0-e6e97e884327df86f16b870527ec026c new file mode 100644 index 0000000000000..bf8e620a304af --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 4. testCount-0-e6e97e884327df86f16b870527ec026c @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique chartreuse lavender yellow 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 +Manufacturer#1 almond aquamarine burnished black steel 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 +Manufacturer#2 almond antique violet chocolate turquoise 1 +Manufacturer#2 almond antique violet turquoise frosted 2 +Manufacturer#2 almond aquamarine midnight light salmon 3 +Manufacturer#2 almond aquamarine rose maroon antique 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 +Manufacturer#3 almond antique chartreuse khaki white 1 +Manufacturer#3 almond antique forest lavender goldenrod 2 +Manufacturer#3 almond antique metallic orange dim 3 +Manufacturer#3 almond antique misty red olive 4 +Manufacturer#3 almond antique olive coral navajo 5 +Manufacturer#4 almond antique gainsboro frosted violet 1 +Manufacturer#4 almond antique violet mint lemon 2 +Manufacturer#4 almond aquamarine floral ivory bisque 3 +Manufacturer#4 almond aquamarine yellow dodger mint 4 +Manufacturer#4 almond azure aquamarine papaya violet 5 +Manufacturer#5 almond antique blue firebrick mint 1 +Manufacturer#5 almond antique medium spring khaki 2 +Manufacturer#5 almond antique sky peru orange 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 +Manufacturer#5 almond azure blanched chiffon midnight 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 40. 
testNoBetweenForRows-0-99007f45b6406869e048b0e4eb9213f1 b/sql/hive/src/test/resources/golden/windowing.q -- 40. testNoBetweenForRows-0-99007f45b6406869e048b0e4eb9213f1 new file mode 100644 index 0000000000000..1e29df62901d0 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 40. testNoBetweenForRows-0-99007f45b6406869e048b0e4eb9213f1 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 4272.34 +Manufacturer#3 almond antique misty red olive 1 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 41. testNoBetweenForRange-0-d81a591e90950de291d2f133793e9283 b/sql/hive/src/test/resources/golden/windowing.q -- 41. testNoBetweenForRange-0-d81a591e90950de291d2f133793e9283 new file mode 100644 index 0000000000000..a620479fe4064 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 41. 
testNoBetweenForRange-0-d81a591e90950de291d2f133793e9283 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique burnished rose metallic 2 2346.3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 3948.8900000000003 +Manufacturer#1 almond aquamarine burnished black steel 28 5363.31 +Manufacturer#1 almond antique chartreuse lavender yellow 34 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 8749.730000000001 +Manufacturer#2 almond aquamarine midnight light salmon 2 2031.98 +Manufacturer#2 almond antique violet chocolate turquoise 14 3722.66 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5424.26 +Manufacturer#2 almond aquamarine rose maroon antique 25 7122.92 +Manufacturer#2 almond antique violet turquoise frosted 40 8923.62 +Manufacturer#3 almond antique misty red olive 1 1922.98 +Manufacturer#3 almond antique forest lavender goldenrod 14 3113.25 +Manufacturer#3 almond antique chartreuse khaki white 17 4784.93 +Manufacturer#3 almond antique metallic orange dim 19 6195.320000000001 +Manufacturer#3 almond antique olive coral navajo 45 7532.610000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 7 1844.92 +Manufacturer#4 almond antique gainsboro frosted violet 10 3465.59 +Manufacturer#4 almond azure aquamarine papaya violet 12 4755.9400000000005 +Manufacturer#4 almond aquamarine floral ivory bisque 27 5962.200000000001 +Manufacturer#4 almond antique violet mint lemon 39 7337.620000000001 +Manufacturer#5 almond antique sky peru orange 2 1788.73 +Manufacturer#5 almond antique medium spring khaki 6 3400.3900000000003 +Manufacturer#5 almond azure blanched chiffon midnight 23 4864.870000000001 +Manufacturer#5 almond antique blue firebrick mint 31 6654.560000000001 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 7672.660000000002 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 42. testUnboundedFollowingForRows-0-fb8648e82e4dd56d6bdcfd739dd1edf0 b/sql/hive/src/test/resources/golden/windowing.q -- 42. testUnboundedFollowingForRows-0-fb8648e82e4dd56d6bdcfd739dd1edf0 new file mode 100644 index 0000000000000..74147d2571a15 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 42. 
testUnboundedFollowingForRows-0-fb8648e82e4dd56d6bdcfd739dd1edf0 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 8749.730000000001 +Manufacturer#1 almond antique burnished rose metallic 2 7576.58 +Manufacturer#1 almond antique chartreuse lavender yellow 34 6403.43 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4649.67 +Manufacturer#1 almond aquamarine burnished black steel 28 3047.08 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 +Manufacturer#2 almond antique violet chocolate turquoise 14 8923.62 +Manufacturer#2 almond antique violet turquoise frosted 40 7232.9400000000005 +Manufacturer#2 almond aquamarine midnight light salmon 2 5432.24 +Manufacturer#2 almond aquamarine rose maroon antique 25 3400.26 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 1701.6 +Manufacturer#3 almond antique chartreuse khaki white 17 7532.61 +Manufacturer#3 almond antique forest lavender goldenrod 14 5860.929999999999 +Manufacturer#3 almond antique metallic orange dim 19 4670.66 +Manufacturer#3 almond antique misty red olive 1 3260.27 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 +Manufacturer#4 almond antique gainsboro frosted violet 10 7337.620000000001 +Manufacturer#4 almond antique violet mint lemon 39 5716.950000000001 +Manufacturer#4 almond aquamarine floral ivory bisque 27 4341.530000000001 +Manufacturer#4 almond aquamarine yellow dodger mint 7 3135.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 1290.35 +Manufacturer#5 almond antique blue firebrick mint 31 7672.66 +Manufacturer#5 almond antique medium spring khaki 6 5882.970000000001 +Manufacturer#5 almond antique sky peru orange 2 4271.3099999999995 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 2482.58 +Manufacturer#5 almond azure blanched chiffon midnight 23 1464.48 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 43. testUnboundedFollowingForRange-0-3cd04e5f2398853c4850f4f86142bb39 b/sql/hive/src/test/resources/golden/windowing.q -- 43. testUnboundedFollowingForRange-0-3cd04e5f2398853c4850f4f86142bb39 new file mode 100644 index 0000000000000..49d003b5de139 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 43. 
testUnboundedFollowingForRange-0-3cd04e5f2398853c4850f4f86142bb39 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 8749.730000000001 +Manufacturer#1 almond antique burnished rose metallic 2 8749.730000000001 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 6403.43 +Manufacturer#1 almond aquamarine burnished black steel 28 4800.84 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3386.42 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 1632.66 +Manufacturer#2 almond aquamarine midnight light salmon 2 8923.62 +Manufacturer#2 almond antique violet chocolate turquoise 14 6891.639999999999 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5200.96 +Manufacturer#2 almond aquamarine rose maroon antique 25 3499.36 +Manufacturer#2 almond antique violet turquoise frosted 40 1800.7 +Manufacturer#3 almond antique misty red olive 1 7532.610000000001 +Manufacturer#3 almond antique forest lavender goldenrod 14 5609.63 +Manufacturer#3 almond antique chartreuse khaki white 17 4419.360000000001 +Manufacturer#3 almond antique metallic orange dim 19 2747.6800000000003 +Manufacturer#3 almond antique olive coral navajo 45 1337.29 +Manufacturer#4 almond aquamarine yellow dodger mint 7 7337.620000000001 +Manufacturer#4 almond antique gainsboro frosted violet 10 5492.7 +Manufacturer#4 almond azure aquamarine papaya violet 12 3872.0299999999997 +Manufacturer#4 almond aquamarine floral ivory bisque 27 2581.6800000000003 +Manufacturer#4 almond antique violet mint lemon 39 1375.42 +Manufacturer#5 almond antique sky peru orange 2 7672.660000000002 +Manufacturer#5 almond antique medium spring khaki 6 5883.93 +Manufacturer#5 almond azure blanched chiffon midnight 23 4272.27 +Manufacturer#5 almond antique blue firebrick mint 31 2807.79 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 1018.1 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 44. testOverNoPartitionSingleAggregate-0-cb3d2f8c1296044dc2658876bb6103ae b/sql/hive/src/test/resources/golden/windowing.q -- 44. testOverNoPartitionSingleAggregate-0-cb3d2f8c1296044dc2658876bb6103ae new file mode 100644 index 0000000000000..5982c9ee2a4d4 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 44. 
testOverNoPartitionSingleAggregate-0-cb3d2f8c1296044dc2658876bb6103ae @@ -0,0 +1,26 @@ +almond antique blue firebrick mint 1789.69 1546.78 +almond antique burnished rose metallic 1173.15 1546.78 +almond antique burnished rose metallic 1173.15 1546.78 +almond antique chartreuse khaki white 1671.68 1546.78 +almond antique chartreuse lavender yellow 1753.76 1546.78 +almond antique forest lavender goldenrod 1190.27 1546.78 +almond antique gainsboro frosted violet 1620.67 1546.78 +almond antique medium spring khaki 1611.66 1546.78 +almond antique metallic orange dim 1410.39 1546.78 +almond antique misty red olive 1922.98 1546.78 +almond antique olive coral navajo 1337.29 1546.78 +almond antique salmon chartreuse burlywood 1602.59 1546.78 +almond antique sky peru orange 1788.73 1546.78 +almond antique violet chocolate turquoise 1690.68 1546.78 +almond antique violet mint lemon 1375.42 1546.78 +almond antique violet turquoise frosted 1800.7 1546.78 +almond aquamarine burnished black steel 1414.42 1546.78 +almond aquamarine dodger light gainsboro 1018.1 1546.78 +almond aquamarine floral ivory bisque 1206.26 1546.78 +almond aquamarine midnight light salmon 2031.98 1546.78 +almond aquamarine pink moccasin thistle 1632.66 1546.78 +almond aquamarine rose maroon antique 1698.66 1546.78 +almond aquamarine sandy cyan gainsboro 1701.6 1546.78 +almond aquamarine yellow dodger mint 1844.92 1546.78 +almond azure aquamarine papaya violet 1290.35 1546.78 +almond azure blanched chiffon midnight 1464.48 1546.78 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 5. testCountWithWindowingUDAF-0-3bde93728761b780a745c2ce0398aa0f b/sql/hive/src/test/resources/golden/windowing.q -- 5. testCountWithWindowingUDAF-0-3bde93728761b780a745c2ce0398aa0f new file mode 100644 index 0000000000000..00d41fc0bcd9c --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 5. 
testCountWithWindowingUDAF-0-3bde93728761b780a745c2ce0398aa0f @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 1173.15 2 0 +Manufacturer#1 almond antique burnished rose metallic 1 1 2 1173.15 2346.3 2 0 +Manufacturer#1 almond antique chartreuse lavender yellow 3 2 3 1753.76 4100.06 34 32 +Manufacturer#1 almond antique salmon chartreuse burlywood 4 3 4 1602.59 5702.650000000001 6 -28 +Manufacturer#1 almond aquamarine burnished black steel 5 4 5 1414.42 7117.070000000001 28 22 +Manufacturer#1 almond aquamarine pink moccasin thistle 6 5 6 1632.66 8749.730000000001 42 14 +Manufacturer#2 almond antique violet chocolate turquoise 1 1 1 1690.68 1690.68 14 0 +Manufacturer#2 almond antique violet turquoise frosted 2 2 2 1800.7 3491.38 40 26 +Manufacturer#2 almond aquamarine midnight light salmon 3 3 3 2031.98 5523.360000000001 2 -38 +Manufacturer#2 almond aquamarine rose maroon antique 4 4 4 1698.66 7222.02 25 23 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 5 5 5 1701.6 8923.62 18 -7 +Manufacturer#3 almond antique chartreuse khaki white 1 1 1 1671.68 1671.68 17 0 +Manufacturer#3 almond antique forest lavender goldenrod 2 2 2 1190.27 2861.95 14 -3 +Manufacturer#3 almond antique metallic orange dim 3 3 3 1410.39 4272.34 19 5 +Manufacturer#3 almond antique misty red olive 4 4 4 1922.98 6195.32 1 -18 +Manufacturer#3 almond antique olive coral navajo 5 5 5 1337.29 7532.61 45 44 +Manufacturer#4 almond antique gainsboro frosted violet 1 1 1 1620.67 1620.67 10 0 +Manufacturer#4 almond antique violet mint lemon 2 2 2 1375.42 2996.09 39 29 +Manufacturer#4 almond aquamarine floral ivory bisque 3 3 3 1206.26 4202.35 27 -12 +Manufacturer#4 almond aquamarine yellow dodger mint 4 4 4 1844.92 6047.27 7 -20 +Manufacturer#4 almond azure aquamarine papaya violet 5 5 5 1290.35 7337.620000000001 12 5 +Manufacturer#5 almond antique blue firebrick mint 1 1 1 1789.69 1789.69 31 0 +Manufacturer#5 almond antique medium spring khaki 2 2 2 1611.66 3401.3500000000004 6 -25 +Manufacturer#5 almond antique sky peru orange 3 3 3 1788.73 5190.08 2 -4 +Manufacturer#5 almond aquamarine dodger light gainsboro 4 4 4 1018.1 6208.18 46 44 +Manufacturer#5 almond azure blanched chiffon midnight 5 5 5 1464.48 7672.66 23 -23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 6. testCountInSubQ-0-73d5274a21d4f4fd51d2a0f1d98516ce b/sql/hive/src/test/resources/golden/windowing.q -- 6. testCountInSubQ-0-73d5274a21d4f4fd51d2a0f1d98516ce new file mode 100644 index 0000000000000..98c09e4fe15c3 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 6. testCountInSubQ-0-73d5274a21d4f4fd51d2a0f1d98516ce @@ -0,0 +1,26 @@ +1 1 2 1173.15 0 +1 1 2 2346.3 0 +3 2 3 4100.06 32 +4 3 4 5702.650000000001 -28 +5 4 5 7117.070000000001 22 +6 5 6 8749.730000000001 14 +1 1 1 1690.68 0 +2 2 2 3491.38 26 +3 3 3 5523.360000000001 -38 +4 4 4 7222.02 23 +5 5 5 8923.62 -7 +1 1 1 1671.68 0 +2 2 2 2861.95 -3 +3 3 3 4272.34 5 +4 4 4 6195.32 -18 +5 5 5 7532.61 44 +1 1 1 1620.67 0 +2 2 2 2996.09 29 +3 3 3 4202.35 -12 +4 4 4 6047.27 -20 +5 5 5 7337.620000000001 5 +1 1 1 1789.69 0 +2 2 2 3401.3500000000004 -25 +3 3 3 5190.08 -4 +4 4 4 6208.18 44 +5 5 5 7672.66 -23 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 8. testMixedCaseAlias-0-4b1ad2515fb079012467e987f484a722 b/sql/hive/src/test/resources/golden/windowing.q -- 8. 
testMixedCaseAlias-0-4b1ad2515fb079012467e987f484a722 new file mode 100644 index 0000000000000..e35257d98382e --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 8. testMixedCaseAlias-0-4b1ad2515fb079012467e987f484a722 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique burnished rose metallic 2 1 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 +Manufacturer#1 almond aquamarine burnished black steel 28 5 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 +Manufacturer#2 almond antique violet turquoise frosted 40 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 +Manufacturer#3 almond antique chartreuse khaki white 17 1 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 +Manufacturer#3 almond antique metallic orange dim 19 3 +Manufacturer#3 almond antique misty red olive 1 4 +Manufacturer#3 almond antique olive coral navajo 45 5 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 +Manufacturer#4 almond antique violet mint lemon 39 2 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 +Manufacturer#5 almond antique blue firebrick mint 31 1 +Manufacturer#5 almond antique medium spring khaki 6 2 +Manufacturer#5 almond antique sky peru orange 2 3 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 diff --git a/sql/hive/src/test/resources/golden/windowing.q -- 9. testHavingWithWindowingNoGBY-0-70cdc0555a61ef08534a9ebebb95ebbf b/sql/hive/src/test/resources/golden/windowing.q -- 9. testHavingWithWindowingNoGBY-0-70cdc0555a61ef08534a9ebebb95ebbf new file mode 100644 index 0000000000000..850c41c8115d6 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing.q -- 9. 
testHavingWithWindowingNoGBY-0-70cdc0555a61ef08534a9ebebb95ebbf @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-0-d3f50875bd5dff172cf813fdb7d738eb b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-0-d3f50875bd5dff172cf813fdb7d738eb new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-1-dda16565b98926fc3587de937b9401c7 b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-1-dda16565b98926fc3587de937b9401c7 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-2-374e39786feb745cd70f25be58bfa24 b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-2-374e39786feb745cd70f25be58bfa24 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-3-d2b5e23edec42a62e61750b110ecbaac b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-3-d2b5e23edec42a62e61750b110ecbaac new file mode 100644 index 0000000000000..573541ac9702d --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-3-d2b5e23edec42a62e61750b110ecbaac @@ -0,0 +1 @@ +0 diff --git a/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-4-50d0c630159068b5b8ccdeb76493f1f7 b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-4-50d0c630159068b5b8ccdeb76493f1f7 new file mode 100644 index 0000000000000..850c41c8115d6 --- /dev/null +++ 
b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-4-50d0c630159068b5b8ccdeb76493f1f7 @@ -0,0 +1,26 @@ +Manufacturer#1 almond antique burnished rose metallic 2 1 1 1173.15 +Manufacturer#1 almond antique burnished rose metallic 2 1 1 2346.3 +Manufacturer#1 almond antique chartreuse lavender yellow 34 3 2 4100.06 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 4 3 5702.650000000001 +Manufacturer#1 almond aquamarine burnished black steel 28 5 4 7117.070000000001 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 6 5 8749.730000000001 +Manufacturer#2 almond antique violet chocolate turquoise 14 1 1 1690.68 +Manufacturer#2 almond antique violet turquoise frosted 40 2 2 3491.38 +Manufacturer#2 almond aquamarine midnight light salmon 2 3 3 5523.360000000001 +Manufacturer#2 almond aquamarine rose maroon antique 25 4 4 7222.02 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 5 5 8923.62 +Manufacturer#3 almond antique chartreuse khaki white 17 1 1 1671.68 +Manufacturer#3 almond antique forest lavender goldenrod 14 2 2 2861.95 +Manufacturer#3 almond antique metallic orange dim 19 3 3 4272.34 +Manufacturer#3 almond antique misty red olive 1 4 4 6195.32 +Manufacturer#3 almond antique olive coral navajo 45 5 5 7532.61 +Manufacturer#4 almond antique gainsboro frosted violet 10 1 1 1620.67 +Manufacturer#4 almond antique violet mint lemon 39 2 2 2996.09 +Manufacturer#4 almond aquamarine floral ivory bisque 27 3 3 4202.35 +Manufacturer#4 almond aquamarine yellow dodger mint 7 4 4 6047.27 +Manufacturer#4 almond azure aquamarine papaya violet 12 5 5 7337.620000000001 +Manufacturer#5 almond antique blue firebrick mint 31 1 1 1789.69 +Manufacturer#5 almond antique medium spring khaki 6 2 2 3401.3500000000004 +Manufacturer#5 almond antique sky peru orange 2 3 3 5190.08 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 4 4 6208.18 +Manufacturer#5 almond azure blanched chiffon midnight 23 5 5 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-5-3f95cd6f4add7a2d0101fe3dd97e5082 b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-5-3f95cd6f4add7a2d0101fe3dd97e5082 new file mode 100644 index 0000000000000..573541ac9702d --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_adjust_rowcontainer_sz-5-3f95cd6f4add7a2d0101fe3dd97e5082 @@ -0,0 +1 @@ +0 diff --git a/sql/hive/src/test/resources/golden/windowing_columnPruning-0-d3f50875bd5dff172cf813fdb7d738eb b/sql/hive/src/test/resources/golden/windowing_columnPruning-0-d3f50875bd5dff172cf813fdb7d738eb new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_columnPruning-1-dda16565b98926fc3587de937b9401c7 b/sql/hive/src/test/resources/golden/windowing_columnPruning-1-dda16565b98926fc3587de937b9401c7 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_columnPruning-2-374e39786feb745cd70f25be58bfa24 b/sql/hive/src/test/resources/golden/windowing_columnPruning-2-374e39786feb745cd70f25be58bfa24 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_columnPruning-3-9294b4a22bc396ff2accabd53c5da98b b/sql/hive/src/test/resources/golden/windowing_columnPruning-3-9294b4a22bc396ff2accabd53c5da98b new file mode 100644 index 0000000000000..1b5ae55383a4a --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_columnPruning-3-9294b4a22bc396ff2accabd53c5da98b @@ -0,0 +1,26 @@ +2 
1173.15 +2 2346.3 +34 4100.06 +6 5702.650000000001 +28 7117.070000000001 +42 8749.730000000001 +14 1690.68 +40 3491.38 +2 5523.360000000001 +25 7222.02 +18 8923.62 +17 1671.68 +14 2861.95 +19 4272.34 +1 6195.32 +45 7532.61 +10 1620.67 +39 2996.09 +27 4202.35 +7 6047.27 +12 7337.620000000001 +31 1789.69 +6 3401.3500000000004 +2 5190.08 +46 6208.18 +23 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing_columnPruning-4-445cab062581c449ceffcb368cdf133 b/sql/hive/src/test/resources/golden/windowing_columnPruning-4-445cab062581c449ceffcb368cdf133 new file mode 100644 index 0000000000000..1b5ae55383a4a --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_columnPruning-4-445cab062581c449ceffcb368cdf133 @@ -0,0 +1,26 @@ +2 1173.15 +2 2346.3 +34 4100.06 +6 5702.650000000001 +28 7117.070000000001 +42 8749.730000000001 +14 1690.68 +40 3491.38 +2 5523.360000000001 +25 7222.02 +18 8923.62 +17 1671.68 +14 2861.95 +19 4272.34 +1 6195.32 +45 7532.61 +10 1620.67 +39 2996.09 +27 4202.35 +7 6047.27 +12 7337.620000000001 +31 1789.69 +6 3401.3500000000004 +2 5190.08 +46 6208.18 +23 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing_columnPruning-5-89110070c761eafb992eb9315128b53f b/sql/hive/src/test/resources/golden/windowing_columnPruning-5-89110070c761eafb992eb9315128b53f new file mode 100644 index 0000000000000..e426c725b0e36 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_columnPruning-5-89110070c761eafb992eb9315128b53f @@ -0,0 +1,26 @@ +Manufacturer#1 1173.15 +Manufacturer#1 2346.3 +Manufacturer#1 4100.06 +Manufacturer#1 5702.650000000001 +Manufacturer#1 7117.070000000001 +Manufacturer#1 8749.730000000001 +Manufacturer#2 1690.68 +Manufacturer#2 3491.38 +Manufacturer#2 5523.360000000001 +Manufacturer#2 7222.02 +Manufacturer#2 8923.62 +Manufacturer#3 1671.68 +Manufacturer#3 2861.95 +Manufacturer#3 4272.34 +Manufacturer#3 6195.32 +Manufacturer#3 7532.61 +Manufacturer#4 1620.67 +Manufacturer#4 2996.09 +Manufacturer#4 4202.35 +Manufacturer#4 6047.27 +Manufacturer#4 7337.620000000001 +Manufacturer#5 1789.69 +Manufacturer#5 3401.3500000000004 +Manufacturer#5 5190.08 +Manufacturer#5 6208.18 +Manufacturer#5 7672.66 diff --git a/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 1-0-12a92d8800e0da8b515ba3eaf6a7fd0f b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 1-0-12a92d8800e0da8b515ba3eaf6a7fd0f new file mode 100644 index 0000000000000..acc4f3bc2a2d5 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 1-0-12a92d8800e0da8b515ba3eaf6a7fd0f @@ -0,0 +1,1049 @@ + 1 4294967354 + 2 8589934811 + 3 12884902227 +alice allen 1 4294967487 +alice allen 2 8589935012 +alice allen 3 12884902543 +alice brown 1 4294967355 +alice carson 1 4294967370 +alice davidson 1 4294967517 +alice falkner 1 4294967316 +alice garcia 1 4294967369 +alice hernandez 1 4294967314 +alice hernandez 2 8589934613 +alice johnson 1 4294967424 +alice king 1 4294967387 +alice king 2 8589934903 +alice king 3 12884902449 +alice laertes 1 4294967519 +alice laertes 2 8589935038 +alice miller 1 4294967324 +alice nixon 1 4294967484 +alice nixon 2 8589934894 +alice nixon 3 12884902307 +alice ovid 1 4294967412 +alice polk 1 4294967366 +alice quirinius 1 4294967505 +alice quirinius 2 8589935054 +alice robinson 1 4294967502 +alice robinson 2 8589934947 +alice steinbeck 1 4294967549 +alice steinbeck 2 8589934913 +alice steinbeck 3 12884902387 +alice underhill 1 4294967441 +alice van buren 1 
4294967428 +alice xylophone 1 4294967519 +alice xylophone 2 8589934877 +alice xylophone 3 12884902240 +alice zipper 1 4294967380 +alice zipper 2 8589934919 +alice zipper 3 12884902439 +bob brown 1 4294967431 +bob brown 2 8589934853 +bob brown 3 12884902280 +bob carson 1 4294967408 +bob davidson 1 4294967435 +bob davidson 2 8589934939 +bob davidson 3 12884902293 +bob ellison 1 4294967530 +bob ellison 2 8589934966 +bob ellison 3 12884902328 +bob ellison 4 17179869672 +bob falkner 1 4294967464 +bob garcia 1 4294967435 +bob garcia 2 8589934804 +bob garcia 3 12884902148 +bob garcia 4 17179869587 +bob garcia 5 21474836905 +bob hernandez 1 4294967500 +bob ichabod 1 4294967424 +bob king 1 4294967443 +bob king 2 8589934740 +bob king 3 12884902279 +bob laertes 1 4294967472 +bob laertes 2 8589934852 +bob miller 1 4294967349 +bob ovid 1 4294967401 +bob ovid 2 8589934801 +bob ovid 3 12884902313 +bob ovid 4 17179869708 +bob polk 1 4294967337 +bob quirinius 1 4294967346 +bob steinbeck 1 4294967342 +bob van buren 1 4294967422 +bob white 1 4294967493 +bob white 2 8589934855 +bob xylophone 1 4294967407 +bob xylophone 2 8589934872 +bob young 1 4294967413 +bob zipper 1 4294967416 +bob zipper 2 8589934769 +bob zipper 3 12884902068 +calvin allen 1 4294967373 +calvin brown 1 4294967530 +calvin brown 2 8589934967 +calvin brown 3 12884902378 +calvin carson 1 4294967534 +calvin davidson 1 4294967437 +calvin davidson 2 8589934905 +calvin ellison 1 4294967480 +calvin falkner 1 4294967305 +calvin falkner 2 8589934723 +calvin falkner 3 12884902155 +calvin falkner 4 17179869455 +calvin falkner 5 21474836800 +calvin falkner 6 25769804250 +calvin garcia 1 4294967492 +calvin hernandez 1 4294967341 +calvin johnson 1 4294967546 +calvin laertes 1 4294967499 +calvin laertes 2 8589934930 +calvin nixon 1 4294967488 +calvin nixon 2 8589934788 +calvin nixon 3 12884902200 +calvin ovid 1 4294967343 +calvin ovid 2 8589934881 +calvin ovid 3 12884902210 +calvin ovid 4 17179869559 +calvin polk 1 4294967416 +calvin quirinius 1 4294967532 +calvin quirinius 2 8589935053 +calvin robinson 1 4294967326 +calvin steinbeck 1 4294967417 +calvin steinbeck 2 8589934891 +calvin steinbeck 3 12884902396 +calvin thompson 1 4294967346 +calvin thompson 2 8589934859 +calvin underhill 1 4294967478 +calvin van buren 1 4294967300 +calvin van buren 2 8589934808 +calvin white 1 4294967304 +calvin white 2 8589934848 +calvin xylophone 1 4294967299 +calvin xylophone 2 8589934675 +calvin xylophone 3 12884902133 +calvin young 1 4294967410 +calvin young 2 8589934752 +calvin zipper 1 4294967441 +calvin zipper 2 8589934960 +david allen 1 4294967381 +david allen 2 8589934752 +david brown 1 4294967544 +david brown 2 8589934870 +david davidson 1 4294967487 +david davidson 2 8589934952 +david davidson 3 12884902474 +david davidson 4 17179869819 +david ellison 1 4294967477 +david ellison 2 8589934963 +david ellison 3 12884902426 +david hernandez 1 4294967324 +david ichabod 1 4294967385 +david ichabod 2 8589934872 +david laertes 1 4294967385 +david nixon 1 4294967381 +david ovid 1 4294967396 +david ovid 2 8589934839 +david quirinius 1 4294967375 +david quirinius 2 8589934905 +david quirinius 3 12884902362 +david robinson 1 4294967465 +david robinson 2 8589934933 +david thompson 1 4294967361 +david underhill 1 4294967331 +david underhill 2 8589934715 +david underhill 3 12884902185 +david van buren 1 4294967438 +david van buren 2 8589934747 +david white 1 4294967428 +david xylophone 1 8589934898 +david xylophone 1 8589934898 +david xylophone 3 12884902378 +david young 1 
4294967296 +david young 2 8589934601 +ethan allen 1 4294967351 +ethan brown 1 4294967477 +ethan brown 2 8589934897 +ethan brown 3 12884902217 +ethan brown 4 17179869548 +ethan brown 5 21474836951 +ethan brown 6 25769804375 +ethan carson 1 4294967352 +ethan ellison 1 4294967514 +ethan ellison 2 8589934923 +ethan falkner 1 4294967318 +ethan falkner 2 8589934779 +ethan garcia 1 4294967310 +ethan hernandez 1 4294967349 +ethan johnson 1 4294967357 +ethan king 1 4294967413 +ethan laertes 1 4294967402 +ethan laertes 2 8589934859 +ethan laertes 3 12884902390 +ethan laertes 4 17179869880 +ethan laertes 5 21474837302 +ethan laertes 6 25769804603 +ethan laertes 7 30064771974 +ethan miller 1 4294967352 +ethan nixon 1 4294967499 +ethan ovid 1 4294967452 +ethan polk 1 4294967329 +ethan polk 2 8589934711 +ethan polk 3 12884902253 +ethan polk 4 17179869732 +ethan quirinius 1 4294967501 +ethan quirinius 2 8589934852 +ethan quirinius 3 12884902200 +ethan robinson 1 4294967353 +ethan robinson 2 8589934855 +ethan underhill 1 4294967466 +ethan van buren 1 4294967511 +ethan white 1 4294967445 +ethan white 2 8589934872 +ethan xylophone 1 4294967543 +ethan zipper 1 4294967462 +ethan zipper 2 8589934815 +fred davidson 1 4294967512 +fred davidson 2 8589934936 +fred davidson 3 12884902424 +fred ellison 1 4294967470 +fred ellison 2 8589934901 +fred ellison 3 12884902294 +fred falkner 1 4294967340 +fred falkner 2 8589934887 +fred falkner 3 12884902187 +fred hernandez 1 4294967365 +fred ichabod 1 4294967342 +fred ichabod 2 8589934831 +fred johnson 1 4294967373 +fred king 1 4294967346 +fred king 2 8589934766 +fred laertes 1 4294967351 +fred miller 1 4294967490 +fred nixon 1 4294967514 +fred nixon 2 8589934811 +fred nixon 3 12884902293 +fred nixon 4 17179869668 +fred polk 1 4294967332 +fred polk 2 8589934775 +fred polk 3 12884902233 +fred polk 4 17179869740 +fred quirinius 1 4294967426 +fred quirinius 2 8589934951 +fred robinson 1 4294967461 +fred steinbeck 1 4294967411 +fred steinbeck 2 8589934740 +fred steinbeck 3 12884902212 +fred underhill 1 4294967387 +fred van buren 1 4294967431 +fred van buren 2 8589934812 +fred van buren 3 12884902338 +fred van buren 4 17179869801 +fred white 1 4294967434 +fred young 1 4294967495 +fred young 2 8589934980 +fred zipper 1 4294967447 +gabriella allen 1 4294967405 +gabriella allen 2 8589934939 +gabriella brown 1 4294967543 +gabriella brown 2 8589934946 +gabriella carson 1 4294967540 +gabriella davidson 1 4294967507 +gabriella ellison 1 4294967393 +gabriella ellison 2 8589934733 +gabriella falkner 1 4294967378 +gabriella falkner 2 8589934901 +gabriella falkner 3 12884902335 +gabriella garcia 1 4294967419 +gabriella hernandez 1 4294967481 +gabriella hernandez 2 8589934943 +gabriella ichabod 1 4294967337 +gabriella ichabod 2 8589934725 +gabriella ichabod 3 12884902062 +gabriella ichabod 4 17179869382 +gabriella ichabod 5 21474836880 +gabriella king 1 4294967434 +gabriella king 2 8589934827 +gabriella laertes 1 4294967410 +gabriella miller 1 4294967363 +gabriella ovid 1 4294967482 +gabriella ovid 2 8589935004 +gabriella polk 1 4294967410 +gabriella polk 2 8589934712 +gabriella steinbeck 1 4294967500 +gabriella steinbeck 2 8589934935 +gabriella thompson 1 4294967299 +gabriella thompson 2 8589934711 +gabriella thompson 3 12884902196 +gabriella van buren 1 4294967457 +gabriella van buren 2 8589934927 +gabriella white 1 4294967335 +gabriella young 1 4294967493 +gabriella young 2 8589934924 +gabriella zipper 1 4294967357 +gabriella zipper 2 8589934867 +holly allen 1 4294967327 +holly brown 1 
4294967321 +holly brown 2 8589934659 +holly falkner 1 4294967324 +holly hernandez 1 4294967378 +holly hernandez 2 8589934921 +holly hernandez 3 12884902465 +holly hernandez 4 17179869773 +holly ichabod 1 4294967342 +holly ichabod 2 8589934800 +holly ichabod 3 12884902129 +holly johnson 1 4294967517 +holly johnson 2 8589934897 +holly johnson 3 12884902432 +holly king 1 4294967392 +holly king 2 8589934753 +holly laertes 1 4294967406 +holly miller 1 4294967388 +holly nixon 1 4294967383 +holly nixon 2 8589934707 +holly polk 1 4294967398 +holly polk 2 8589934832 +holly robinson 1 4294967532 +holly thompson 1 4294967529 +holly thompson 2 8589934868 +holly thompson 3 12884902242 +holly underhill 1 4294967383 +holly underhill 2 8589934894 +holly underhill 3 12884902330 +holly underhill 4 17179869856 +holly van buren 1 4294967539 +holly white 1 4294967320 +holly white 2 8589934735 +holly xylophone 1 4294967435 +holly young 1 4294967487 +holly young 2 8589934987 +holly zipper 1 4294967337 +holly zipper 2 8589934846 +irene allen 1 4294967518 +irene brown 1 4294967434 +irene brown 2 8589934862 +irene brown 3 12884902272 +irene carson 1 4294967473 +irene ellison 1 4294967379 +irene ellison 2 8589934797 +irene falkner 1 4294967404 +irene falkner 2 8589934952 +irene garcia 1 4294967498 +irene garcia 2 8589934869 +irene garcia 3 12884902192 +irene ichabod 1 4294967529 +irene ichabod 2 8589935038 +irene johnson 1 4294967468 +irene laertes 1 4294967481 +irene laertes 2 8589934780 +irene laertes 3 12884902116 +irene miller 1 4294967387 +irene nixon 1 4294967323 +irene nixon 2 8589934824 +irene nixon 3 12884902362 +irene ovid 1 4294967499 +irene ovid 2 8589934870 +irene ovid 3 12884902230 +irene polk 1 4294967521 +irene polk 2 8589934930 +irene polk 3 12884902395 +irene polk 4 17179869941 +irene polk 5 21474837237 +irene quirinius 1 4294967365 +irene quirinius 2 8589934751 +irene quirinius 3 12884902141 +irene robinson 1 4294967347 +irene steinbeck 1 4294967549 +irene thompson 1 4294967479 +irene underhill 1 4294967371 +irene underhill 2 8589934753 +irene van buren 1 4294967439 +irene van buren 2 8589934906 +irene xylophone 1 4294967436 +jessica brown 1 4294967496 +jessica carson 1 4294967389 +jessica carson 2 8589934897 +jessica carson 3 12884902345 +jessica davidson 1 4294967325 +jessica davidson 2 8589934709 +jessica davidson 3 12884902098 +jessica davidson 4 17179869569 +jessica ellison 1 4294967316 +jessica ellison 2 8589934721 +jessica falkner 1 4294967549 +jessica garcia 1 4294967540 +jessica garcia 2 8589935041 +jessica ichabod 1 4294967413 +jessica johnson 1 4294967497 +jessica johnson 2 8589934870 +jessica miller 1 4294967495 +jessica nixon 1 4294967311 +jessica nixon 2 8589934754 +jessica ovid 1 4294967330 +jessica ovid 2 8589934855 +jessica polk 1 4294967403 +jessica quirinius 1 4294967523 +jessica quirinius 2 8589934942 +jessica quirinius 3 12884902388 +jessica quirinius 4 17179869696 +jessica robinson 1 4294967542 +jessica thompson 1 4294967449 +jessica thompson 2 8589934763 +jessica underhill 1 4294967541 +jessica underhill 2 8589934844 +jessica underhill 3 12884902153 +jessica van buren 1 4294967344 +jessica white 1 4294967482 +jessica white 2 8589934929 +jessica white 3 12884902378 +jessica white 4 17179869687 +jessica white 5 21474837086 +jessica xylophone 1 4294967421 +jessica young 1 4294967382 +jessica young 2 8589934903 +jessica zipper 1 4294967334 +jessica zipper 2 8589934785 +jessica zipper 3 12884902157 +katie allen 1 4294967443 +katie brown 1 4294967420 +katie davidson 1 4294967459 
+katie ellison 1 4294967486 +katie ellison 2 8589934861 +katie falkner 1 4294967362 +katie garcia 1 4294967306 +katie garcia 2 8589934680 +katie hernandez 1 4294967451 +katie ichabod 1 4294967330 +katie ichabod 2 8589934742 +katie ichabod 3 12884902209 +katie king 1 4294967339 +katie king 2 8589934760 +katie king 3 12884902199 +katie miller 1 4294967425 +katie miller 2 8589934859 +katie nixon 1 4294967500 +katie ovid 1 4294967519 +katie polk 1 4294967384 +katie polk 2 8589934926 +katie robinson 1 4294967310 +katie van buren 1 4294967335 +katie van buren 2 8589934647 +katie white 1 4294967337 +katie white 2 8589934643 +katie xylophone 1 4294967486 +katie young 1 4294967349 +katie young 2 8589934681 +katie young 3 12884902225 +katie zipper 1 4294967354 +katie zipper 2 8589934766 +luke allen 1 4294967533 +luke allen 2 8589934836 +luke allen 3 12884902346 +luke allen 4 17179869863 +luke allen 5 21474837208 +luke brown 1 4294967473 +luke davidson 1 4294967550 +luke davidson 2 8589934904 +luke ellison 1 4294967322 +luke ellison 2 8589934675 +luke ellison 3 12884902103 +luke falkner 1 4294967359 +luke falkner 2 8589934782 +luke garcia 1 4294967304 +luke garcia 2 8589934683 +luke ichabod 1 4294967324 +luke ichabod 2 8589934766 +luke johnson 1 4294967527 +luke johnson 2 8589934987 +luke johnson 3 12884902342 +luke laertes 1 4294967505 +luke laertes 2 8589935011 +luke laertes 3 12884902497 +luke laertes 4 17179869806 +luke laertes 5 21474837193 +luke miller 1 4294967497 +luke ovid 1 4294967492 +luke ovid 2 8589934901 +luke polk 1 4294967545 +luke polk 2 8589934873 +luke quirinius 1 4294967320 +luke robinson 1 4294967299 +luke robinson 2 8589934606 +luke thompson 1 4294967521 +luke underhill 1 4294967393 +luke underhill 2 8589934803 +luke underhill 3 12884902122 +luke van buren 1 4294967424 +luke white 1 4294967505 +luke xylophone 1 4294967382 +luke zipper 1 4294967353 +mike allen 1 4294967466 +mike brown 1 4294967369 +mike carson 1 4294967477 +mike carson 2 8589934934 +mike carson 3 12884902482 +mike davidson 1 4294967501 +mike davidson 2 8589934965 +mike ellison 1 4294967353 +mike ellison 2 8589934747 +mike ellison 3 12884902282 +mike ellison 4 17179869806 +mike ellison 5 21474837309 +mike falkner 1 4294967301 +mike garcia 1 4294967428 +mike garcia 2 8589934826 +mike garcia 3 12884902289 +mike hernandez 1 4294967316 +mike hernandez 2 8589934800 +mike ichabod 1 4294967494 +mike king 1 4294967323 +mike king 2 8589934848 +mike king 3 12884902248 +mike king 4 17179869595 +mike king 5 21474837046 +mike king 6 25769804478 +mike miller 1 4294967449 +mike nixon 1 4294967527 +mike nixon 2 8589935004 +mike polk 1 4294967389 +mike polk 2 8589934848 +mike polk 3 12884902351 +mike quirinius 1 4294967422 +mike steinbeck 1 4294967519 +mike steinbeck 2 8589934827 +mike steinbeck 3 12884902316 +mike steinbeck 4 17179869850 +mike van buren 1 4294967544 +mike van buren 2 8589935061 +mike white 1 4294967336 +mike white 2 8589934882 +mike white 3 12884902374 +mike white 4 17179869843 +mike young 1 4294967453 +mike young 2 8589934804 +mike young 3 12884902198 +mike zipper 1 4294967402 +mike zipper 2 8589934727 +mike zipper 3 12884902228 +nick allen 1 4294967507 +nick allen 2 8589934807 +nick brown 1 4294967334 +nick davidson 1 4294967357 +nick ellison 1 4294967397 +nick ellison 2 8589934699 +nick falkner 1 4294967480 +nick falkner 2 8589934923 +nick garcia 1 4294967384 +nick garcia 2 8589934797 +nick garcia 3 12884902319 +nick ichabod 1 4294967388 +nick ichabod 2 8589934758 +nick ichabod 3 12884902225 +nick johnson 1 
4294967398 +nick johnson 2 8589934809 +nick laertes 1 4294967389 +nick miller 1 4294967550 +nick nixon 1 4294967482 +nick ovid 1 4294967488 +nick polk 1 4294967551 +nick quirinius 1 4294967316 +nick quirinius 2 8589934612 +nick robinson 1 4294967409 +nick robinson 2 8589934731 +nick steinbeck 1 4294967355 +nick thompson 1 4294967401 +nick underhill 1 4294967527 +nick van buren 1 4294967303 +nick xylophone 1 4294967460 +nick young 1 4294967405 +nick young 2 8589934917 +nick zipper 1 4294967430 +nick zipper 2 8589934796 +oscar allen 1 4294967500 +oscar brown 1 4294967331 +oscar carson 1 4294967460 +oscar carson 2 8589934904 +oscar carson 3 12884902286 +oscar carson 4 17179869599 +oscar carson 5 21474836960 +oscar davidson 1 4294967482 +oscar ellison 1 8589934740 +oscar ellison 1 8589934740 +oscar falkner 1 4294967526 +oscar garcia 1 4294967301 +oscar hernandez 1 4294967343 +oscar hernandez 2 8589934843 +oscar ichabod 1 4294967513 +oscar ichabod 2 8589934837 +oscar ichabod 3 12884902165 +oscar ichabod 4 17179869569 +oscar johnson 1 4294967418 +oscar johnson 2 8589934763 +oscar king 1 4294967465 +oscar king 2 8589934936 +oscar king 3 12884902469 +oscar laertes 1 4294967425 +oscar laertes 2 8589934876 +oscar laertes 3 12884902426 +oscar laertes 4 17179869786 +oscar nixon 1 4294967532 +oscar ovid 1 4294967508 +oscar ovid 2 8589934910 +oscar ovid 3 12884902418 +oscar polk 1 4294967325 +oscar polk 2 8589934713 +oscar quirinius 1 4294967416 +oscar quirinius 2 8589934932 +oscar quirinius 3 12884902390 +oscar quirinius 4 17179869763 +oscar robinson 1 4294967355 +oscar robinson 2 8589934681 +oscar robinson 3 12884902031 +oscar robinson 4 17179869383 +oscar steinbeck 1 4294967548 +oscar thompson 1 4294967453 +oscar thompson 2 8589934824 +oscar thompson 3 12884902197 +oscar thompson 4 17179869496 +oscar underhill 1 4294967374 +oscar van buren 1 4294967520 +oscar van buren 2 8589934990 +oscar van buren 3 12884902490 +oscar white 1 4294967454 +oscar white 2 8589934761 +oscar white 3 12884902163 +oscar white 4 17179869512 +oscar xylophone 1 4294967400 +oscar xylophone 2 8589934806 +oscar xylophone 3 12884902124 +oscar zipper 1 4294967449 +oscar zipper 2 8589934969 +oscar zipper 3 12884902458 +priscilla brown 1 4294967369 +priscilla brown 2 8589934897 +priscilla brown 3 12884902360 +priscilla carson 1 4294967489 +priscilla carson 2 8589934838 +priscilla carson 3 12884902270 +priscilla ichabod 1 4294967379 +priscilla ichabod 2 8589934926 +priscilla johnson 1 4294967535 +priscilla johnson 2 8589935003 +priscilla johnson 3 12884902308 +priscilla johnson 4 17179869707 +priscilla johnson 5 21474837167 +priscilla king 1 4294967385 +priscilla nixon 1 4294967388 +priscilla nixon 2 8589934849 +priscilla ovid 1 4294967528 +priscilla ovid 2 8589935035 +priscilla polk 1 4294967434 +priscilla quirinius 1 4294967347 +priscilla thompson 1 4294967497 +priscilla underhill 1 4294967520 +priscilla underhill 2 8589934853 +priscilla van buren 1 4294967318 +priscilla van buren 2 8589934809 +priscilla van buren 3 12884902351 +priscilla white 1 4294967419 +priscilla xylophone 1 4294967503 +priscilla xylophone 2 8589934956 +priscilla xylophone 3 12884902406 +priscilla young 1 4294967401 +priscilla young 2 8589934931 +priscilla zipper 1 4294967516 +priscilla zipper 2 8589934950 +quinn allen 1 4294967339 +quinn allen 2 8589934881 +quinn brown 1 4294967335 +quinn brown 2 8589934651 +quinn brown 3 12884902099 +quinn davidson 1 4294967478 +quinn davidson 2 8589934849 +quinn davidson 3 12884902238 +quinn davidson 4 17179869565 +quinn 
ellison 1 4294967392 +quinn ellison 2 8589934907 +quinn garcia 1 4294967344 +quinn garcia 2 8589934882 +quinn garcia 3 12884902395 +quinn garcia 4 17179869868 +quinn ichabod 1 4294967405 +quinn king 1 4294967538 +quinn king 2 8589934996 +quinn laertes 1 4294967533 +quinn laertes 2 8589934977 +quinn laertes 3 12884902524 +quinn nixon 1 4294967432 +quinn ovid 1 4294967340 +quinn quirinius 1 4294967347 +quinn robinson 1 4294967365 +quinn steinbeck 1 4294967358 +quinn steinbeck 2 8589934810 +quinn thompson 1 4294967488 +quinn thompson 2 8589934888 +quinn underhill 1 4294967307 +quinn underhill 2 8589934744 +quinn underhill 3 12884902278 +quinn van buren 1 4294967362 +quinn young 1 4294967392 +quinn zipper 1 4294967521 +quinn zipper 2 8589934944 +rachel allen 1 4294967334 +rachel allen 2 8589934713 +rachel brown 1 4294967451 +rachel brown 2 8589934886 +rachel brown 3 12884902325 +rachel brown 4 17179869632 +rachel brown 5 21474836938 +rachel carson 1 4294967461 +rachel carson 2 8589934777 +rachel davidson 1 4294967387 +rachel ellison 1 4294967423 +rachel falkner 1 4294967544 +rachel falkner 2 8589934892 +rachel falkner 3 12884902350 +rachel falkner 4 17179869809 +rachel johnson 1 4294967541 +rachel king 1 4294967442 +rachel king 2 8589934771 +rachel laertes 1 4294967446 +rachel laertes 2 8589934804 +rachel ovid 1 4294967481 +rachel ovid 2 8589934832 +rachel polk 1 4294967335 +rachel quirinius 1 4294967297 +rachel robinson 1 4294967344 +rachel robinson 2 8589934807 +rachel robinson 3 12884902135 +rachel thompson 1 4294967518 +rachel thompson 2 8589934881 +rachel thompson 3 12884902306 +rachel underhill 1 4294967382 +rachel white 1 4294967457 +rachel white 2 8589934793 +rachel young 1 4294967391 +rachel zipper 1 4294967434 +rachel zipper 2 8589934813 +sarah carson 1 4294967503 +sarah carson 2 8589934822 +sarah carson 3 12884902167 +sarah ellison 1 4294967542 +sarah falkner 1 4294967525 +sarah falkner 2 8589934974 +sarah garcia 1 4294967391 +sarah garcia 2 8589934849 +sarah garcia 3 12884902247 +sarah ichabod 1 4294967370 +sarah ichabod 2 8589934909 +sarah johnson 1 4294967433 +sarah johnson 2 8589934926 +sarah johnson 3 12884902235 +sarah johnson 4 17179869559 +sarah king 1 4294967496 +sarah king 2 8589935039 +sarah miller 1 4294967458 +sarah ovid 1 4294967350 +sarah robinson 1 4294967419 +sarah robinson 2 8589934917 +sarah steinbeck 1 4294967456 +sarah white 1 4294967514 +sarah white 2 8589934882 +sarah xylophone 1 4294967355 +sarah young 1 4294967442 +sarah zipper 1 4294967432 +tom brown 1 4294967432 +tom brown 2 8589934950 +tom carson 1 4294967388 +tom carson 2 8589934693 +tom carson 3 12884902227 +tom davidson 1 4294967507 +tom ellison 1 4294967487 +tom ellison 2 8589934844 +tom ellison 3 12884902188 +tom falkner 1 4294967382 +tom falkner 2 8589934837 +tom hernandez 1 8589934733 +tom hernandez 1 8589934733 +tom ichabod 1 4294967445 +tom johnson 1 4294967492 +tom johnson 2 8589934923 +tom king 1 4294967331 +tom laertes 1 4294967431 +tom laertes 2 8589934744 +tom miller 1 4294967366 +tom miller 2 8589934723 +tom miller 3 12884902078 +tom nixon 1 4294967506 +tom ovid 1 4294967512 +tom polk 1 4294967329 +tom polk 2 8589934869 +tom quirinius 1 4294967507 +tom quirinius 2 8589934823 +tom robinson 1 4294967457 +tom robinson 2 8589935008 +tom robinson 3 12884902462 +tom robinson 4 17179869770 +tom steinbeck 1 4294967447 +tom van buren 1 4294967374 +tom van buren 2 8589934703 +tom van buren 3 12884902195 +tom white 1 4294967413 +tom young 1 4294967539 +tom young 2 8589935074 +tom zipper 1 4294967526 
+ulysses brown 1 4294967537 +ulysses carson 1 4294967323 +ulysses carson 2 8589934815 +ulysses carson 3 12884902127 +ulysses carson 4 17179869485 +ulysses davidson 1 4294967467 +ulysses ellison 1 4294967442 +ulysses garcia 1 4294967470 +ulysses hernandez 1 4294967449 +ulysses hernandez 2 8589934995 +ulysses hernandez 3 12884902393 +ulysses ichabod 1 4294967353 +ulysses ichabod 2 8589934728 +ulysses johnson 1 4294967432 +ulysses king 1 4294967537 +ulysses laertes 1 4294967391 +ulysses laertes 2 8589934938 +ulysses laertes 3 12884902431 +ulysses miller 1 4294967373 +ulysses miller 2 8589934808 +ulysses nixon 1 4294967296 +ulysses ovid 1 4294967394 +ulysses polk 1 4294967509 +ulysses polk 2 8589934960 +ulysses polk 3 12884902440 +ulysses polk 4 17179869745 +ulysses quirinius 1 4294967449 +ulysses robinson 1 4294967531 +ulysses steinbeck 1 4294967303 +ulysses steinbeck 2 8589934788 +ulysses thompson 1 4294967389 +ulysses underhill 1 4294967544 +ulysses underhill 2 8589934949 +ulysses underhill 3 12884902275 +ulysses underhill 4 17179869726 +ulysses underhill 5 21474837190 +ulysses underhill 6 25769804570 +ulysses underhill 7 30064771927 +ulysses van buren 1 4294967439 +ulysses white 1 4294967429 +ulysses white 2 8589934878 +ulysses xylophone 1 4294967524 +ulysses xylophone 2 8589935025 +ulysses xylophone 3 12884902473 +ulysses young 1 4294967427 +ulysses young 2 8589934763 +ulysses young 3 12884902154 +victor allen 1 4294967450 +victor allen 2 8589934776 +victor brown 1 4294967521 +victor brown 2 8589934864 +victor brown 3 12884902170 +victor brown 4 17179869625 +victor davidson 1 4294967419 +victor davidson 2 8589934720 +victor davidson 3 12884902156 +victor ellison 1 4294967362 +victor ellison 2 8589934831 +victor hernandez 1 4294967428 +victor hernandez 2 8589934733 +victor hernandez 3 12884902062 +victor hernandez 4 17179869402 +victor hernandez 5 21474836874 +victor johnson 1 4294967496 +victor johnson 2 8589934824 +victor johnson 3 12884902246 +victor king 1 4294967401 +victor king 2 8589934884 +victor laertes 1 4294967407 +victor laertes 2 8589934862 +victor miller 1 4294967410 +victor nixon 1 4294967424 +victor nixon 2 8589934803 +victor ovid 1 4294967355 +victor polk 1 4294967333 +victor quirinius 1 4294967520 +victor quirinius 2 8589934846 +victor robinson 1 4294967440 +victor robinson 2 8589934930 +victor steinbeck 1 4294967390 +victor steinbeck 2 8589934707 +victor steinbeck 3 12884902037 +victor thompson 1 4294967319 +victor van buren 1 4294967365 +victor van buren 2 8589934906 +victor white 1 4294967403 +victor white 2 8589934862 +victor xylophone 1 4294967331 +victor xylophone 2 8589934864 +victor xylophone 3 12884902262 +victor xylophone 4 17179869633 +victor xylophone 5 21474837062 +victor young 1 4294967337 +victor zipper 1 4294967428 +wendy allen 1 4294967473 +wendy allen 2 8589934989 +wendy allen 3 12884902367 +wendy brown 1 4294967337 +wendy brown 2 8589934817 +wendy ellison 1 4294967475 +wendy ellison 2 8589934989 +wendy falkner 1 4294967313 +wendy falkner 2 8589934810 +wendy falkner 3 12884902236 +wendy garcia 1 4294967394 +wendy garcia 2 8589934775 +wendy garcia 3 12884902088 +wendy garcia 4 17179869400 +wendy hernandez 1 4294967299 +wendy ichabod 1 4294967516 +wendy king 1 4294967420 +wendy king 2 8589934811 +wendy king 3 12884902252 +wendy laertes 1 4294967519 +wendy laertes 2 8589934939 +wendy laertes 3 12884902315 +wendy miller 1 4294967478 +wendy miller 2 8589934957 +wendy nixon 1 4294967407 +wendy nixon 2 8589934901 +wendy ovid 1 4294967464 +wendy ovid 2 
8589934894 +wendy polk 1 4294967434 +wendy polk 2 8589934824 +wendy quirinius 1 4294967334 +wendy quirinius 2 8589934782 +wendy robinson 1 4294967302 +wendy robinson 2 8589934613 +wendy robinson 3 12884901977 +wendy steinbeck 1 4294967444 +wendy thompson 1 4294967301 +wendy thompson 2 8589934621 +wendy underhill 1 4294967540 +wendy underhill 2 8589934993 +wendy underhill 3 12884902410 +wendy van buren 1 4294967488 +wendy van buren 2 8589934835 +wendy white 1 4294967490 +wendy xylophone 1 4294967488 +wendy xylophone 2 8589934939 +wendy young 1 4294967395 +wendy young 2 8589934708 +xavier allen 1 4294967304 +xavier allen 2 8589934743 +xavier allen 3 12884902129 +xavier brown 1 4294967546 +xavier brown 2 8589935074 +xavier brown 3 12884902532 +xavier carson 1 4294967547 +xavier carson 2 8589934862 +xavier davidson 1 4294967361 +xavier davidson 2 8589934760 +xavier davidson 3 12884902204 +xavier ellison 1 4294967441 +xavier ellison 2 8589934914 +xavier garcia 1 4294967465 +xavier hernandez 1 4294967383 +xavier hernandez 2 8589934743 +xavier hernandez 3 12884902274 +xavier ichabod 1 4294967511 +xavier ichabod 2 8589934950 +xavier johnson 1 4294967507 +xavier johnson 2 8589934898 +xavier king 1 4294967456 +xavier king 2 8589934758 +xavier laertes 1 4294967450 +xavier ovid 1 4294967403 +xavier polk 1 4294967506 +xavier polk 2 8589934925 +xavier polk 3 12884902406 +xavier polk 4 17179869906 +xavier quirinius 1 4294967383 +xavier quirinius 2 8589934748 +xavier quirinius 3 12884902060 +xavier quirinius 4 17179869562 +xavier thompson 1 4294967444 +xavier underhill 1 4294967332 +xavier white 1 4294967473 +xavier white 2 8589934952 +xavier xylophone 1 4294967499 +xavier zipper 1 4294967547 +yuri allen 1 4294967528 +yuri allen 2 8589935079 +yuri brown 1 4294967433 +yuri brown 2 8589934960 +yuri carson 1 4294967317 +yuri carson 2 8589934851 +yuri ellison 1 4294967299 +yuri ellison 2 8589934697 +yuri falkner 1 4294967368 +yuri falkner 2 8589934891 +yuri garcia 1 4294967362 +yuri hernandez 1 4294967367 +yuri johnson 1 4294967421 +yuri johnson 2 8589934877 +yuri johnson 3 12884902361 +yuri king 1 4294967376 +yuri laertes 1 4294967402 +yuri laertes 2 8589934924 +yuri nixon 1 4294967400 +yuri nixon 2 8589934706 +yuri polk 1 4294967391 +yuri polk 2 8589934861 +yuri polk 3 12884902167 +yuri quirinius 1 4294967398 +yuri quirinius 2 8589934768 +yuri quirinius 3 12884902081 +yuri steinbeck 1 4294967535 +yuri steinbeck 2 8589934873 +yuri thompson 1 4294967447 +yuri underhill 1 4294967499 +yuri underhill 2 8589934900 +yuri white 1 4294967341 +yuri xylophone 1 4294967420 +zach allen 1 4294967507 +zach brown 1 4294967316 +zach brown 2 8589934728 +zach brown 3 12884902099 +zach brown 4 17179869452 +zach brown 5 21474836769 +zach carson 1 4294967463 +zach ellison 1 4294967471 +zach falkner 1 4294967362 +zach falkner 2 8589934717 +zach garcia 1 4294967481 +zach garcia 2 8589934854 +zach garcia 3 12884902240 +zach garcia 4 17179869723 +zach ichabod 1 4294967539 +zach ichabod 2 8589934912 +zach king 1 4294967424 +zach king 2 8589934956 +zach king 3 12884902458 +zach miller 1 4294967442 +zach miller 2 8589934772 +zach miller 3 12884902163 +zach ovid 1 4294967412 +zach ovid 2 8589934775 +zach ovid 3 12884902244 +zach ovid 4 17179869574 +zach quirinius 1 4294967299 +zach robinson 1 4294967325 +zach steinbeck 1 4294967469 +zach steinbeck 2 8589934834 +zach thompson 1 4294967405 +zach thompson 2 8589934730 +zach underhill 1 4294967496 +zach white 1 4294967501 +zach xylophone 1 4294967452 +zach xylophone 2 8589934755 +zach young 
1 4294967297 +zach zipper 1 4294967497 +zach zipper 2 8589934855 +zach zipper 3 12884902222 diff --git a/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 3-0-455e41d9949a2d22bab634fd8e42f2b1 b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 3-0-455e41d9949a2d22bab634fd8e42f2b1 new file mode 100644 index 0000000000000..f47923618a1aa --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 3-0-455e41d9949a2d22bab634fd8e42f2b1 @@ -0,0 +1 @@ +bob steinbeck 65637 9.699999809265137 diff --git a/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 4-0-cfad06ae8eba6b047d32a6a61dd59392 b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 4-0-cfad06ae8eba6b047d32a6a61dd59392 new file mode 100644 index 0000000000000..f41eaa259cec0 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 4-0-cfad06ae8eba6b047d32a6a61dd59392 @@ -0,0 +1 @@ +bob steinbeck 1 1 diff --git a/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 5-0-d7ca7a61377cef3a9f721a28afdae012 b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 5-0-d7ca7a61377cef3a9f721a28afdae012 new file mode 100644 index 0000000000000..5308b2eb457e7 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 5-0-d7ca7a61377cef3a9f721a28afdae012 @@ -0,0 +1 @@ +bob steinbeck 9.699999809265137 1 diff --git a/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 6-0-287bcc7679822bc7b684532b267bf11f b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 6-0-287bcc7679822bc7b684532b267bf11f new file mode 100644 index 0000000000000..f41eaa259cec0 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_multipartitioning.q (deterministic) 6-0-287bcc7679822bc7b684532b267bf11f @@ -0,0 +1 @@ +bob steinbeck 1 1 diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-0-36217f6074daaacddb9fcb50a3f4fb5b b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-0-36217f6074daaacddb9fcb50a3f4fb5b new file mode 100644 index 0000000000000..8150409e62d33 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-0-36217f6074daaacddb9fcb50a3f4fb5b @@ -0,0 +1,1049 @@ + 1 + 1 + 1 +alice allen 2 +alice allen 1 +alice allen 1 +alice brown 1 +alice carson 1 +alice davidson 1 +alice falkner 1 +alice garcia 1 +alice hernandez 2 +alice hernandez 1 +alice johnson 2 +alice king 1 +alice king 1 +alice king 1 +alice laertes 1 +alice laertes 1 +alice miller 1 +alice nixon 1 +alice nixon 1 +alice nixon 1 +alice ovid 1 +alice polk 3 +alice quirinius 1 +alice quirinius 1 +alice robinson 1 +alice robinson 1 +alice steinbeck 1 +alice steinbeck 1 +alice steinbeck 1 +alice underhill 2 +alice van buren 1 +alice xylophone 1 +alice xylophone 1 +alice xylophone 1 +alice zipper 1 +alice zipper 1 +alice zipper 1 +bob brown 1 +bob brown 1 +bob brown 1 +bob carson 1 +bob davidson 1 +bob davidson 1 +bob davidson 1 +bob ellison 2 +bob ellison 1 +bob ellison 1 +bob ellison 1 +bob falkner 1 +bob garcia 1 +bob garcia 1 +bob garcia 1 +bob garcia 1 +bob garcia 1 +bob hernandez 1 +bob ichabod 1 +bob king 2 +bob king 1 +bob king 1 +bob laertes 2 +bob laertes 1 +bob miller 1 +bob ovid 1 +bob ovid 1 +bob ovid 1 +bob ovid 1 +bob polk 1 +bob quirinius 1 +bob steinbeck 1 +bob 
van buren 1 +bob white 1 +bob white 1 +bob xylophone 1 +bob xylophone 1 +bob young 1 +bob zipper 2 +bob zipper 1 +bob zipper 1 +calvin allen 1 +calvin brown 2 +calvin brown 1 +calvin brown 1 +calvin carson 2 +calvin davidson 2 +calvin davidson 1 +calvin ellison 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin garcia 1 +calvin hernandez 2 +calvin johnson 1 +calvin laertes 1 +calvin laertes 1 +calvin nixon 3 +calvin nixon 1 +calvin nixon 1 +calvin ovid 1 +calvin ovid 1 +calvin ovid 1 +calvin ovid 1 +calvin polk 1 +calvin quirinius 1 +calvin quirinius 1 +calvin robinson 2 +calvin steinbeck 1 +calvin steinbeck 1 +calvin steinbeck 1 +calvin thompson 1 +calvin thompson 1 +calvin underhill 1 +calvin van buren 1 +calvin van buren 1 +calvin white 1 +calvin white 1 +calvin xylophone 2 +calvin xylophone 1 +calvin xylophone 1 +calvin young 1 +calvin young 1 +calvin zipper 1 +calvin zipper 1 +david allen 1 +david allen 1 +david brown 1 +david brown 1 +david davidson 1 +david davidson 1 +david davidson 1 +david davidson 1 +david ellison 1 +david ellison 1 +david ellison 1 +david hernandez 1 +david ichabod 1 +david ichabod 1 +david laertes 1 +david nixon 1 +david ovid 1 +david ovid 1 +david quirinius 1 +david quirinius 1 +david quirinius 1 +david robinson 1 +david robinson 1 +david thompson 3 +david underhill 1 +david underhill 1 +david underhill 1 +david van buren 1 +david van buren 1 +david white 1 +david xylophone 1 +david xylophone 1 +david xylophone 1 +david young 1 +david young 1 +ethan allen 1 +ethan brown 2 +ethan brown 2 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan carson 1 +ethan ellison 1 +ethan ellison 1 +ethan falkner 3 +ethan falkner 1 +ethan garcia 1 +ethan hernandez 1 +ethan johnson 1 +ethan king 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan miller 2 +ethan nixon 1 +ethan ovid 1 +ethan polk 2 +ethan polk 1 +ethan polk 1 +ethan polk 1 +ethan quirinius 1 +ethan quirinius 1 +ethan quirinius 1 +ethan robinson 1 +ethan robinson 1 +ethan underhill 1 +ethan van buren 2 +ethan white 1 +ethan white 1 +ethan xylophone 1 +ethan zipper 1 +ethan zipper 1 +fred davidson 1 +fred davidson 1 +fred davidson 1 +fred ellison 1 +fred ellison 1 +fred ellison 1 +fred falkner 2 +fred falkner 1 +fred falkner 1 +fred hernandez 1 +fred ichabod 1 +fred ichabod 1 +fred johnson 1 +fred king 2 +fred king 1 +fred laertes 1 +fred miller 1 +fred nixon 2 +fred nixon 1 +fred nixon 1 +fred nixon 1 +fred polk 1 +fred polk 1 +fred polk 1 +fred polk 1 +fred quirinius 1 +fred quirinius 1 +fred robinson 1 +fred steinbeck 1 +fred steinbeck 1 +fred steinbeck 1 +fred underhill 1 +fred van buren 1 +fred van buren 1 +fred van buren 1 +fred van buren 1 +fred white 1 +fred young 3 +fred young 1 +fred zipper 1 +gabriella allen 1 +gabriella allen 1 +gabriella brown 2 +gabriella brown 1 +gabriella carson 1 +gabriella davidson 1 +gabriella ellison 1 +gabriella ellison 1 +gabriella falkner 1 +gabriella falkner 1 +gabriella falkner 1 +gabriella garcia 1 +gabriella hernandez 1 +gabriella hernandez 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella king 1 +gabriella king 1 +gabriella laertes 1 +gabriella miller 1 +gabriella ovid 1 +gabriella ovid 1 +gabriella polk 1 +gabriella polk 1 +gabriella steinbeck 1 +gabriella steinbeck 1 +gabriella thompson 1 +gabriella thompson 1 +gabriella thompson 1 +gabriella van 
buren 2 +gabriella van buren 1 +gabriella white 1 +gabriella young 1 +gabriella young 1 +gabriella zipper 1 +gabriella zipper 1 +holly allen 1 +holly brown 1 +holly brown 1 +holly falkner 1 +holly hernandez 1 +holly hernandez 1 +holly hernandez 1 +holly hernandez 1 +holly ichabod 1 +holly ichabod 1 +holly ichabod 1 +holly johnson 2 +holly johnson 1 +holly johnson 1 +holly king 1 +holly king 1 +holly laertes 1 +holly miller 1 +holly nixon 1 +holly nixon 1 +holly polk 2 +holly polk 1 +holly robinson 2 +holly thompson 1 +holly thompson 1 +holly thompson 1 +holly underhill 1 +holly underhill 1 +holly underhill 1 +holly underhill 1 +holly van buren 1 +holly white 1 +holly white 1 +holly xylophone 1 +holly young 1 +holly young 1 +holly zipper 1 +holly zipper 1 +irene allen 1 +irene brown 1 +irene brown 1 +irene brown 1 +irene carson 1 +irene ellison 1 +irene ellison 1 +irene falkner 1 +irene falkner 1 +irene garcia 1 +irene garcia 1 +irene garcia 1 +irene ichabod 1 +irene ichabod 1 +irene johnson 2 +irene laertes 1 +irene laertes 1 +irene laertes 1 +irene miller 2 +irene nixon 1 +irene nixon 1 +irene nixon 1 +irene ovid 1 +irene ovid 1 +irene ovid 1 +irene polk 1 +irene polk 1 +irene polk 1 +irene polk 1 +irene polk 1 +irene quirinius 1 +irene quirinius 1 +irene quirinius 1 +irene robinson 1 +irene steinbeck 1 +irene thompson 2 +irene underhill 2 +irene underhill 1 +irene van buren 1 +irene van buren 1 +irene xylophone 1 +jessica brown 2 +jessica carson 1 +jessica carson 1 +jessica carson 1 +jessica davidson 2 +jessica davidson 1 +jessica davidson 1 +jessica davidson 1 +jessica ellison 2 +jessica ellison 1 +jessica falkner 1 +jessica garcia 1 +jessica garcia 1 +jessica ichabod 1 +jessica johnson 1 +jessica johnson 1 +jessica miller 1 +jessica nixon 1 +jessica nixon 1 +jessica ovid 2 +jessica ovid 1 +jessica polk 1 +jessica quirinius 1 +jessica quirinius 1 +jessica quirinius 1 +jessica quirinius 1 +jessica robinson 1 +jessica thompson 1 +jessica thompson 1 +jessica underhill 1 +jessica underhill 1 +jessica underhill 1 +jessica van buren 1 +jessica white 2 +jessica white 1 +jessica white 1 +jessica white 1 +jessica white 1 +jessica xylophone 1 +jessica young 1 +jessica young 1 +jessica zipper 1 +jessica zipper 1 +jessica zipper 1 +katie allen 1 +katie brown 1 +katie davidson 1 +katie ellison 1 +katie ellison 1 +katie falkner 1 +katie garcia 1 +katie garcia 1 +katie hernandez 1 +katie ichabod 2 +katie ichabod 1 +katie ichabod 1 +katie king 1 +katie king 1 +katie king 1 +katie miller 1 +katie miller 1 +katie nixon 1 +katie ovid 1 +katie polk 1 +katie polk 1 +katie robinson 1 +katie van buren 2 +katie van buren 1 +katie white 1 +katie white 1 +katie xylophone 1 +katie young 1 +katie young 1 +katie young 1 +katie zipper 1 +katie zipper 1 +luke allen 2 +luke allen 1 +luke allen 1 +luke allen 1 +luke allen 1 +luke brown 2 +luke davidson 1 +luke davidson 1 +luke ellison 1 +luke ellison 1 +luke ellison 1 +luke falkner 2 +luke falkner 1 +luke garcia 1 +luke garcia 1 +luke ichabod 1 +luke ichabod 1 +luke johnson 1 +luke johnson 1 +luke johnson 1 +luke laertes 1 +luke laertes 1 +luke laertes 1 +luke laertes 1 +luke laertes 1 +luke miller 2 +luke ovid 2 +luke ovid 1 +luke polk 1 +luke polk 1 +luke quirinius 1 +luke robinson 1 +luke robinson 1 +luke thompson 1 +luke underhill 1 +luke underhill 1 +luke underhill 1 +luke van buren 2 +luke white 1 +luke xylophone 1 +luke zipper 1 +mike allen 2 +mike brown 1 +mike carson 1 +mike carson 1 +mike carson 1 +mike davidson 1 +mike davidson 1 +mike ellison 2 +mike 
ellison 1 +mike ellison 1 +mike ellison 1 +mike ellison 1 +mike falkner 1 +mike garcia 1 +mike garcia 1 +mike garcia 1 +mike hernandez 1 +mike hernandez 1 +mike ichabod 1 +mike king 2 +mike king 1 +mike king 1 +mike king 1 +mike king 1 +mike king 1 +mike miller 1 +mike nixon 2 +mike nixon 1 +mike polk 2 +mike polk 1 +mike polk 1 +mike quirinius 1 +mike steinbeck 1 +mike steinbeck 1 +mike steinbeck 1 +mike steinbeck 1 +mike van buren 2 +mike van buren 1 +mike white 1 +mike white 1 +mike white 1 +mike white 1 +mike young 1 +mike young 1 +mike young 1 +mike zipper 1 +mike zipper 1 +mike zipper 1 +nick allen 1 +nick allen 1 +nick brown 1 +nick davidson 1 +nick ellison 2 +nick ellison 1 +nick falkner 1 +nick falkner 1 +nick garcia 1 +nick garcia 1 +nick garcia 1 +nick ichabod 1 +nick ichabod 1 +nick ichabod 1 +nick johnson 1 +nick johnson 1 +nick laertes 1 +nick miller 1 +nick nixon 1 +nick ovid 1 +nick polk 2 +nick quirinius 2 +nick quirinius 1 +nick robinson 1 +nick robinson 1 +nick steinbeck 1 +nick thompson 2 +nick underhill 1 +nick van buren 1 +nick xylophone 1 +nick young 1 +nick young 1 +nick zipper 2 +nick zipper 1 +oscar allen 1 +oscar brown 1 +oscar carson 1 +oscar carson 1 +oscar carson 1 +oscar carson 1 +oscar carson 1 +oscar davidson 1 +oscar ellison 2 +oscar ellison 2 +oscar falkner 1 +oscar garcia 1 +oscar hernandez 1 +oscar hernandez 1 +oscar ichabod 2 +oscar ichabod 1 +oscar ichabod 1 +oscar ichabod 1 +oscar johnson 2 +oscar johnson 1 +oscar king 1 +oscar king 1 +oscar king 1 +oscar laertes 1 +oscar laertes 1 +oscar laertes 1 +oscar laertes 1 +oscar nixon 1 +oscar ovid 1 +oscar ovid 1 +oscar ovid 1 +oscar polk 1 +oscar polk 1 +oscar quirinius 2 +oscar quirinius 2 +oscar quirinius 1 +oscar quirinius 1 +oscar robinson 2 +oscar robinson 1 +oscar robinson 1 +oscar robinson 1 +oscar steinbeck 1 +oscar thompson 1 +oscar thompson 1 +oscar thompson 1 +oscar thompson 1 +oscar underhill 1 +oscar van buren 1 +oscar van buren 1 +oscar van buren 1 +oscar white 1 +oscar white 1 +oscar white 1 +oscar white 1 +oscar xylophone 2 +oscar xylophone 1 +oscar xylophone 1 +oscar zipper 2 +oscar zipper 1 +oscar zipper 1 +priscilla brown 2 +priscilla brown 1 +priscilla brown 1 +priscilla carson 2 +priscilla carson 1 +priscilla carson 1 +priscilla ichabod 2 +priscilla ichabod 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla king 1 +priscilla nixon 2 +priscilla nixon 2 +priscilla ovid 2 +priscilla ovid 1 +priscilla polk 1 +priscilla quirinius 1 +priscilla thompson 1 +priscilla underhill 1 +priscilla underhill 1 +priscilla van buren 2 +priscilla van buren 1 +priscilla van buren 1 +priscilla white 1 +priscilla xylophone 1 +priscilla xylophone 1 +priscilla xylophone 1 +priscilla young 1 +priscilla young 1 +priscilla zipper 1 +priscilla zipper 1 +quinn allen 1 +quinn allen 1 +quinn brown 1 +quinn brown 1 +quinn brown 1 +quinn davidson 2 +quinn davidson 1 +quinn davidson 1 +quinn davidson 1 +quinn ellison 1 +quinn ellison 1 +quinn garcia 1 +quinn garcia 1 +quinn garcia 1 +quinn garcia 1 +quinn ichabod 1 +quinn king 1 +quinn king 1 +quinn laertes 1 +quinn laertes 1 +quinn laertes 1 +quinn nixon 2 +quinn ovid 1 +quinn quirinius 1 +quinn robinson 2 +quinn steinbeck 1 +quinn steinbeck 1 +quinn thompson 1 +quinn thompson 1 +quinn underhill 2 +quinn underhill 1 +quinn underhill 1 +quinn van buren 1 +quinn young 2 +quinn zipper 1 +quinn zipper 1 +rachel allen 1 +rachel allen 1 +rachel brown 3 +rachel brown 1 +rachel brown 1 +rachel brown 1 +rachel 
brown 1 +rachel carson 2 +rachel carson 1 +rachel davidson 1 +rachel ellison 1 +rachel falkner 1 +rachel falkner 1 +rachel falkner 1 +rachel falkner 1 +rachel johnson 1 +rachel king 2 +rachel king 1 +rachel laertes 1 +rachel laertes 1 +rachel ovid 1 +rachel ovid 1 +rachel polk 1 +rachel quirinius 1 +rachel robinson 1 +rachel robinson 1 +rachel robinson 1 +rachel thompson 1 +rachel thompson 1 +rachel thompson 1 +rachel underhill 1 +rachel white 1 +rachel white 1 +rachel young 1 +rachel zipper 1 +rachel zipper 1 +sarah carson 1 +sarah carson 1 +sarah carson 1 +sarah ellison 1 +sarah falkner 1 +sarah falkner 1 +sarah garcia 1 +sarah garcia 1 +sarah garcia 1 +sarah ichabod 1 +sarah ichabod 1 +sarah johnson 1 +sarah johnson 1 +sarah johnson 1 +sarah johnson 1 +sarah king 1 +sarah king 1 +sarah miller 1 +sarah ovid 1 +sarah robinson 1 +sarah robinson 1 +sarah steinbeck 1 +sarah white 1 +sarah white 1 +sarah xylophone 1 +sarah young 1 +sarah zipper 1 +tom brown 1 +tom brown 1 +tom carson 1 +tom carson 1 +tom carson 1 +tom davidson 1 +tom ellison 1 +tom ellison 1 +tom ellison 1 +tom falkner 1 +tom falkner 1 +tom hernandez 1 +tom hernandez 1 +tom ichabod 1 +tom johnson 1 +tom johnson 1 +tom king 1 +tom laertes 2 +tom laertes 1 +tom miller 2 +tom miller 1 +tom miller 1 +tom nixon 2 +tom ovid 1 +tom polk 1 +tom polk 1 +tom quirinius 1 +tom quirinius 1 +tom robinson 1 +tom robinson 1 +tom robinson 1 +tom robinson 1 +tom steinbeck 3 +tom van buren 1 +tom van buren 1 +tom van buren 1 +tom white 2 +tom young 1 +tom young 1 +tom zipper 3 +ulysses brown 1 +ulysses carson 4 +ulysses carson 1 +ulysses carson 1 +ulysses carson 1 +ulysses davidson 2 +ulysses ellison 1 +ulysses garcia 1 +ulysses hernandez 1 +ulysses hernandez 1 +ulysses hernandez 1 +ulysses ichabod 1 +ulysses ichabod 1 +ulysses johnson 2 +ulysses king 1 +ulysses laertes 2 +ulysses laertes 1 +ulysses laertes 1 +ulysses miller 1 +ulysses miller 1 +ulysses nixon 1 +ulysses ovid 1 +ulysses polk 1 +ulysses polk 1 +ulysses polk 1 +ulysses polk 1 +ulysses quirinius 1 +ulysses robinson 1 +ulysses steinbeck 1 +ulysses steinbeck 1 +ulysses thompson 1 +ulysses underhill 2 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses van buren 1 +ulysses white 1 +ulysses white 1 +ulysses xylophone 2 +ulysses xylophone 1 +ulysses xylophone 1 +ulysses young 2 +ulysses young 1 +ulysses young 1 +victor allen 1 +victor allen 1 +victor brown 1 +victor brown 1 +victor brown 1 +victor brown 1 +victor davidson 1 +victor davidson 1 +victor davidson 1 +victor ellison 2 +victor ellison 1 +victor hernandez 1 +victor hernandez 1 +victor hernandez 1 +victor hernandez 1 +victor hernandez 1 +victor johnson 2 +victor johnson 1 +victor johnson 1 +victor king 1 +victor king 1 +victor laertes 1 +victor laertes 1 +victor miller 1 +victor nixon 1 +victor nixon 1 +victor ovid 2 +victor polk 2 +victor quirinius 1 +victor quirinius 1 +victor robinson 2 +victor robinson 1 +victor steinbeck 2 +victor steinbeck 1 +victor steinbeck 1 +victor thompson 1 +victor van buren 1 +victor van buren 1 +victor white 2 +victor white 1 +victor xylophone 1 +victor xylophone 1 +victor xylophone 1 +victor xylophone 1 +victor xylophone 1 +victor young 1 +victor zipper 1 +wendy allen 1 +wendy allen 1 +wendy allen 1 +wendy brown 1 +wendy brown 1 +wendy ellison 1 +wendy ellison 1 +wendy falkner 2 +wendy falkner 1 +wendy falkner 1 +wendy garcia 2 +wendy garcia 1 +wendy garcia 1 +wendy garcia 1 +wendy hernandez 1 +wendy ichabod 1 
+wendy king 1 +wendy king 1 +wendy king 1 +wendy laertes 1 +wendy laertes 1 +wendy laertes 1 +wendy miller 1 +wendy miller 1 +wendy nixon 1 +wendy nixon 1 +wendy ovid 1 +wendy ovid 1 +wendy polk 2 +wendy polk 1 +wendy quirinius 1 +wendy quirinius 1 +wendy robinson 2 +wendy robinson 1 +wendy robinson 1 +wendy steinbeck 1 +wendy thompson 2 +wendy thompson 1 +wendy underhill 2 +wendy underhill 1 +wendy underhill 1 +wendy van buren 1 +wendy van buren 1 +wendy white 1 +wendy xylophone 1 +wendy xylophone 1 +wendy young 1 +wendy young 1 +xavier allen 1 +xavier allen 1 +xavier allen 1 +xavier brown 1 +xavier brown 1 +xavier brown 1 +xavier carson 1 +xavier carson 1 +xavier davidson 1 +xavier davidson 1 +xavier davidson 1 +xavier ellison 1 +xavier ellison 1 +xavier garcia 1 +xavier hernandez 1 +xavier hernandez 1 +xavier hernandez 1 +xavier ichabod 1 +xavier ichabod 1 +xavier johnson 1 +xavier johnson 1 +xavier king 1 +xavier king 1 +xavier laertes 1 +xavier ovid 2 +xavier polk 1 +xavier polk 1 +xavier polk 1 +xavier polk 1 +xavier quirinius 2 +xavier quirinius 2 +xavier quirinius 1 +xavier quirinius 1 +xavier thompson 1 +xavier underhill 1 +xavier white 2 +xavier white 1 +xavier xylophone 1 +xavier zipper 1 +yuri allen 1 +yuri allen 1 +yuri brown 1 +yuri brown 1 +yuri carson 1 +yuri carson 1 +yuri ellison 1 +yuri ellison 1 +yuri falkner 1 +yuri falkner 1 +yuri garcia 1 +yuri hernandez 1 +yuri johnson 1 +yuri johnson 1 +yuri johnson 1 +yuri king 2 +yuri laertes 1 +yuri laertes 1 +yuri nixon 1 +yuri nixon 1 +yuri polk 1 +yuri polk 1 +yuri polk 1 +yuri quirinius 1 +yuri quirinius 1 +yuri quirinius 1 +yuri steinbeck 1 +yuri steinbeck 1 +yuri thompson 1 +yuri underhill 1 +yuri underhill 1 +yuri white 1 +yuri xylophone 1 +zach allen 1 +zach brown 2 +zach brown 1 +zach brown 1 +zach brown 1 +zach brown 1 +zach carson 2 +zach ellison 1 +zach falkner 1 +zach falkner 1 +zach garcia 2 +zach garcia 1 +zach garcia 1 +zach garcia 1 +zach ichabod 1 +zach ichabod 1 +zach king 2 +zach king 1 +zach king 1 +zach miller 1 +zach miller 1 +zach miller 1 +zach ovid 1 +zach ovid 1 +zach ovid 1 +zach ovid 1 +zach quirinius 1 +zach robinson 1 +zach steinbeck 1 +zach steinbeck 1 +zach thompson 2 +zach thompson 1 +zach underhill 1 +zach white 1 +zach xylophone 2 +zach xylophone 1 +zach young 1 +zach zipper 1 +zach zipper 1 +zach zipper 1 diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-1-9ee79e711248dd6e0a6ce27e439e55f4 b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-1-9ee79e711248dd6e0a6ce27e439e55f4 new file mode 100644 index 0000000000000..275772e1f643a --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-1-9ee79e711248dd6e0a6ce27e439e55f4 @@ -0,0 +1,1049 @@ +65791 calvin nixon +65791 katie garcia +65790 fred nixon +65790 victor polk +65790 yuri ellison +65789 NULL +65789 alice laertes +65789 gabriella king +65789 katie zipper +65789 oscar zipper +65789 quinn davidson +65789 wendy brown +65788 irene brown +65788 oscar zipper +65788 rachel king +65788 xavier thompson +65787 alice laertes +65787 david laertes +65787 katie ichabod +65787 ulysses king +65786 bob carson +65786 quinn king +65786 victor xylophone +65786 xavier allen +65786 xavier davidson +65785 sarah carson +65785 sarah johnson +65784 holly young +65784 jessica ellison +65784 jessica ovid +65784 jessica zipper +65784 quinn van buren +65783 david allen +65783 fred ellison +65783 irene nixon +65783 mike ichabod +65783 wendy miller +65783 zach garcia +65782 tom miller 
+65782 ulysses underhill +65782 victor nixon +65782 yuri white +65782 zach ovid +65781 ethan carson +65781 luke laertes +65781 quinn brown +65780 holly hernandez +65780 holly zipper +65780 wendy falkner +65779 gabriella ichabod +65779 irene brown +65779 irene underhill +65779 ulysses xylophone +65779 wendy hernandez +65779 yuri nixon +65779 zach ellison +65778 jessica davidson +65778 katie allen +65778 oscar van buren +65778 oscar white +65778 rachel johnson +65778 victor quirinius +65778 yuri polk +65778 yuri quirinius +65777 ethan garcia +65777 irene garcia +65777 katie polk +65777 rachel thompson +65776 NULL +65776 luke garcia +65776 luke quirinius +65776 priscilla ovid +65776 sarah king +65775 bob ellison +65775 calvin steinbeck +65775 ethan laertes +65775 luke robinson +65775 oscar polk +65774 calvin young +65774 irene quirinius +65774 katie brown +65774 oscar hernandez +65774 sarah ichabod +65774 zach king +65773 NULL +65773 calvin garcia +65773 irene polk +65773 jessica davidson +65773 nick ichabod +65773 nick zipper +65773 quinn ovid +65773 zach falkner +65772 oscar garcia +65771 ethan underhill +65771 yuri hernandez +65771 zach brown +65770 alice nixon +65770 gabriella ellison +65769 mike king +65769 nick steinbeck +65769 oscar nixon +65769 rachel zipper +65769 xavier quirinius +65769 zach ichabod +65768 jessica davidson +65767 tom robinson +65767 victor davidson +65767 xavier king +65766 fred davidson +65766 luke underhill +65766 ulysses carson +65766 wendy nixon +65766 xavier brown +65765 bob polk +65765 gabriella brown +65765 wendy allen +65764 alice hernandez +65764 alice robinson +65764 luke ellison +65763 calvin van buren +65763 ethan brown +65763 priscilla johnson +65763 tom ichabod +65763 tom miller +65762 david xylophone +65762 priscilla xylophone +65762 quinn allen +65762 sarah johnson +65762 tom carson +65761 david ovid +65760 NULL +65760 ethan king +65760 priscilla johnson +65760 wendy van buren +65759 alice xylophone +65759 ethan quirinius +65759 fred steinbeck +65759 sarah robinson +65759 xavier johnson +65758 alice van buren +65758 ethan white +65758 irene johnson +65758 jessica underhill +65758 rachel laertes +65757 bob quirinius +65757 nick quirinius +65757 tom van buren +65757 xavier carson +65757 zach brown +65756 gabriella falkner +65756 mike polk +65756 nick allen +65756 yuri xylophone +65755 alice johnson +65755 david ellison +65755 fred polk +65755 gabriella hernandez +65755 irene van buren +65755 jessica nixon +65755 mike ellison +65755 nick van buren +65755 zach miller +65754 luke johnson +65754 xavier quirinius +65753 rachel robinson +65753 yuri garcia +65752 bob miller +65752 oscar zipper +65751 calvin quirinius +65751 katie king +65751 mike allen +65751 mike quirinius +65751 mike white +65751 zach zipper +65750 mike white +65750 nick thompson +65750 oscar quirinius +65750 priscilla polk +65750 rachel brown +65749 david allen +65749 gabriella polk +65749 mike ellison +65749 sarah carson +65749 wendy thompson +65748 david davidson +65748 oscar laertes +65747 calvin falkner +65747 fred steinbeck +65747 priscilla zipper +65747 quinn underhill +65747 rachel falkner +65747 wendy falkner +65747 yuri falkner +65746 +65746 katie robinson +65746 luke garcia +65746 tom brown +65746 zach steinbeck +65745 oscar steinbeck +65745 oscar xylophone +65745 priscilla nixon +65745 victor laertes +65744 alice allen +65744 priscilla ichabod +65743 jessica carson +65743 oscar ichabod +65743 sarah falkner +65743 tom brown +65742 jessica brown +65742 jessica ellison +65742 wendy 
polk +65741 calvin thompson +65740 irene van buren +65740 mike zipper +65740 rachel quirinius +65739 gabriella van buren +65739 victor robinson +65739 wendy nixon +65738 sarah ichabod +65738 wendy robinson +65738 wendy young +65738 xavier king +65737 holly brown +65737 irene falkner +65737 jessica white +65737 quinn garcia +65737 wendy laertes +65736 fred young +65736 quinn laertes +65736 rachel allen +65736 victor ellison +65735 oscar laertes +65735 tom falkner +65735 ulysses thompson +65734 calvin falkner +65734 mike garcia +65733 gabriella van buren +65733 holly nixon +65733 luke underhill +65733 oscar allen +65733 xavier polk +65733 yuri ellison +65732 mike steinbeck +65732 tom carson +65732 tom ellison +65731 alice robinson +65731 priscilla carson +65731 tom johnson +65731 ulysses ovid +65730 bob king +65730 calvin polk +65730 gabriella ichabod +65730 rachel brown +65729 fred quirinius +65728 priscilla king +65728 victor brown +65727 ethan brown +65727 fred polk +65727 jessica white +65727 priscilla underhill +65727 sarah white +65726 mike garcia +65726 ulysses hernandez +65726 wendy quirinius +65725 zach thompson +65724 NULL +65724 alice king +65724 alice steinbeck +65724 mike ellison +65724 nick ovid +65724 priscilla zipper +65723 nick allen +65723 rachel white +65722 fred falkner +65722 jessica underhill +65722 luke ichabod +65721 ethan falkner +65721 jessica zipper +65721 luke laertes +65721 luke underhill +65721 mike carson +65721 oscar laertes +65721 ulysses ichabod +65720 calvin nixon +65720 calvin thompson +65720 gabriella young +65720 holly polk +65720 mike king +65719 bob brown +65719 holly ichabod +65719 ulysses ellison +65719 ulysses young +65718 jessica ichabod +65718 oscar johnson +65718 victor hernandez +65718 zach ovid +65717 holly hernandez +65717 mike nixon +65717 quinn garcia +65717 ulysses davidson +65717 ulysses polk +65716 ethan zipper +65716 holly xylophone +65716 jessica garcia +65716 nick falkner +65716 sarah king +65716 victor xylophone +65715 alice hernandez +65715 david young +65715 ethan polk +65715 oscar falkner +65715 priscilla brown +65714 NULL +65714 ethan laertes +65714 luke allen +65713 bob ellison +65713 nick nixon +65713 ulysses carson +65713 ulysses ichabod +65713 zach garcia +65712 NULL +65712 katie king +65712 luke davidson +65712 quinn garcia +65712 zach carson +65711 bob zipper +65711 fred miller +65711 holly nixon +65711 katie ellison +65711 wendy ovid +65711 zach zipper +65710 luke polk +65710 mike young +65709 bob laertes +65709 calvin laertes +65709 oscar laertes +65708 gabriella laertes +65708 priscilla van buren +65707 bob garcia +65707 mike steinbeck +65706 bob ellison +65706 bob xylophone +65706 luke allen +65706 ulysses xylophone +65706 wendy ichabod +65705 david ellison +65705 mike white +65705 priscilla johnson +65705 tom ellison +65704 ethan brown +65704 holly king +65704 jessica young +65704 tom steinbeck +65704 victor steinbeck +65704 zach falkner +65703 holly underhill +65703 mike ellison +65703 victor davidson +65703 xavier underhill +65702 NULL +65702 bob brown +65702 bob zipper +65702 ulysses quirinius +65701 alice allen +65701 mike steinbeck +65701 oscar thompson +65700 luke allen +65700 quinn laertes +65700 tom quirinius +65699 ethan brown +65699 ethan van buren +65699 irene laertes +65699 mike young +65699 nick garcia +65699 sarah white +65698 gabriella thompson +65698 nick polk +65697 NULL +65697 NULL +65697 holly miller +65697 oscar quirinius +65697 oscar thompson +65697 xavier davidson +65696 alice xylophone +65696 
jessica davidson +65696 luke allen +65696 oscar xylophone +65695 oscar king +65695 rachel young +65695 wendy ellison +65695 yuri quirinius +65694 david brown +65694 holly underhill +65694 victor quirinius +65694 zach brown +65693 bob hernandez +65693 bob young +65693 david brown +65693 holly hernandez +65693 tom polk +65693 ulysses polk +65693 victor brown +65692 holly johnson +65692 tom robinson +65691 calvin ovid +65691 ethan nixon +65691 ethan robinson +65691 fred underhill +65691 holly white +65691 irene polk +65691 oscar white +65691 rachel brown +65690 fred van buren +65690 jessica quirinius +65689 oscar ovid +65689 wendy thompson +65688 bob steinbeck +65688 victor steinbeck +65687 gabriella ichabod +65687 jessica underhill +65687 mike zipper +65687 quinn thompson +65686 bob king +65686 bob zipper +65686 david quirinius +65686 luke ichabod +65685 ethan robinson +65685 gabriella hernandez +65685 katie garcia +65685 sarah ellison +65685 victor hernandez +65685 victor nixon +65684 priscilla brown +65684 victor laertes +65684 wendy van buren +65683 NULL +65683 mike king +65683 tom laertes +65682 calvin quirinius +65682 ethan brown +65682 katie ellison +65681 gabriella allen +65681 luke laertes +65681 oscar quirinius +65681 ulysses laertes +65681 wendy ellison +65681 xavier polk +65680 NULL +65680 alice nixon +65680 gabriella ovid +65680 jessica carson +65680 ulysses nixon +65680 zach zipper +65679 bob garcia +65679 wendy underhill +65678 bob falkner +65678 victor xylophone +65678 wendy king +65677 alice allen +65677 fred van buren +65677 mike brown +65677 nick xylophone +65677 ulysses underhill +65677 zach robinson +65676 bob davidson +65676 bob laertes +65676 tom ovid +65676 xavier johnson +65675 david hernandez +65675 david nixon +65675 holly falkner +65675 quinn steinbeck +65675 rachel robinson +65675 sarah zipper +65675 tom polk +65675 victor allen +65674 gabriella falkner +65673 nick johnson +65673 quinn brown +65673 quinn underhill +65673 rachel ovid +65673 wendy brown +65672 nick laertes +65672 nick underhill +65672 rachel zipper +65672 tom white +65672 victor king +65671 fred ellison +65671 fred falkner +65671 zach white +65670 david robinson +65670 jessica zipper +65670 luke van buren +65670 oscar ovid +65670 quinn steinbeck +65669 NULL +65669 alice king +65669 calvin hernandez +65669 katie polk +65669 nick miller +65669 oscar van buren +65668 luke ellison +65667 bob brown +65667 irene nixon +65667 oscar brown +65667 tom falkner +65666 +65666 david underhill +65666 fred van buren +65665 rachel brown +65664 NULL +65664 bob davidson +65664 david ichabod +65664 ethan laertes +65664 irene robinson +65664 mike carson +65664 priscilla young +65664 victor king +65663 calvin underhill +65663 jessica johnson +65663 priscilla carson +65663 zach ichabod +65662 ethan allen +65662 katie ovid +65662 oscar johnson +65662 ulysses carson +65662 ulysses polk +65662 victor ovid +65661 david van buren +65661 luke xylophone +65661 mike falkner +65661 priscilla van buren +65661 victor johnson +65660 holly ichabod +65660 priscilla johnson +65660 victor thompson +65659 david robinson +65659 gabriella king +65659 luke davidson +65659 mike king +65659 mike zipper +65659 nick brown +65659 nick zipper +65659 yuri underhill +65658 NULL +65658 alice zipper +65658 calvin allen +65658 calvin johnson +65658 jessica garcia +65658 quinn davidson +65658 sarah ovid +65658 ulysses brown +65658 ulysses miller +65658 yuri king +65657 ethan falkner +65657 holly zipper +65657 irene ovid +65657 luke ovid +65657 priscilla 
white +65656 david davidson +65656 irene ovid +65656 jessica xylophone +65656 luke laertes +65656 oscar ichabod +65656 xavier ellison +65655 calvin falkner +65655 yuri laertes +65654 alice carson +65654 alice quirinius +65654 gabriella falkner +65654 nick young +65654 oscar robinson +65654 quinn robinson +65654 rachel falkner +65654 tom laertes +65654 yuri johnson +65653 calvin ellison +65653 holly underhill +65653 ulysses polk +65653 wendy xylophone +65652 NULL +65652 tom ellison +65652 victor johnson +65651 NULL +65651 ethan laertes +65651 ethan laertes +65651 irene garcia +65651 mike young +65650 irene ellison +65650 oscar white +65650 sarah steinbeck +65650 ulysses underhill +65650 ulysses xylophone +65650 victor xylophone +65649 irene underhill +65649 priscilla quirinius +65649 quinn ellison +65649 tom quirinius +65648 alice nixon +65648 calvin brown +65648 sarah carson +65648 xavier ellison +65647 irene allen +65647 mike ellison +65646 bob ovid +65646 xavier brown +65646 xavier ovid +65645 jessica quirinius +65645 katie miller +65645 ulysses hernandez +65644 alice king +65644 calvin ovid +65644 jessica white +65644 katie van buren +65644 sarah young +65644 ulysses hernandez +65644 yuri carson +65643 david davidson +65643 ethan polk +65643 ethan zipper +65643 gabriella ichabod +65643 mike davidson +65643 mike hernandez +65643 oscar robinson +65643 priscilla underhill +65643 zach king +65642 gabriella thompson +65641 fred laertes +65641 sarah garcia +65641 tom miller +65641 xavier hernandez +65640 david underhill +65639 wendy garcia +65638 fred nixon +65638 luke polk +65638 rachel carson +65637 alice underhill +65637 david davidson +65637 fred davidson +65637 gabriella davidson +65637 oscar carson +65637 rachel laertes +65637 sarah garcia +65637 wendy garcia +65636 +65636 irene polk +65636 wendy allen +65635 alice steinbeck +65635 alice zipper +65635 ulysses white +65634 NULL +65634 calvin white +65634 holly underhill +65634 sarah falkner +65633 NULL +65633 holly polk +65633 jessica nixon +65633 oscar quirinius +65632 alice falkner +65632 zach quirinius +65631 fred ellison +65630 rachel brown +65630 xavier hernandez +65629 jessica quirinius +65629 priscilla carson +65629 victor young +65629 xavier carson +65628 bob ovid +65628 ethan ovid +65628 irene ichabod +65628 oscar hernandez +65628 oscar robinson +65628 xavier quirinius +65627 alice miller +65627 holly johnson +65627 luke falkner +65627 yuri polk +65626 ethan polk +65626 holly robinson +65626 tom young +65626 yuri johnson +65625 david xylophone +65625 fred ichabod +65625 katie white +65625 ulysses garcia +65624 calvin steinbeck +65624 calvin xylophone +65624 rachel carson +65624 tom van buren +65624 yuri brown +65623 alice quirinius +65623 jessica miller +65623 oscar ichabod +65623 quinn zipper +65623 tom van buren +65623 victor brown +65623 wendy young +65622 nick davidson +65622 rachel king +65622 wendy robinson +65622 xavier ichabod +65622 zach xylophone +65622 zach young +65621 quinn underhill +65621 ulysses young +65620 nick garcia +65620 oscar thompson +65620 quinn quirinius +65620 victor white +65620 victor xylophone +65620 wendy quirinius +65619 calvin brown +65619 gabriella polk +65619 oscar king +65619 ulysses miller +65619 ulysses robinson +65619 ulysses steinbeck +65618 gabriella ovid +65618 irene laertes +65618 katie king +65618 oscar ovid +65618 quinn thompson +65617 fred van buren +65617 gabriella carson +65617 sarah johnson +65617 ulysses underhill +65616 calvin steinbeck +65616 xavier ichabod +65615 alice ovid 
+65615 david quirinius +65615 irene quirinius +65615 katie nixon +65614 wendy king +65614 xavier quirinius +65614 xavier white +65613 xavier zipper +65612 irene miller +65612 victor hernandez +65612 wendy white +65612 yuri polk +65611 ethan johnson +65611 fred zipper +65611 irene carson +65611 nick quirinius +65610 tom king +65610 victor steinbeck +65610 wendy garcia +65610 yuri carson +65610 zach ovid +65609 sarah robinson +65608 katie van buren +65608 mike van buren +65608 quinn ichabod +65608 zach underhill +65607 katie miller +65607 luke falkner +65607 mike polk +65607 priscilla xylophone +65607 yuri allen +65607 yuri allen +65606 bob white +65606 gabriella white +65606 oscar carson +65606 victor white +65606 xavier allen +65606 zach allen +65605 holly king +65604 katie zipper +65604 oscar davidson +65604 wendy laertes +65604 zach brown +65603 alice davidson +65603 ethan miller +65603 katie davidson +65603 katie young +65603 mike garcia +65602 NULL +65602 calvin laertes +65602 ethan laertes +65602 fred steinbeck +65602 jessica young +65602 xavier brown +65601 priscilla ovid +65601 sarah xylophone +65601 tom robinson +65600 gabriella thompson +65600 jessica polk +65600 nick robinson +65600 rachel allen +65599 fred quirinius +65599 luke johnson +65599 nick garcia +65599 oscar xylophone +65599 ulysses underhill +65598 ulysses van buren +65598 victor zipper +65597 ethan ellison +65597 nick ellison +65597 quinn davidson +65596 NULL +65596 calvin zipper +65596 david ellison +65596 irene ichabod +65596 wendy laertes +65595 bob white +65595 holly hernandez +65595 luke brown +65595 oscar ellison +65595 oscar ichabod +65595 quinn ellison +65594 gabriella ellison +65594 oscar robinson +65594 ulysses underhill +65594 victor robinson +65593 oscar white +65593 zach xylophone +65592 calvin xylophone +65591 alice zipper +65591 nick ichabod +65591 priscilla ichabod +65591 rachel underhill +65590 NULL +65590 katie falkner +65590 oscar van buren +65590 xavier garcia +65590 yuri underhill +65589 ethan white +65589 gabriella zipper +65589 irene ovid +65589 oscar king +65589 wendy xylophone +65588 bob van buren +65588 david ichabod +65588 mike miller +65588 tom hernandez +65588 victor van buren +65587 bob garcia +65587 luke johnson +65587 mike king +65587 victor allen +65587 xavier white +65586 david young +65586 irene brown +65586 priscilla brown +65586 wendy allen +65586 xavier laertes +65585 alice garcia +65585 bob garcia +65585 ethan ellison +65585 nick ellison +65585 priscilla thompson +65584 jessica carson +65584 jessica van buren +65584 jessica white +65583 bob xylophone +65583 nick ichabod +65583 yuri brown +65583 yuri steinbeck +65582 holly johnson +65582 mike carson +65582 victor van buren +65582 zach miller +65581 gabriella steinbeck +65581 irene quirinius +65581 luke allen +65581 nick robinson +65581 nick young +65581 wendy robinson +65580 alice steinbeck +65580 alice xylophone +65580 irene xylophone +65579 irene polk +65579 luke ovid +65579 quinn nixon +65579 sarah garcia +65579 wendy ovid +65578 calvin robinson +65578 fred king +65578 holly thompson +65578 katie ichabod +65578 quinn king +65578 rachel davidson +65578 victor hernandez +65577 holly white +65576 calvin falkner +65576 calvin ovid +65576 fred polk +65576 luke robinson +65575 calvin falkner +65575 irene steinbeck +65575 luke zipper +65575 zach king +65574 gabriella steinbeck +65574 priscilla nixon +65574 rachel thompson +65573 victor ellison +65573 victor hernandez +65573 yuri nixon +65572 calvin davidson +65572 calvin young +65572 
katie young +65572 oscar ellison +65572 quinn garcia +65571 bob king +65571 irene polk +65571 katie ichabod +65571 mike steinbeck +65570 NULL +65570 bob ovid +65570 fred polk +65570 luke ellison +65570 mike hernandez +65570 yuri quirinius +65569 nick falkner +65568 bob ichabod +65568 holly thompson +65568 jessica thompson +65567 katie xylophone +65566 gabriella garcia +65566 rachel white +65565 katie young +65565 quinn young +65564 alice polk +65564 calvin carson +65564 calvin white +65564 ethan hernandez +65564 ethan quirinius +65564 jessica thompson +65564 katie hernandez +65563 calvin zipper +65563 priscilla young +65563 xavier davidson +65563 yuri steinbeck +65562 calvin falkner +65562 ethan xylophone +65562 luke white +65562 quinn allen +65562 rachel polk +65562 wendy polk +65561 bob davidson +65561 ethan polk +65561 jessica robinson +65560 fred white +65560 jessica johnson +65560 oscar thompson +65560 ulysses steinbeck +65560 zach brown +65559 NULL +65559 ethan laertes +65559 gabriella ichabod +65559 gabriella zipper +65559 irene garcia +65558 fred robinson +65557 fred hernandez +65557 nick johnson +65556 oscar underhill +65556 xavier hernandez +65556 yuri falkner +65556 zach garcia +65556 zach steinbeck +65555 fred nixon +65554 gabriella miller +65554 rachel falkner +65553 calvin van buren +65553 david van buren +65553 irene nixon +65553 luke laertes +65553 oscar carson +65552 NULL +65552 irene ellison +65552 oscar polk +65552 wendy falkner +65552 zach miller +65551 fred young +65551 ulysses underhill +65551 wendy underhill +65550 ethan quirinius +65550 fred davidson +65550 holly young +65550 jessica ovid +65550 quinn brown +65550 quinn laertes +65550 tom johnson +65549 bob garcia +65549 bob ovid +65549 fred ichabod +65549 fred king +65549 jessica white +65549 ulysses laertes +65549 victor davidson +65549 victor miller +65548 calvin ovid +65548 gabriella allen +65548 holly ichabod +65548 priscilla johnson +65548 quinn zipper +65548 tom hernandez +65548 wendy king +65547 bob ellison +65547 jessica quirinius +65547 mike davidson +65547 xavier allen +65546 katie white +65545 mike king +65545 tom carson +65545 victor brown +65544 calvin davidson +65544 calvin nixon +65544 david ovid +65544 irene thompson +65544 ulysses young +65544 xavier polk +65544 xavier xylophone +65544 zach ovid +65543 fred johnson +65543 sarah johnson +65542 fred falkner +65542 holly thompson +65542 luke miller +65542 mike white +65542 tom davidson +65541 calvin brown +65541 ethan brown +65541 holly brown +65541 jessica falkner +65541 rachel thompson +65541 tom zipper +65541 wendy underhill +65541 xavier polk +65541 yuri johnson +65540 rachel falkner +65539 gabriella young +65539 holly laertes +65539 oscar carson +65538 irene laertes +65538 mike polk +65538 tom robinson +65537 NULL +65537 david quirinius +65537 rachel ovid +65537 ulysses laertes +65537 zach garcia +65536 calvin xylophone +65536 david thompson +65536 irene falkner +65536 ulysses johnson +65536 victor johnson +65536 wendy miller +65536 yuri thompson diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 new file mode 100644 index 0000000000000..62d71abc6fc7d --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-2-1e88e0ba414a00195f7ebf6b8600ac04 @@ -0,0 +1,1049 @@ +65536 NULL +65536 16.85 +65536 32.68 +65536 33.45 +65536 58.86 +65536 75.7 +65536 
83.48 +65537 NULL +65537 4.49 +65537 11.87 +65537 51.91 +65537 99.34 +65538 NULL +65538 9.53 +65538 30.27 +65539 NULL +65539 58.85 +65539 96.64 +65540 NULL +65541 NULL +65541 9.04 +65541 14.94 +65541 15.85 +65541 27.89 +65541 35.38 +65541 72.33 +65541 89.14 +65541 98.87 +65542 NULL +65542 42.85 +65542 73.72 +65542 73.93 +65542 84.63 +65543 NULL +65543 21.59 +65544 NULL +65544 26.94 +65544 32.03 +65544 48.84 +65544 65.77 +65544 65.85 +65544 73.08 +65544 79.21 +65545 NULL +65545 34.65 +65545 65.81 +65546 NULL +65547 NULL +65547 17.71 +65547 62.31 +65547 83.21 +65548 NULL +65548 52.94 +65548 53.52 +65548 57.61 +65548 58.51 +65548 75.39 +65548 77.24 +65549 NULL +65549 13.3 +65549 28.93 +65549 50.6 +65549 55.04 +65549 64.91 +65549 76.06 +65549 80.09 +65550 NULL +65550 1.75 +65550 8.46 +65550 33.01 +65550 57.63 +65550 91.38 +65550 96.9 +65551 NULL +65551 39.43 +65551 73.93 +65552 NULL +65552 3.43 +65552 18.11 +65552 48.96 +65552 82.21 +65553 NULL +65553 25.31 +65553 29.62 +65553 71.07 +65553 72.16 +65554 NULL +65554 0.24 +65555 NULL +65556 NULL +65556 9.96 +65556 45.81 +65556 50.42 +65556 95.12 +65557 NULL +65557 21.14 +65558 NULL +65559 NULL +65559 29.55 +65559 56.06 +65559 73.94 +65559 83.5 +65560 NULL +65560 16.86 +65560 21.81 +65560 64.38 +65560 71.59 +65561 NULL +65561 32.86 +65561 47.71 +65562 NULL +65562 26.86 +65562 45.94 +65562 54.94 +65562 69.41 +65562 72.04 +65563 NULL +65563 14.36 +65563 33.29 +65563 39.96 +65564 NULL +65564 9.47 +65564 15.82 +65564 26.97 +65564 30.78 +65564 90.34 +65564 98.36 +65565 NULL +65565 81.72 +65566 NULL +65566 7.8 +65567 NULL +65568 NULL +65568 21.79 +65568 58.66 +65569 NULL +65570 NULL +65570 17.09 +65570 18.2 +65570 25.57 +65570 45.23 +65570 76.8 +65571 NULL +65571 26.64 +65571 40.68 +65571 82.5 +65572 NULL +65572 22.64 +65572 43.49 +65572 70.91 +65572 93.48 +65573 NULL +65573 53.56 +65573 96.32 +65574 NULL +65574 31.28 +65574 38.54 +65575 NULL +65575 17 +65575 32.85 +65575 83.4 +65576 NULL +65576 2.04 +65576 4.88 +65576 66.55 +65577 NULL +65578 NULL +65578 16.01 +65578 41.1 +65578 51.36 +65578 54.35 +65578 58.78 +65578 75.62 +65579 NULL +65579 21.36 +65579 33.37 +65579 73.48 +65579 91.42 +65580 NULL +65580 56.33 +65580 81.42 +65581 NULL +65581 29.74 +65581 45.48 +65581 56.59 +65581 60.88 +65581 88.09 +65582 NULL +65582 1.23 +65582 9.35 +65582 96.6 +65583 NULL +65583 28.07 +65583 50.57 +65583 57.67 +65584 NULL +65584 6.88 +65584 65.78 +65585 NULL +65585 31.23 +65585 37.34 +65585 39.32 +65585 50.38 +65586 NULL +65586 1.15 +65586 18.65 +65586 87.57 +65586 94.25 +65587 NULL +65587 5.83 +65587 11.86 +65587 53.84 +65587 94.47 +65588 NULL +65588 64.63 +65588 69.48 +65588 76.44 +65588 98.33 +65589 NULL +65589 49.49 +65589 72.3 +65589 74.83 +65589 94.73 +65590 NULL +65590 13.51 +65590 22.13 +65590 42.53 +65590 92.71 +65591 NULL +65591 9.85 +65591 11.43 +65591 60.78 +65592 NULL +65593 NULL +65593 35.15 +65594 NULL +65594 35.28 +65594 60.57 +65594 65.61 +65595 NULL +65595 8.76 +65595 67.56 +65595 72.7 +65595 89.6 +65595 90.24 +65596 NULL +65596 12.72 +65596 54.75 +65596 85.74 +65596 94.35 +65597 NULL +65597 37.41 +65597 69.05 +65598 NULL +65598 63.3 +65599 NULL +65599 0.56 +65599 4.93 +65599 41.61 +65599 76.29 +65600 NULL +65600 36.57 +65600 43.03 +65600 92.92 +65601 NULL +65601 26.54 +65601 37.93 +65602 NULL +65602 8.13 +65602 47.16 +65602 83.67 +65602 86.23 +65602 95.58 +65603 NULL +65603 41.44 +65603 45.63 +65603 69.26 +65603 80.24 +65604 NULL +65604 63.36 +65604 63.65 +65604 85.91 +65605 NULL +65606 NULL +65606 7.51 +65606 24.8 +65606 57.69 +65606 67.94 +65606 
87.16 +65607 NULL +65607 9.67 +65607 36.58 +65607 71.75 +65607 75.86 +65607 91.52 +65608 NULL +65608 48.9 +65608 69.42 +65608 87.9 +65609 NULL +65610 NULL +65610 7.59 +65610 11.99 +65610 36.77 +65610 39.74 +65611 NULL +65611 21.21 +65611 25.92 +65611 64.89 +65612 NULL +65612 16.05 +65612 25.1 +65612 52.64 +65613 NULL +65614 NULL +65614 1.42 +65614 94.47 +65615 NULL +65615 10.79 +65615 39.4 +65615 99.88 +65616 NULL +65616 75.2 +65617 NULL +65617 18.51 +65617 47.45 +65617 64.9 +65618 NULL +65618 10.06 +65618 16.6 +65618 81.99 +65618 88.38 +65619 NULL +65619 27.32 +65619 32.64 +65619 34.72 +65619 36.48 +65619 36.59 +65620 NULL +65620 6.85 +65620 8.16 +65620 29.14 +65620 64.65 +65620 81.28 +65621 NULL +65621 95.14 +65622 NULL +65622 28.37 +65622 50.08 +65622 74.31 +65622 88.6 +65622 93.7 +65623 NULL +65623 30.83 +65623 31.22 +65623 39.74 +65623 48.51 +65623 95.58 +65623 97.2 +65624 NULL +65624 58.02 +65624 65.31 +65624 70.08 +65624 93.3 +65625 NULL +65625 20.61 +65625 42.86 +65625 55.06 +65626 NULL +65626 63.54 +65626 64.61 +65626 75.15 +65627 NULL +65627 19.65 +65627 61.89 +65627 93.29 +65628 NULL +65628 14.83 +65628 30.43 +65628 37.8 +65628 74.31 +65628 83.26 +65629 NULL +65629 19.33 +65629 58.81 +65629 72.9 +65630 NULL +65630 72.13 +65631 NULL +65632 NULL +65632 88.51 +65633 NULL +65633 59.56 +65633 72.54 +65633 81.02 +65634 NULL +65634 57.09 +65634 64.36 +65634 99.34 +65635 NULL +65635 64.99 +65635 82.29 +65636 NULL +65636 21.15 +65636 86.29 +65637 NULL +65637 16.89 +65637 26.78 +65637 29.34 +65637 35.51 +65637 44.32 +65637 48.88 +65637 93.41 +65638 NULL +65638 11.2 +65638 19.13 +65639 NULL +65640 NULL +65641 NULL +65641 26.02 +65641 84.27 +65641 91.46 +65642 NULL +65643 NULL +65643 22.05 +65643 50.79 +65643 52.56 +65643 61.29 +65643 71.29 +65643 80.96 +65643 92.24 +65643 93.11 +65644 NULL +65644 1.97 +65644 30.25 +65644 58.05 +65644 87.31 +65644 89.95 +65644 96.45 +65645 NULL +65645 3.95 +65645 63.22 +65646 NULL +65646 17.92 +65646 27.34 +65647 NULL +65647 58.03 +65648 NULL +65648 0.08 +65648 17.66 +65648 64.06 +65649 NULL +65649 8.69 +65649 43.92 +65649 91.03 +65650 NULL +65650 23.55 +65650 59.55 +65650 85.89 +65650 89.12 +65650 90.77 +65651 NULL +65651 24.25 +65651 58.25 +65651 74.13 +65651 84.42 +65652 NULL +65652 55.04 +65652 73.61 +65653 NULL +65653 3.81 +65653 52.23 +65653 85.09 +65654 NULL +65654 8.91 +65654 11.64 +65654 26.73 +65654 29.85 +65654 37.74 +65654 37.8 +65654 53.55 +65654 88.23 +65655 NULL +65655 77.41 +65656 NULL +65656 14 +65656 14.96 +65656 53.27 +65656 64.44 +65656 82.67 +65657 NULL +65657 11.93 +65657 26.4 +65657 64.39 +65657 65.01 +65658 NULL +65658 2.63 +65658 20.69 +65658 42.93 +65658 46.61 +65658 60.94 +65658 66.53 +65658 68.85 +65658 77.66 +65658 92.67 +65659 NULL +65659 8.95 +65659 46.57 +65659 53.8 +65659 94.3 +65659 94.69 +65659 95.71 +65659 99.87 +65660 NULL +65660 28.05 +65660 62.82 +65661 NULL +65661 5.24 +65661 8.06 +65661 26.8 +65661 68.98 +65662 NULL +65662 59.92 +65662 76.11 +65662 76.51 +65662 88.64 +65662 99.18 +65663 NULL +65663 5.42 +65663 78.56 +65663 94.16 +65664 NULL +65664 11.46 +65664 27.6 +65664 34.71 +65664 38.42 +65664 45.4 +65664 55.82 +65664 97.64 +65665 NULL +65666 NULL +65666 32.73 +65666 83.95 +65667 NULL +65667 13.96 +65667 63.9 +65667 97.87 +65668 NULL +65669 NULL +65669 1.76 +65669 16.95 +65669 38.6 +65669 54.25 +65669 93.79 +65670 NULL +65670 5.37 +65670 61.06 +65670 61.54 +65670 92.97 +65671 NULL +65671 8.65 +65671 52.05 +65672 NULL +65672 52.6 +65672 58.1 +65672 64.09 +65672 75.27 +65673 NULL +65673 0.9 +65673 33.27 +65673 
43.81 +65673 87.78 +65674 NULL +65675 NULL +65675 4.19 +65675 24.19 +65675 35.33 +65675 35.78 +65675 79.9 +65675 83.09 +65675 87.36 +65676 NULL +65676 8.77 +65676 58.12 +65676 80.13 +65677 NULL +65677 5.06 +65677 25.37 +65677 44.47 +65677 48.79 +65677 87.67 +65678 NULL +65678 8.72 +65678 33.9 +65679 NULL +65679 64.15 +65680 NULL +65680 1.01 +65680 34.08 +65680 54.11 +65680 55.3 +65680 65.88 +65681 NULL +65681 35.45 +65681 41.57 +65681 61.3 +65681 71.17 +65681 75.85 +65682 NULL +65682 67.17 +65682 92.95 +65683 NULL +65683 17.62 +65683 99.56 +65684 NULL +65684 3.51 +65684 67.34 +65685 NULL +65685 38.71 +65685 43.48 +65685 63.27 +65685 87.84 +65685 90.69 +65686 NULL +65686 31.75 +65686 58.87 +65686 98.68 +65687 NULL +65687 3.37 +65687 21.79 +65687 48.73 +65688 NULL +65688 76.21 +65689 NULL +65689 9.12 +65690 NULL +65690 3.43 +65691 NULL +65691 5.01 +65691 6.93 +65691 28.47 +65691 56.02 +65691 58.01 +65691 69.8 +65691 76.98 +65692 NULL +65692 54.76 +65693 NULL +65693 8.38 +65693 32.33 +65693 45.69 +65693 69.32 +65693 71.72 +65693 84.88 +65694 NULL +65694 58.23 +65694 82.24 +65694 88.5 +65695 NULL +65695 57.33 +65695 59.96 +65695 77.09 +65696 NULL +65696 17.35 +65696 40.3 +65696 54.02 +65697 NULL +65697 3.18 +65697 50.01 +65697 67.9 +65697 86.79 +65697 90.16 +65698 NULL +65698 42.98 +65699 NULL +65699 13.29 +65699 38.71 +65699 68.94 +65699 84.79 +65699 88.09 +65700 NULL +65700 2.83 +65700 37.61 +65701 NULL +65701 1.81 +65701 6.35 +65702 NULL +65702 37.6 +65702 55.68 +65702 79.5 +65703 NULL +65703 37.18 +65703 40.81 +65703 90.89 +65704 NULL +65704 16.22 +65704 37.12 +65704 48.48 +65704 54.76 +65704 93.21 +65705 NULL +65705 20.57 +65705 25.89 +65705 65.13 +65706 NULL +65706 3.91 +65706 9.74 +65706 55.94 +65706 72.87 +65707 NULL +65707 76.2 +65708 NULL +65708 1.29 +65709 NULL +65709 5.64 +65709 49.79 +65710 NULL +65710 86.7 +65711 NULL +65711 8.66 +65711 50.26 +65711 71.89 +65711 78.69 +65711 96.1 +65712 NULL +65712 30.27 +65712 34.7 +65712 49.69 +65712 53.65 +65713 NULL +65713 10.94 +65713 39.47 +65713 72.37 +65713 90.91 +65714 NULL +65714 14.85 +65714 47.42 +65715 NULL +65715 39.62 +65715 54.79 +65715 81.28 +65715 89.4 +65716 NULL +65716 9 +65716 10.07 +65716 33.4 +65716 71.53 +65716 85.93 +65717 NULL +65717 1.23 +65717 5.81 +65717 57.61 +65717 80.05 +65718 NULL +65718 63.06 +65718 84.35 +65718 89.67 +65719 NULL +65719 51.13 +65719 66.85 +65719 82.1 +65720 NULL +65720 2.72 +65720 18.8 +65720 22.34 +65720 62.04 +65721 NULL +65721 23.78 +65721 39.19 +65721 55.75 +65721 72.82 +65721 95.12 +65721 95.38 +65722 NULL +65722 1.76 +65722 38.82 +65723 NULL +65723 39.9 +65724 NULL +65724 10.52 +65724 36.05 +65724 50.96 +65724 71.66 +65724 85.52 +65725 NULL +65726 NULL +65726 6 +65726 60.46 +65727 NULL +65727 19.81 +65727 49.19 +65727 87.37 +65727 88.11 +65728 NULL +65728 55.37 +65729 NULL +65730 NULL +65730 1.35 +65730 30.6 +65730 81.44 +65731 NULL +65731 24.48 +65731 61.52 +65731 97.18 +65732 NULL +65732 30.06 +65732 91.15 +65733 NULL +65733 11.44 +65733 20.72 +65733 88.46 +65733 93.45 +65733 99.8 +65734 NULL +65734 31.71 +65735 NULL +65735 12.67 +65735 61.16 +65736 NULL +65736 28.9 +65736 48.54 +65736 86.51 +65737 NULL +65737 3.98 +65737 20.85 +65737 29.92 +65737 80.97 +65738 NULL +65738 30.94 +65738 82.32 +65738 95.1 +65739 NULL +65739 74.77 +65739 92.4 +65740 NULL +65740 7.49 +65740 58.65 +65741 NULL +65742 NULL +65742 6.61 +65742 43.84 +65743 NULL +65743 26.6 +65743 52.65 +65743 62 +65744 NULL +65744 46.98 +65745 NULL +65745 25.19 +65745 66.36 +65745 80.12 +65746 NULL +65746 36.74 +65746 93.21 +65746 
97.52 +65746 98.1 +65747 NULL +65747 11.16 +65747 15.07 +65747 21.8 +65747 39.77 +65747 52.77 +65747 71.87 +65748 NULL +65748 29.49 +65749 NULL +65749 15.14 +65749 45 +65749 65.49 +65749 73.24 +65750 NULL +65750 20.91 +65750 83.44 +65750 85.44 +65750 96.85 +65751 NULL +65751 2.96 +65751 9.02 +65751 30.68 +65751 47.81 +65751 78.75 +65752 NULL +65752 47.82 +65753 NULL +65753 86.97 +65754 NULL +65754 54.35 +65755 NULL +65755 11.23 +65755 22.44 +65755 64 +65755 67.54 +65755 76.75 +65755 81.44 +65755 90.08 +65755 96.8 +65756 NULL +65756 1.45 +65756 11.81 +65756 63.51 +65757 NULL +65757 1.86 +65757 9.24 +65757 34.84 +65757 90.09 +65758 NULL +65758 25.62 +65758 56.56 +65758 60.88 +65758 94.9 +65759 NULL +65759 10.63 +65759 14.1 +65759 47.54 +65759 92.81 +65760 NULL +65760 21.14 +65760 27.52 +65760 95.45 +65761 NULL +65762 NULL +65762 5.49 +65762 45.7 +65762 77.96 +65762 87.5 +65763 NULL +65763 0.72 +65763 43.8 +65763 86.43 +65763 87.99 +65764 NULL +65764 31.41 +65764 57.1 +65765 NULL +65765 88.52 +65765 88.56 +65766 NULL +65766 37.06 +65766 66.34 +65766 86.53 +65766 98.9 +65767 NULL +65767 90.88 +65767 95.57 +65768 NULL +65769 NULL +65769 11.45 +65769 38.98 +65769 58.05 +65769 70.52 +65769 91.49 +65770 NULL +65770 51.9 +65771 NULL +65771 6.15 +65771 7.5 +65772 NULL +65773 NULL +65773 3.81 +65773 18.2 +65773 30.49 +65773 47.09 +65773 53.09 +65773 63.26 +65773 76.46 +65774 NULL +65774 45.74 +65774 45.97 +65774 48.8 +65774 56.84 +65774 94.77 +65775 NULL +65775 7.88 +65775 66.56 +65775 66.68 +65775 98.43 +65776 NULL +65776 18.7 +65776 28.47 +65776 49.73 +65776 98.87 +65777 NULL +65777 54.39 +65777 73.79 +65777 82.62 +65778 NULL +65778 7.37 +65778 51.64 +65778 59.03 +65778 62.17 +65778 64.69 +65778 89.51 +65778 95.69 +65779 NULL +65779 11.87 +65779 28.2 +65779 39.48 +65779 45.61 +65779 64.41 +65779 65.24 +65780 NULL +65780 10.95 +65780 38.58 +65781 NULL +65781 70.59 +65781 95.52 +65782 NULL +65782 30.24 +65782 34.31 +65782 76.14 +65782 81.9 +65783 NULL +65783 46.34 +65783 51.08 +65783 52.43 +65783 62.58 +65783 77.4 +65784 NULL +65784 15.7 +65784 31.35 +65784 68.18 +65784 93.95 +65785 NULL +65785 29.61 +65786 NULL +65786 8.99 +65786 29.32 +65786 66.89 +65786 80.94 +65787 NULL +65787 18.78 +65787 31.19 +65787 64.88 +65788 NULL +65788 16.1 +65788 21.81 +65788 25.77 +65789 NULL +65789 20.44 +65789 43.53 +65789 52.49 +65789 83.18 +65789 92.74 +65789 96.9 +65790 NULL +65790 46.91 +65790 84.87 +65791 NULL +65791 4.24 diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-3-34d9ee4120f21d0d0ae914fba0acc60c b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-3-34d9ee4120f21d0d0ae914fba0acc60c new file mode 100644 index 0000000000000..569c1d4e5f7b6 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-3-34d9ee4120f21d0d0ae914fba0acc60c @@ -0,0 +1,1049 @@ + 7 + 11 + 38 +alice allen 48 +alice allen 78 +alice allen 79 +alice brown 91 +alice carson 28 +alice davidson 88 +alice falkner 117 +alice garcia 106 +alice hernandez 37 +alice hernandez 85 +alice johnson 65 +alice king 109 +alice king 113 +alice king 118 +alice laertes 6 +alice laertes 65 +alice miller 97 +alice nixon 15 +alice nixon 31 +alice nixon 56 +alice ovid 15 +alice polk 90 +alice quirinius 27 +alice quirinius 89 +alice robinson 5 +alice robinson 68 +alice steinbeck 45 +alice steinbeck 50 +alice steinbeck 77 +alice underhill 34 +alice van buren 31 +alice xylophone 0 +alice xylophone 5 +alice xylophone 113 +alice zipper 19 +alice zipper 51 +alice zipper 74 +bob brown 35 
+bob brown 61 +bob brown 71 +bob carson 40 +bob davidson 44 +bob davidson 103 +bob davidson 124 +bob ellison 7 +bob ellison 46 +bob ellison 85 +bob ellison 103 +bob falkner 2 +bob garcia 16 +bob garcia 42 +bob garcia 71 +bob garcia 77 +bob garcia 83 +bob hernandez 93 +bob ichabod 72 +bob king 9 +bob king 22 +bob king 81 +bob laertes -1 +bob laertes 105 +bob miller 31 +bob ovid 26 +bob ovid 27 +bob ovid 81 +bob ovid 86 +bob polk 55 +bob quirinius 26 +bob steinbeck 95 +bob van buren 88 +bob white 1 +bob white 16 +bob xylophone -2 +bob xylophone 49 +bob young -1 +bob zipper 36 +bob zipper 78 +bob zipper 92 +calvin allen 98 +calvin brown 81 +calvin brown 87 +calvin brown 121 +calvin carson 105 +calvin davidson 34 +calvin davidson 89 +calvin ellison 34 +calvin falkner -1 +calvin falkner 12 +calvin falkner 46 +calvin falkner 50 +calvin falkner 53 +calvin falkner 101 +calvin garcia 79 +calvin hernandez 22 +calvin johnson 34 +calvin laertes 37 +calvin laertes 100 +calvin nixon 50 +calvin nixon 71 +calvin nixon 72 +calvin ovid -1 +calvin ovid 50 +calvin ovid 65 +calvin ovid 71 +calvin polk 23 +calvin quirinius 5 +calvin quirinius 96 +calvin robinson 0 +calvin steinbeck 13 +calvin steinbeck 14 +calvin steinbeck 33 +calvin thompson 12 +calvin thompson 40 +calvin underhill 19 +calvin van buren 57 +calvin van buren 78 +calvin white 26 +calvin white 80 +calvin xylophone 17 +calvin xylophone 48 +calvin xylophone 78 +calvin young 8 +calvin young 99 +calvin zipper 31 +calvin zipper 46 +david allen 27 +david allen 80 +david brown 75 +david brown 117 +david davidson 11 +david davidson 38 +david davidson 54 +david davidson 74 +david ellison 50 +david ellison 54 +david ellison 120 +david hernandez 72 +david ichabod 6 +david ichabod 55 +david laertes 88 +david nixon 118 +david ovid 61 +david ovid 95 +david quirinius 43 +david quirinius 85 +david quirinius 121 +david robinson 47 +david robinson 59 +david thompson 89 +david underhill 69 +david underhill 87 +david underhill 98 +david van buren 1 +david van buren 38 +david white 93 +david xylophone 0 +david xylophone 22 +david xylophone 82 +david young 2 +david young 79 +ethan allen 24 +ethan brown 3 +ethan brown 29 +ethan brown 55 +ethan brown 64 +ethan brown 84 +ethan brown 108 +ethan carson 83 +ethan ellison 28 +ethan ellison 65 +ethan falkner 45 +ethan falkner 49 +ethan garcia 16 +ethan hernandez 71 +ethan johnson 108 +ethan king 44 +ethan laertes -1 +ethan laertes 27 +ethan laertes 46 +ethan laertes 68 +ethan laertes 81 +ethan laertes 103 +ethan laertes 114 +ethan miller 40 +ethan nixon 18 +ethan ovid 66 +ethan polk 46 +ethan polk 71 +ethan polk 114 +ethan polk 117 +ethan quirinius 16 +ethan quirinius 41 +ethan quirinius 85 +ethan robinson 32 +ethan robinson 34 +ethan underhill 89 +ethan van buren 43 +ethan white 38 +ethan white 51 +ethan xylophone 107 +ethan zipper 37 +ethan zipper 89 +fred davidson 18 +fred davidson 18 +fred davidson 77 +fred ellison -3 +fred ellison 44 +fred ellison 116 +fred falkner 18 +fred falkner 33 +fred falkner 100 +fred hernandez 36 +fred ichabod 1 +fred ichabod 77 +fred johnson 62 +fred king 33 +fred king 92 +fred laertes 17 +fred miller NULL +fred nixon 33 +fred nixon 36 +fred nixon 102 +fred nixon 111 +fred polk -2 +fred polk 39 +fred polk 60 +fred polk 85 +fred quirinius 25 +fred quirinius 124 +fred robinson 89 +fred steinbeck 79 +fred steinbeck 118 +fred steinbeck 119 +fred underhill 122 +fred van buren 4 +fred van buren 24 +fred van buren 63 +fred van buren 106 +fred white 97 +fred young 33 +fred young 103 +fred zipper 66 
+gabriella allen 114 +gabriella allen 119 +gabriella brown 25 +gabriella brown 92 +gabriella carson 112 +gabriella davidson 45 +gabriella ellison 21 +gabriella ellison 101 +gabriella falkner 14 +gabriella falkner 66 +gabriella falkner 77 +gabriella garcia 110 +gabriella hernandez 20 +gabriella hernandez 36 +gabriella ichabod 17 +gabriella ichabod 66 +gabriella ichabod 71 +gabriella ichabod 90 +gabriella ichabod 91 +gabriella king 100 +gabriella king 115 +gabriella laertes 50 +gabriella miller 35 +gabriella ovid 38 +gabriella ovid 89 +gabriella polk 42 +gabriella polk 105 +gabriella steinbeck 18 +gabriella steinbeck 115 +gabriella thompson 45 +gabriella thompson 70 +gabriella thompson 88 +gabriella van buren 5 +gabriella van buren 117 +gabriella white 37 +gabriella young 48 +gabriella young 107 +gabriella zipper 57 +gabriella zipper 71 +holly allen 63 +holly brown 50 +holly brown 117 +holly falkner NULL +holly hernandez 31 +holly hernandez 43 +holly hernandez 48 +holly hernandez 100 +holly ichabod 28 +holly ichabod 53 +holly ichabod 83 +holly johnson 60 +holly johnson 112 +holly johnson 121 +holly king 90 +holly king 114 +holly laertes 13 +holly miller 28 +holly nixon -2 +holly nixon 120 +holly polk 54 +holly polk 124 +holly robinson 1 +holly thompson 28 +holly thompson 33 +holly thompson 83 +holly underhill 5 +holly underhill 31 +holly underhill 101 +holly underhill 113 +holly van buren 19 +holly white 18 +holly white 90 +holly xylophone 18 +holly young 32 +holly young 72 +holly zipper 78 +holly zipper 88 +irene allen 56 +irene brown 78 +irene brown 93 +irene brown 108 +irene carson 0 +irene ellison -3 +irene ellison 40 +irene falkner 13 +irene falkner 104 +irene garcia 34 +irene garcia 62 +irene garcia 124 +irene ichabod 83 +irene ichabod 112 +irene johnson 88 +irene laertes 9 +irene laertes 54 +irene laertes 60 +irene miller 108 +irene nixon -1 +irene nixon 12 +irene nixon 101 +irene ovid 26 +irene ovid 32 +irene ovid 53 +irene polk 18 +irene polk 92 +irene polk 99 +irene polk 109 +irene polk 116 +irene quirinius 7 +irene quirinius 76 +irene quirinius 97 +irene robinson 51 +irene steinbeck 46 +irene thompson 10 +irene underhill 27 +irene underhill 63 +irene van buren 17 +irene van buren 104 +irene xylophone 18 +jessica brown 117 +jessica carson 3 +jessica carson 13 +jessica carson 88 +jessica davidson 11 +jessica davidson 28 +jessica davidson 89 +jessica davidson 124 +jessica ellison 38 +jessica ellison 50 +jessica falkner 71 +jessica garcia 25 +jessica garcia 43 +jessica ichabod 104 +jessica johnson 31 +jessica johnson 69 +jessica miller 74 +jessica nixon 22 +jessica nixon 120 +jessica ovid 47 +jessica ovid 73 +jessica polk 118 +jessica quirinius 0 +jessica quirinius 87 +jessica quirinius 105 +jessica quirinius 114 +jessica robinson 15 +jessica thompson 1 +jessica thompson 77 +jessica underhill 32 +jessica underhill 46 +jessica underhill 83 +jessica van buren 54 +jessica white 5 +jessica white 30 +jessica white 45 +jessica white 65 +jessica white 98 +jessica xylophone 67 +jessica young 61 +jessica young 123 +jessica zipper 27 +jessica zipper 33 +jessica zipper 54 +katie allen 114 +katie brown 39 +katie davidson 35 +katie ellison 5 +katie ellison 58 +katie falkner 15 +katie garcia 49 +katie garcia 65 +katie hernandez 83 +katie ichabod 9 +katie ichabod 75 +katie ichabod 104 +katie king 44 +katie king 59 +katie king 93 +katie miller 23 +katie miller 117 +katie nixon 43 +katie ovid 81 +katie polk 17 +katie polk 85 +katie robinson 92 +katie van buren 25 +katie van buren 88 +katie white 34 
+katie white 86 +katie xylophone 84 +katie young 2 +katie young 24 +katie young 70 +katie zipper 25 +katie zipper 87 +luke allen 7 +luke allen 44 +luke allen 62 +luke allen 100 +luke allen 114 +luke brown 112 +luke davidson 51 +luke davidson 84 +luke ellison 35 +luke ellison 40 +luke ellison 86 +luke falkner 59 +luke falkner 97 +luke garcia 51 +luke garcia 100 +luke ichabod 42 +luke ichabod 123 +luke johnson 9 +luke johnson 17 +luke johnson 53 +luke laertes 66 +luke laertes 73 +luke laertes 76 +luke laertes 101 +luke laertes 118 +luke miller 93 +luke ovid 43 +luke ovid 70 +luke polk 53 +luke polk 88 +luke quirinius 82 +luke robinson 0 +luke robinson 114 +luke thompson 51 +luke underhill 2 +luke underhill 109 +luke underhill 119 +luke van buren 43 +luke white 110 +luke xylophone 15 +luke zipper 10 +mike allen 0 +mike brown 88 +mike carson 12 +mike carson 17 +mike carson 122 +mike davidson 9 +mike davidson 110 +mike ellison 5 +mike ellison 50 +mike ellison 70 +mike ellison 94 +mike ellison 95 +mike falkner 61 +mike garcia 2 +mike garcia 68 +mike garcia 110 +mike hernandez 91 +mike hernandez 106 +mike ichabod 18 +mike king 4 +mike king 58 +mike king 83 +mike king 96 +mike king 103 +mike king 118 +mike miller 51 +mike nixon 97 +mike nixon 106 +mike polk 6 +mike polk 65 +mike polk 119 +mike quirinius 22 +mike steinbeck 75 +mike steinbeck 85 +mike steinbeck 101 +mike steinbeck 116 +mike van buren 16 +mike van buren 111 +mike white -1 +mike white 22 +mike white 45 +mike white 61 +mike young 37 +mike young 53 +mike young 72 +mike zipper 27 +mike zipper 76 +mike zipper 106 +nick allen 8 +nick allen 57 +nick brown 114 +nick davidson 84 +nick ellison 10 +nick ellison 107 +nick falkner 83 +nick falkner 86 +nick garcia 53 +nick garcia 69 +nick garcia 108 +nick ichabod 59 +nick ichabod 71 +nick ichabod 84 +nick johnson 47 +nick johnson 88 +nick laertes 17 +nick miller 101 +nick nixon 43 +nick ovid 42 +nick polk 1 +nick quirinius 22 +nick quirinius 36 +nick robinson 48 +nick robinson 54 +nick steinbeck 33 +nick thompson 73 +nick underhill 122 +nick van buren 53 +nick xylophone 80 +nick young 6 +nick young 60 +nick zipper 3 +nick zipper 21 +oscar allen 58 +oscar brown 80 +oscar carson 10 +oscar carson 27 +oscar carson 36 +oscar carson 72 +oscar carson 88 +oscar davidson 14 +oscar ellison 50 +oscar ellison 74 +oscar falkner 96 +oscar garcia 44 +oscar hernandez 1 +oscar hernandez 93 +oscar ichabod 20 +oscar ichabod 28 +oscar ichabod 69 +oscar ichabod 120 +oscar johnson 44 +oscar johnson 53 +oscar king 67 +oscar king 71 +oscar king 81 +oscar laertes 4 +oscar laertes 28 +oscar laertes 53 +oscar laertes 63 +oscar nixon 58 +oscar ovid 3 +oscar ovid 27 +oscar ovid 47 +oscar polk 8 +oscar polk 112 +oscar quirinius NULL +oscar quirinius 0 +oscar quirinius 17 +oscar quirinius 114 +oscar robinson 16 +oscar robinson 42 +oscar robinson 59 +oscar robinson 93 +oscar steinbeck 51 +oscar thompson 44 +oscar thompson 44 +oscar thompson 60 +oscar thompson 66 +oscar underhill 86 +oscar van buren 40 +oscar van buren 51 +oscar van buren 114 +oscar white 2 +oscar white 20 +oscar white 49 +oscar white 58 +oscar xylophone 18 +oscar xylophone 73 +oscar xylophone 74 +oscar zipper 0 +oscar zipper 23 +oscar zipper 95 +priscilla brown 51 +priscilla brown 75 +priscilla brown 97 +priscilla carson 16 +priscilla carson 52 +priscilla carson 124 +priscilla ichabod 117 +priscilla ichabod 122 +priscilla johnson 5 +priscilla johnson 17 +priscilla johnson 62 +priscilla johnson 77 +priscilla johnson 117 +priscilla king 43 +priscilla nixon 61 
+priscilla nixon 66 +priscilla ovid 46 +priscilla ovid 118 +priscilla polk 45 +priscilla quirinius 83 +priscilla thompson 82 +priscilla underhill 117 +priscilla underhill 122 +priscilla van buren 0 +priscilla van buren 22 +priscilla van buren 102 +priscilla white 88 +priscilla xylophone 8 +priscilla xylophone 90 +priscilla xylophone 109 +priscilla young 17 +priscilla young 113 +priscilla zipper 27 +priscilla zipper 35 +quinn allen 27 +quinn allen 114 +quinn brown 70 +quinn brown 88 +quinn brown 117 +quinn davidson 93 +quinn davidson 93 +quinn davidson 109 +quinn davidson 121 +quinn ellison 83 +quinn ellison 116 +quinn garcia 78 +quinn garcia 104 +quinn garcia 110 +quinn garcia 120 +quinn ichabod 60 +quinn king 14 +quinn king 46 +quinn laertes -2 +quinn laertes 65 +quinn laertes 95 +quinn nixon 11 +quinn ovid 123 +quinn quirinius 94 +quinn robinson 60 +quinn steinbeck 82 +quinn steinbeck 122 +quinn thompson 41 +quinn thompson 60 +quinn underhill 19 +quinn underhill 28 +quinn underhill 34 +quinn van buren 18 +quinn young 15 +quinn zipper 44 +quinn zipper 103 +rachel allen 76 +rachel allen 122 +rachel brown 23 +rachel brown 56 +rachel brown 71 +rachel brown 101 +rachel brown 108 +rachel carson 27 +rachel carson 74 +rachel davidson 84 +rachel ellison 51 +rachel falkner -2 +rachel falkner 43 +rachel falkner 72 +rachel falkner 104 +rachel johnson 32 +rachel king 84 +rachel king 95 +rachel laertes 37 +rachel laertes 106 +rachel ovid 5 +rachel ovid 31 +rachel polk 79 +rachel quirinius 108 +rachel robinson 24 +rachel robinson 41 +rachel robinson 91 +rachel thompson -3 +rachel thompson -2 +rachel thompson 74 +rachel underhill 11 +rachel white 108 +rachel white 119 +rachel young 77 +rachel zipper 16 +rachel zipper 116 +sarah carson 41 +sarah carson 58 +sarah carson 119 +sarah ellison 14 +sarah falkner 112 +sarah falkner 123 +sarah garcia 72 +sarah garcia 91 +sarah garcia 98 +sarah ichabod 38 +sarah ichabod 80 +sarah johnson 5 +sarah johnson 51 +sarah johnson 69 +sarah johnson 116 +sarah king 13 +sarah king 120 +sarah miller 31 +sarah ovid 122 +sarah robinson 26 +sarah robinson 35 +sarah steinbeck 30 +sarah white 11 +sarah white 32 +sarah xylophone 28 +sarah young 120 +sarah zipper 107 +tom brown 27 +tom brown 89 +tom carson 11 +tom carson 70 +tom carson 123 +tom davidson 72 +tom ellison 28 +tom ellison 118 +tom ellison 120 +tom falkner 11 +tom falkner 35 +tom hernandez -3 +tom hernandez 118 +tom ichabod 19 +tom johnson 42 +tom johnson 82 +tom king 59 +tom laertes 33 +tom laertes 54 +tom miller 9 +tom miller 48 +tom miller 94 +tom nixon 45 +tom ovid 68 +tom polk 70 +tom polk 107 +tom quirinius 10 +tom quirinius 38 +tom robinson 52 +tom robinson 104 +tom robinson 109 +tom robinson 115 +tom steinbeck 113 +tom van buren 5 +tom van buren 48 +tom van buren 63 +tom white 81 +tom young 13 +tom young 99 +tom zipper 31 +ulysses brown 46 +ulysses carson 5 +ulysses carson 26 +ulysses carson 55 +ulysses carson 109 +ulysses davidson 18 +ulysses ellison 61 +ulysses garcia 12 +ulysses hernandez 9 +ulysses hernandez 22 +ulysses hernandez 53 +ulysses ichabod 32 +ulysses ichabod 99 +ulysses johnson 41 +ulysses king 2 +ulysses laertes 40 +ulysses laertes 51 +ulysses laertes 95 +ulysses miller 23 +ulysses miller 85 +ulysses nixon 92 +ulysses ovid 31 +ulysses polk 28 +ulysses polk 74 +ulysses polk 86 +ulysses polk 89 +ulysses quirinius 7 +ulysses robinson 79 +ulysses steinbeck 6 +ulysses steinbeck 45 +ulysses thompson 24 +ulysses underhill 6 +ulysses underhill 27 +ulysses underhill 42 +ulysses underhill 51 +ulysses 
underhill 93 +ulysses underhill 98 +ulysses underhill 111 +ulysses van buren 58 +ulysses white 67 +ulysses white 109 +ulysses xylophone 47 +ulysses xylophone 105 +ulysses xylophone 123 +ulysses young 61 +ulysses young 86 +ulysses young 89 +victor allen 2 +victor allen 17 +victor brown 0 +victor brown 23 +victor brown 60 +victor brown 64 +victor davidson 42 +victor davidson 89 +victor davidson 123 +victor ellison 35 +victor ellison 84 +victor hernandez 1 +victor hernandez 17 +victor hernandez 91 +victor hernandez 94 +victor hernandez 116 +victor johnson 34 +victor johnson 53 +victor johnson 57 +victor king 59 +victor king 112 +victor laertes 18 +victor laertes 118 +victor miller 79 +victor nixon 50 +victor nixon 104 +victor ovid 120 +victor polk 106 +victor quirinius 77 +victor quirinius 85 +victor robinson 29 +victor robinson 105 +victor steinbeck 20 +victor steinbeck 92 +victor steinbeck 100 +victor thompson 124 +victor van buren 41 +victor van buren 71 +victor white 15 +victor white 49 +victor xylophone -3 +victor xylophone 41 +victor xylophone 43 +victor xylophone 54 +victor xylophone 91 +victor young 24 +victor zipper 3 +wendy allen 25 +wendy allen 38 +wendy allen 95 +wendy brown 92 +wendy brown 119 +wendy ellison 53 +wendy ellison 103 +wendy falkner 23 +wendy falkner 28 +wendy falkner 58 +wendy garcia 3 +wendy garcia 48 +wendy garcia 60 +wendy garcia 99 +wendy hernandez 53 +wendy ichabod 87 +wendy king -2 +wendy king 45 +wendy king 124 +wendy laertes 26 +wendy laertes 51 +wendy laertes 72 +wendy miller 51 +wendy miller 105 +wendy nixon 5 +wendy nixon 25 +wendy ovid 17 +wendy ovid 85 +wendy polk 44 +wendy polk 99 +wendy quirinius 77 +wendy quirinius 88 +wendy robinson -3 +wendy robinson 71 +wendy robinson 97 +wendy steinbeck 37 +wendy thompson 28 +wendy thompson 31 +wendy underhill 58 +wendy underhill 82 +wendy underhill 120 +wendy van buren 27 +wendy van buren 82 +wendy white 63 +wendy xylophone 53 +wendy xylophone 119 +wendy young 66 +wendy young 112 +xavier allen 18 +xavier allen 41 +xavier allen 106 +xavier brown 10 +xavier brown 63 +xavier brown 108 +xavier carson 20 +xavier carson 57 +xavier davidson 21 +xavier davidson 24 +xavier davidson 106 +xavier ellison 0 +xavier ellison 53 +xavier garcia 42 +xavier hernandez 9 +xavier hernandez 80 +xavier hernandez 114 +xavier ichabod 20 +xavier ichabod 58 +xavier johnson 44 +xavier johnson 85 +xavier king 26 +xavier king 107 +xavier laertes 60 +xavier ovid 3 +xavier polk 29 +xavier polk 83 +xavier polk 91 +xavier polk 122 +xavier quirinius 27 +xavier quirinius 35 +xavier quirinius 39 +xavier quirinius 111 +xavier thompson 2 +xavier underhill 102 +xavier white 8 +xavier white 56 +xavier xylophone 24 +xavier zipper 48 +yuri allen 31 +yuri allen 121 +yuri brown 101 +yuri brown 106 +yuri carson 1 +yuri carson 36 +yuri ellison -1 +yuri ellison 43 +yuri falkner 31 +yuri falkner 96 +yuri garcia 49 +yuri hernandez 92 +yuri johnson 1 +yuri johnson 2 +yuri johnson 111 +yuri king 44 +yuri laertes 84 +yuri laertes 115 +yuri nixon 5 +yuri nixon 111 +yuri polk 13 +yuri polk 49 +yuri polk 115 +yuri quirinius 24 +yuri quirinius 28 +yuri quirinius 90 +yuri steinbeck 8 +yuri steinbeck 65 +yuri thompson 42 +yuri underhill 10 +yuri underhill 66 +yuri white 73 +yuri xylophone 63 +zach allen 35 +zach brown 7 +zach brown 15 +zach brown 37 +zach brown 61 +zach brown 94 +zach carson 114 +zach ellison 16 +zach falkner 70 +zach falkner 115 +zach garcia -2 +zach garcia 59 +zach garcia 68 +zach garcia 97 +zach ichabod 14 +zach ichabod 73 +zach king 66 +zach king 70 
+zach king 81 +zach miller 4 +zach miller 9 +zach miller 73 +zach ovid 61 +zach ovid 68 +zach ovid 77 +zach ovid 114 +zach quirinius 79 +zach robinson 69 +zach steinbeck 6 +zach steinbeck 122 +zach thompson 75 +zach thompson 95 +zach underhill 123 +zach white 58 +zach xylophone 19 +zach xylophone 85 +zach young 11 +zach zipper 68 +zach zipper 100 +zach zipper 101 diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-4-dfd39236756a3951bc1ec354799d69e4 b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-4-dfd39236756a3951bc1ec354799d69e4 new file mode 100644 index 0000000000000..86ca4e49d21bd --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-4-dfd39236756a3951bc1ec354799d69e4 @@ -0,0 +1,1049 @@ + + + +alice allen +alice allen +alice allen +alice brown +alice carson +alice davidson +alice falkner +alice garcia +alice hernandez +alice hernandez +alice johnson +alice king +alice king +alice king +alice laertes +alice laertes +alice miller +alice nixon +alice nixon +alice nixon +alice ovid +alice polk +alice quirinius +alice quirinius +alice robinson +alice robinson +alice steinbeck +alice steinbeck +alice steinbeck +alice underhill +alice van buren +alice xylophone +alice xylophone +alice xylophone +alice zipper +alice zipper +alice zipper +bob brown +bob brown +bob brown +bob carson +bob davidson +bob davidson +bob davidson +bob ellison +bob ellison +bob ellison +bob ellison +bob falkner +bob garcia +bob garcia +bob garcia +bob garcia +bob garcia +bob hernandez +bob ichabod +bob king +bob king +bob king +bob laertes +bob laertes +bob miller +bob ovid +bob ovid +bob ovid +bob ovid +bob polk +bob quirinius +bob steinbeck +bob van buren +bob white +bob white +bob xylophone +bob xylophone +bob young +bob zipper +bob zipper +bob zipper +calvin allen +calvin brown +calvin brown +calvin brown +calvin carson +calvin davidson +calvin davidson +calvin ellison +calvin falkner +calvin falkner +calvin falkner +calvin falkner +calvin falkner +calvin falkner +calvin garcia +calvin hernandez +calvin johnson +calvin laertes +calvin laertes +calvin nixon +calvin nixon +calvin nixon +calvin ovid +calvin ovid +calvin ovid +calvin ovid +calvin polk +calvin quirinius +calvin quirinius +calvin robinson +calvin steinbeck +calvin steinbeck +calvin steinbeck +calvin thompson +calvin thompson +calvin underhill +calvin van buren +calvin van buren +calvin white +calvin white +calvin xylophone +calvin xylophone +calvin xylophone +calvin young +calvin young +calvin zipper +calvin zipper +david allen +david allen +david brown +david brown +david davidson +david davidson +david davidson +david davidson +david ellison +david ellison +david ellison +david hernandez +david ichabod +david ichabod +david laertes +david nixon +david ovid +david ovid +david quirinius +david quirinius +david quirinius +david robinson +david robinson +david thompson +david underhill +david underhill +david underhill +david van buren +david van buren +david white +david xylophone +david xylophone +david xylophone +david young +david young +ethan allen +ethan brown +ethan brown +ethan brown +ethan brown +ethan brown +ethan brown +ethan carson +ethan ellison +ethan ellison +ethan falkner +ethan falkner +ethan garcia +ethan hernandez +ethan johnson +ethan king +ethan laertes +ethan laertes +ethan laertes +ethan laertes +ethan laertes +ethan laertes +ethan laertes +ethan miller +ethan nixon +ethan ovid +ethan polk +ethan polk +ethan polk +ethan polk +ethan quirinius 
+ethan quirinius +ethan quirinius +ethan robinson +ethan robinson +ethan underhill +ethan van buren +ethan white +ethan white +ethan xylophone +ethan zipper +ethan zipper +fred davidson +fred davidson +fred davidson +fred ellison +fred ellison +fred ellison +fred falkner +fred falkner +fred falkner +fred hernandez +fred ichabod +fred ichabod +fred johnson +fred king +fred king +fred laertes +fred miller +fred nixon +fred nixon +fred nixon +fred nixon +fred polk +fred polk +fred polk +fred polk +fred quirinius +fred quirinius +fred robinson +fred steinbeck +fred steinbeck +fred steinbeck +fred underhill +fred van buren +fred van buren +fred van buren +fred van buren +fred white +fred young +fred young +fred zipper +gabriella allen +gabriella allen +gabriella brown +gabriella brown +gabriella carson +gabriella davidson +gabriella ellison +gabriella ellison +gabriella falkner +gabriella falkner +gabriella falkner +gabriella garcia +gabriella hernandez +gabriella hernandez +gabriella ichabod +gabriella ichabod +gabriella ichabod +gabriella ichabod +gabriella ichabod +gabriella king +gabriella king +gabriella laertes +gabriella miller +gabriella ovid +gabriella ovid +gabriella polk +gabriella polk +gabriella steinbeck +gabriella steinbeck +gabriella thompson +gabriella thompson +gabriella thompson +gabriella van buren +gabriella van buren +gabriella white +gabriella young +gabriella young +gabriella zipper +gabriella zipper +holly allen +holly brown +holly brown +holly falkner +holly hernandez +holly hernandez +holly hernandez +holly hernandez +holly ichabod +holly ichabod +holly ichabod +holly johnson +holly johnson +holly johnson +holly king +holly king +holly laertes +holly miller +holly nixon +holly nixon +holly polk +holly polk +holly robinson +holly thompson +holly thompson +holly thompson +holly underhill +holly underhill +holly underhill +holly underhill +holly van buren +holly white +holly white +holly xylophone +holly young +holly young +holly zipper +holly zipper +irene allen +irene brown +irene brown +irene brown +irene carson +irene ellison +irene ellison +irene falkner +irene falkner +irene garcia +irene garcia +irene garcia +irene ichabod +irene ichabod +irene johnson +irene laertes +irene laertes +irene laertes +irene miller +irene nixon +irene nixon +irene nixon +irene ovid +irene ovid +irene ovid +irene polk +irene polk +irene polk +irene polk +irene polk +irene quirinius +irene quirinius +irene quirinius +irene robinson +irene steinbeck +irene thompson +irene underhill +irene underhill +irene van buren +irene van buren +irene xylophone +jessica brown +jessica carson +jessica carson +jessica carson +jessica davidson +jessica davidson +jessica davidson +jessica davidson +jessica ellison +jessica ellison +jessica falkner +jessica garcia +jessica garcia +jessica ichabod +jessica johnson +jessica johnson +jessica miller +jessica nixon +jessica nixon +jessica ovid +jessica ovid +jessica polk +jessica quirinius +jessica quirinius +jessica quirinius +jessica quirinius +jessica robinson +jessica thompson +jessica thompson +jessica underhill +jessica underhill +jessica underhill +jessica van buren +jessica white +jessica white +jessica white +jessica white +jessica white +jessica xylophone +jessica young +jessica young +jessica zipper +jessica zipper +jessica zipper +katie allen +katie brown +katie davidson +katie ellison +katie ellison +katie falkner +katie garcia +katie garcia +katie hernandez +katie ichabod +katie ichabod +katie ichabod +katie king +katie king +katie king +katie 
miller +katie miller +katie nixon +katie ovid +katie polk +katie polk +katie robinson +katie van buren +katie van buren +katie white +katie white +katie xylophone +katie young +katie young +katie young +katie zipper +katie zipper +luke allen +luke allen +luke allen +luke allen +luke allen +luke brown +luke davidson +luke davidson +luke ellison +luke ellison +luke ellison +luke falkner +luke falkner +luke garcia +luke garcia +luke ichabod +luke ichabod +luke johnson +luke johnson +luke johnson +luke laertes +luke laertes +luke laertes +luke laertes +luke laertes +luke miller +luke ovid +luke ovid +luke polk +luke polk +luke quirinius +luke robinson +luke robinson +luke thompson +luke underhill +luke underhill +luke underhill +luke van buren +luke white +luke xylophone +luke zipper +mike allen +mike brown +mike carson +mike carson +mike carson +mike davidson +mike davidson +mike ellison +mike ellison +mike ellison +mike ellison +mike ellison +mike falkner +mike garcia +mike garcia +mike garcia +mike hernandez +mike hernandez +mike ichabod +mike king +mike king +mike king +mike king +mike king +mike king +mike miller +mike nixon +mike nixon +mike polk +mike polk +mike polk +mike quirinius +mike steinbeck +mike steinbeck +mike steinbeck +mike steinbeck +mike van buren +mike van buren +mike white +mike white +mike white +mike white +mike young +mike young +mike young +mike zipper +mike zipper +mike zipper +nick allen +nick allen +nick brown +nick davidson +nick ellison +nick ellison +nick falkner +nick falkner +nick garcia +nick garcia +nick garcia +nick ichabod +nick ichabod +nick ichabod +nick johnson +nick johnson +nick laertes +nick miller +nick nixon +nick ovid +nick polk +nick quirinius +nick quirinius +nick robinson +nick robinson +nick steinbeck +nick thompson +nick underhill +nick van buren +nick xylophone +nick young +nick young +nick zipper +nick zipper +oscar allen +oscar brown +oscar carson +oscar carson +oscar carson +oscar carson +oscar carson +oscar davidson +oscar ellison +oscar ellison +oscar falkner +oscar garcia +oscar hernandez +oscar hernandez +oscar ichabod +oscar ichabod +oscar ichabod +oscar ichabod +oscar johnson +oscar johnson +oscar king +oscar king +oscar king +oscar laertes +oscar laertes +oscar laertes +oscar laertes +oscar nixon +oscar ovid +oscar ovid +oscar ovid +oscar polk +oscar polk +oscar quirinius +oscar quirinius +oscar quirinius +oscar quirinius +oscar robinson +oscar robinson +oscar robinson +oscar robinson +oscar steinbeck +oscar thompson +oscar thompson +oscar thompson +oscar thompson +oscar underhill +oscar van buren +oscar van buren +oscar van buren +oscar white +oscar white +oscar white +oscar white +oscar xylophone +oscar xylophone +oscar xylophone +oscar zipper +oscar zipper +oscar zipper +priscilla brown +priscilla brown +priscilla brown +priscilla carson +priscilla carson +priscilla carson +priscilla ichabod +priscilla ichabod +priscilla johnson +priscilla johnson +priscilla johnson +priscilla johnson +priscilla johnson +priscilla king +priscilla nixon +priscilla nixon +priscilla ovid +priscilla ovid +priscilla polk +priscilla quirinius +priscilla thompson +priscilla underhill +priscilla underhill +priscilla van buren +priscilla van buren +priscilla van buren +priscilla white +priscilla xylophone +priscilla xylophone +priscilla xylophone +priscilla young +priscilla young +priscilla zipper +priscilla zipper +quinn allen +quinn allen +quinn brown +quinn brown +quinn brown +quinn davidson +quinn davidson +quinn davidson +quinn davidson +quinn 
ellison +quinn ellison +quinn garcia +quinn garcia +quinn garcia +quinn garcia +quinn ichabod +quinn king +quinn king +quinn laertes +quinn laertes +quinn laertes +quinn nixon +quinn ovid +quinn quirinius +quinn robinson +quinn steinbeck +quinn steinbeck +quinn thompson +quinn thompson +quinn underhill +quinn underhill +quinn underhill +quinn van buren +quinn young +quinn zipper +quinn zipper +rachel allen +rachel allen +rachel brown +rachel brown +rachel brown +rachel brown +rachel brown +rachel carson +rachel carson +rachel davidson +rachel ellison +rachel falkner +rachel falkner +rachel falkner +rachel falkner +rachel johnson +rachel king +rachel king +rachel laertes +rachel laertes +rachel ovid +rachel ovid +rachel polk +rachel quirinius +rachel robinson +rachel robinson +rachel robinson +rachel thompson +rachel thompson +rachel thompson +rachel underhill +rachel white +rachel white +rachel young +rachel zipper +rachel zipper +sarah carson +sarah carson +sarah carson +sarah ellison +sarah falkner +sarah falkner +sarah garcia +sarah garcia +sarah garcia +sarah ichabod +sarah ichabod +sarah johnson +sarah johnson +sarah johnson +sarah johnson +sarah king +sarah king +sarah miller +sarah ovid +sarah robinson +sarah robinson +sarah steinbeck +sarah white +sarah white +sarah xylophone +sarah young +sarah zipper +tom brown +tom brown +tom carson +tom carson +tom carson +tom davidson +tom ellison +tom ellison +tom ellison +tom falkner +tom falkner +tom hernandez +tom hernandez +tom ichabod +tom johnson +tom johnson +tom king +tom laertes +tom laertes +tom miller +tom miller +tom miller +tom nixon +tom ovid +tom polk +tom polk +tom quirinius +tom quirinius +tom robinson +tom robinson +tom robinson +tom robinson +tom steinbeck +tom van buren +tom van buren +tom van buren +tom white +tom young +tom young +tom zipper +ulysses brown +ulysses carson +ulysses carson +ulysses carson +ulysses carson +ulysses davidson +ulysses ellison +ulysses garcia +ulysses hernandez +ulysses hernandez +ulysses hernandez +ulysses ichabod +ulysses ichabod +ulysses johnson +ulysses king +ulysses laertes +ulysses laertes +ulysses laertes +ulysses miller +ulysses miller +ulysses nixon +ulysses ovid +ulysses polk +ulysses polk +ulysses polk +ulysses polk +ulysses quirinius +ulysses robinson +ulysses steinbeck +ulysses steinbeck +ulysses thompson +ulysses underhill +ulysses underhill +ulysses underhill +ulysses underhill +ulysses underhill +ulysses underhill +ulysses underhill +ulysses van buren +ulysses white +ulysses white +ulysses xylophone +ulysses xylophone +ulysses xylophone +ulysses young +ulysses young +ulysses young +victor allen +victor allen +victor brown +victor brown +victor brown +victor brown +victor davidson +victor davidson +victor davidson +victor ellison +victor ellison +victor hernandez +victor hernandez +victor hernandez +victor hernandez +victor hernandez +victor johnson +victor johnson +victor johnson +victor king +victor king +victor laertes +victor laertes +victor miller +victor nixon +victor nixon +victor ovid +victor polk +victor quirinius +victor quirinius +victor robinson +victor robinson +victor steinbeck +victor steinbeck +victor steinbeck +victor thompson +victor van buren +victor van buren +victor white +victor white +victor xylophone +victor xylophone +victor xylophone +victor xylophone +victor xylophone +victor young +victor zipper +wendy allen +wendy allen +wendy allen +wendy brown +wendy brown +wendy ellison +wendy ellison +wendy falkner +wendy falkner +wendy falkner +wendy garcia 
+wendy garcia +wendy garcia +wendy garcia +wendy hernandez +wendy ichabod +wendy king +wendy king +wendy king +wendy laertes +wendy laertes +wendy laertes +wendy miller +wendy miller +wendy nixon +wendy nixon +wendy ovid +wendy ovid +wendy polk +wendy polk +wendy quirinius +wendy quirinius +wendy robinson +wendy robinson +wendy robinson +wendy steinbeck +wendy thompson +wendy thompson +wendy underhill +wendy underhill +wendy underhill +wendy van buren +wendy van buren +wendy white +wendy xylophone +wendy xylophone +wendy young +wendy young +xavier allen +xavier allen +xavier allen +xavier brown +xavier brown +xavier brown +xavier carson +xavier carson +xavier davidson +xavier davidson +xavier davidson +xavier ellison +xavier ellison +xavier garcia +xavier hernandez +xavier hernandez +xavier hernandez +xavier ichabod +xavier ichabod +xavier johnson +xavier johnson +xavier king +xavier king +xavier laertes +xavier ovid +xavier polk +xavier polk +xavier polk +xavier polk +xavier quirinius +xavier quirinius +xavier quirinius +xavier quirinius +xavier thompson +xavier underhill +xavier white +xavier white +xavier xylophone +xavier zipper +yuri allen +yuri allen +yuri brown +yuri brown +yuri carson +yuri carson +yuri ellison +yuri ellison +yuri falkner +yuri falkner +yuri garcia +yuri hernandez +yuri johnson +yuri johnson +yuri johnson +yuri king +yuri laertes +yuri laertes +yuri nixon +yuri nixon +yuri polk +yuri polk +yuri polk +yuri quirinius +yuri quirinius +yuri quirinius +yuri steinbeck +yuri steinbeck +yuri thompson +yuri underhill +yuri underhill +yuri white +yuri xylophone +zach allen +zach brown +zach brown +zach brown +zach brown +zach brown +zach carson +zach ellison +zach falkner +zach falkner +zach garcia +zach garcia +zach garcia +zach garcia +zach ichabod +zach ichabod +zach king +zach king +zach king +zach miller +zach miller +zach miller +zach ovid +zach ovid +zach ovid +zach ovid +zach quirinius +zach robinson +zach steinbeck +zach steinbeck +zach thompson +zach thompson +zach underhill +zach white +zach xylophone +zach xylophone +zach young +zach zipper +zach zipper +zach zipper diff --git a/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-5-8d0ee3e1605f38214bfad28a5ce897cc b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-5-8d0ee3e1605f38214bfad28a5ce897cc new file mode 100644 index 0000000000000..ddb15e338263f --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_navfn.q (deterministic)-5-8d0ee3e1605f38214bfad28a5ce897cc @@ -0,0 +1 @@ +10 oscar carson 65549 65549 diff --git a/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-0-b7cb25303831392a51cd996e758ac79a b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-0-b7cb25303831392a51cd996e758ac79a new file mode 100644 index 0000000000000..42e5151fe211b --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-0-b7cb25303831392a51cd996e758ac79a @@ -0,0 +1,1049 @@ +65536 1 +65536 1 +65536 1 +65536 1 +65536 1 +65536 1 +65536 1 +65537 1 +65537 1 +65537 1 +65537 1 +65537 1 +65538 1 +65538 1 +65538 1 +65539 1 +65539 1 +65539 1 +65540 1 +65541 1 +65541 1 +65541 1 +65541 1 +65541 1 +65541 1 +65541 1 +65541 1 +65541 1 +65542 1 +65542 1 +65542 1 +65542 1 +65542 1 +65543 1 +65543 1 +65544 1 +65544 1 +65544 1 +65544 1 +65544 1 +65544 1 +65544 1 +65544 2 +65545 1 +65545 1 +65545 1 +65546 2 +65547 1 +65547 1 +65547 1 +65547 1 +65548 1 +65548 1 +65548 1 +65548 1 +65548 1 +65548 1 +65548 2 +65549 1 +65549 1 +65549 1 +65549 
1 +65549 1 +65549 1 +65549 1 +65549 3 +65550 1 +65550 1 +65550 1 +65550 1 +65550 1 +65550 1 +65550 2 +65551 1 +65551 1 +65551 1 +65552 1 +65552 1 +65552 1 +65552 1 +65552 2 +65553 1 +65553 1 +65553 1 +65553 1 +65553 1 +65554 1 +65554 1 +65555 2 +65556 1 +65556 1 +65556 1 +65556 1 +65556 1 +65557 1 +65557 1 +65558 1 +65559 1 +65559 1 +65559 1 +65559 1 +65559 1 +65560 1 +65560 1 +65560 1 +65560 2 +65560 2 +65561 1 +65561 2 +65561 2 +65562 1 +65562 1 +65562 1 +65562 1 +65562 1 +65562 2 +65563 1 +65563 1 +65563 1 +65563 1 +65564 1 +65564 1 +65564 1 +65564 1 +65564 2 +65564 2 +65564 2 +65565 1 +65565 1 +65566 1 +65566 2 +65567 1 +65568 1 +65568 1 +65568 1 +65569 1 +65570 1 +65570 1 +65570 1 +65570 1 +65570 1 +65570 2 +65571 1 +65571 1 +65571 1 +65571 2 +65572 1 +65572 1 +65572 1 +65572 1 +65572 1 +65573 1 +65573 1 +65573 3 +65574 1 +65574 1 +65574 1 +65575 1 +65575 1 +65575 1 +65575 2 +65576 1 +65576 1 +65576 1 +65576 3 +65577 1 +65578 1 +65578 1 +65578 1 +65578 1 +65578 1 +65578 1 +65578 2 +65579 1 +65579 1 +65579 1 +65579 1 +65579 3 +65580 1 +65580 2 +65580 2 +65581 1 +65581 1 +65581 1 +65581 1 +65581 2 +65581 2 +65582 1 +65582 1 +65582 1 +65582 1 +65583 1 +65583 1 +65583 2 +65583 2 +65584 1 +65584 1 +65584 1 +65585 1 +65585 1 +65585 1 +65585 1 +65585 2 +65586 1 +65586 1 +65586 1 +65586 1 +65586 2 +65587 1 +65587 1 +65587 1 +65587 2 +65587 3 +65588 1 +65588 1 +65588 1 +65588 1 +65588 2 +65589 1 +65589 1 +65589 1 +65589 1 +65589 2 +65590 1 +65590 1 +65590 1 +65590 1 +65590 1 +65591 1 +65591 1 +65591 2 +65591 3 +65592 1 +65593 1 +65593 1 +65594 1 +65594 1 +65594 1 +65594 3 +65595 1 +65595 1 +65595 1 +65595 2 +65595 2 +65595 4 +65596 1 +65596 1 +65596 1 +65596 2 +65596 2 +65597 1 +65597 1 +65597 2 +65598 1 +65598 2 +65599 1 +65599 1 +65599 1 +65599 1 +65599 3 +65600 1 +65600 1 +65600 2 +65600 2 +65601 1 +65601 2 +65601 2 +65602 1 +65602 1 +65602 1 +65602 1 +65602 1 +65602 2 +65603 1 +65603 1 +65603 1 +65603 2 +65603 2 +65604 1 +65604 2 +65604 2 +65604 3 +65605 2 +65606 1 +65606 1 +65606 1 +65606 1 +65606 2 +65606 2 +65607 1 +65607 1 +65607 1 +65607 1 +65607 2 +65607 3 +65608 1 +65608 1 +65608 1 +65608 2 +65609 1 +65610 1 +65610 1 +65610 3 +65610 4 +65610 5 +65611 1 +65611 1 +65611 1 +65611 1 +65612 1 +65612 2 +65612 2 +65612 3 +65613 1 +65614 1 +65614 1 +65614 2 +65615 1 +65615 1 +65615 2 +65615 2 +65616 1 +65616 2 +65617 1 +65617 2 +65617 2 +65617 2 +65618 1 +65618 1 +65618 2 +65618 2 +65618 2 +65619 1 +65619 1 +65619 1 +65619 1 +65619 2 +65619 2 +65620 1 +65620 1 +65620 1 +65620 1 +65620 1 +65620 3 +65621 1 +65621 1 +65622 1 +65622 1 +65622 1 +65622 3 +65622 3 +65622 4 +65623 1 +65623 1 +65623 1 +65623 1 +65623 1 +65623 2 +65623 4 +65624 1 +65624 2 +65624 2 +65624 2 +65624 4 +65625 1 +65625 1 +65625 1 +65625 1 +65626 1 +65626 1 +65626 1 +65626 2 +65627 1 +65627 1 +65627 2 +65627 3 +65628 1 +65628 1 +65628 1 +65628 2 +65628 2 +65628 3 +65629 1 +65629 1 +65629 1 +65629 3 +65630 1 +65630 2 +65631 1 +65632 2 +65632 2 +65633 1 +65633 2 +65633 2 +65633 2 +65634 1 +65634 1 +65634 1 +65634 2 +65635 1 +65635 2 +65635 3 +65636 1 +65636 1 +65636 2 +65637 1 +65637 1 +65637 1 +65637 1 +65637 1 +65637 1 +65637 2 +65637 3 +65638 1 +65638 1 +65638 2 +65639 1 +65640 2 +65641 1 +65641 1 +65641 1 +65641 5 +65642 4 +65643 1 +65643 1 +65643 2 +65643 2 +65643 2 +65643 2 +65643 2 +65643 2 +65643 4 +65644 1 +65644 1 +65644 1 +65644 1 +65644 2 +65644 2 +65644 3 +65645 1 +65645 1 +65645 1 +65646 1 +65646 1 +65646 1 +65647 1 +65647 2 +65648 1 +65648 1 +65648 2 +65648 2 +65649 1 +65649 1 +65649 2 +65649 2 +65650 1 +65650 
1 +65650 1 +65650 1 +65650 2 +65650 2 +65651 1 +65651 1 +65651 2 +65651 2 +65651 3 +65652 1 +65652 1 +65652 2 +65653 1 +65653 2 +65653 2 +65653 3 +65654 1 +65654 1 +65654 1 +65654 1 +65654 1 +65654 1 +65654 2 +65654 2 +65654 2 +65655 1 +65655 1 +65656 1 +65656 1 +65656 1 +65656 1 +65656 2 +65656 3 +65657 1 +65657 2 +65657 2 +65657 2 +65657 2 +65658 1 +65658 1 +65658 1 +65658 1 +65658 1 +65658 2 +65658 2 +65658 2 +65658 2 +65658 2 +65659 1 +65659 1 +65659 1 +65659 1 +65659 1 +65659 2 +65659 2 +65659 3 +65660 1 +65660 1 +65660 2 +65661 1 +65661 2 +65661 2 +65661 2 +65661 3 +65662 1 +65662 1 +65662 2 +65662 2 +65662 2 +65662 2 +65663 2 +65663 2 +65663 2 +65663 3 +65664 1 +65664 1 +65664 1 +65664 1 +65664 1 +65664 2 +65664 2 +65664 2 +65665 2 +65666 1 +65666 1 +65666 2 +65667 1 +65667 1 +65667 1 +65667 1 +65668 3 +65669 1 +65669 1 +65669 1 +65669 1 +65669 1 +65669 3 +65670 1 +65670 1 +65670 2 +65670 2 +65670 3 +65671 2 +65671 2 +65671 3 +65672 1 +65672 1 +65672 1 +65672 2 +65672 2 +65673 2 +65673 2 +65673 3 +65673 4 +65673 4 +65674 1 +65675 1 +65675 1 +65675 2 +65675 2 +65675 2 +65675 2 +65675 3 +65675 3 +65676 1 +65676 1 +65676 2 +65676 3 +65677 1 +65677 1 +65677 1 +65677 2 +65677 3 +65677 4 +65678 1 +65678 1 +65678 1 +65679 1 +65679 2 +65680 1 +65680 1 +65680 2 +65680 2 +65680 2 +65680 5 +65681 1 +65681 2 +65681 2 +65681 2 +65681 4 +65681 4 +65682 1 +65682 2 +65682 2 +65683 1 +65683 1 +65683 2 +65684 1 +65684 1 +65684 2 +65685 2 +65685 2 +65685 2 +65685 3 +65685 3 +65685 4 +65686 1 +65686 2 +65686 3 +65686 3 +65687 1 +65687 1 +65687 2 +65687 3 +65688 1 +65688 2 +65689 1 +65689 3 +65690 2 +65690 2 +65691 1 +65691 1 +65691 1 +65691 1 +65691 1 +65691 2 +65691 3 +65691 3 +65692 1 +65692 2 +65693 1 +65693 2 +65693 2 +65693 2 +65693 2 +65693 3 +65693 5 +65694 1 +65694 2 +65694 2 +65694 3 +65695 1 +65695 2 +65695 2 +65695 3 +65696 1 +65696 2 +65696 2 +65696 4 +65697 1 +65697 1 +65697 1 +65697 1 +65697 2 +65697 5 +65698 1 +65698 2 +65699 1 +65699 1 +65699 1 +65699 2 +65699 2 +65699 2 +65700 1 +65700 2 +65700 2 +65701 1 +65701 2 +65701 3 +65702 2 +65702 2 +65702 2 +65702 3 +65703 1 +65703 2 +65703 3 +65703 3 +65704 1 +65704 1 +65704 2 +65704 2 +65704 3 +65704 3 +65705 1 +65705 1 +65705 3 +65705 4 +65706 1 +65706 1 +65706 1 +65706 3 +65706 4 +65707 2 +65707 2 +65708 2 +65708 3 +65709 1 +65709 1 +65709 2 +65710 1 +65710 2 +65711 1 +65711 2 +65711 2 +65711 2 +65711 2 +65711 2 +65712 1 +65712 2 +65712 3 +65712 3 +65712 4 +65713 1 +65713 2 +65713 3 +65713 3 +65713 6 +65714 1 +65714 1 +65714 2 +65715 1 +65715 1 +65715 2 +65715 2 +65715 4 +65716 1 +65716 2 +65716 2 +65716 2 +65716 4 +65716 4 +65717 1 +65717 2 +65717 2 +65717 2 +65717 5 +65718 1 +65718 2 +65718 3 +65718 3 +65719 1 +65719 1 +65719 2 +65719 3 +65720 1 +65720 2 +65720 2 +65720 3 +65720 4 +65721 1 +65721 1 +65721 1 +65721 1 +65721 3 +65721 3 +65721 3 +65722 2 +65722 3 +65722 5 +65723 2 +65723 3 +65724 1 +65724 1 +65724 2 +65724 2 +65724 3 +65724 3 +65725 1 +65726 2 +65726 2 +65726 4 +65727 1 +65727 1 +65727 3 +65727 3 +65727 4 +65728 1 +65728 2 +65729 2 +65730 1 +65730 1 +65730 2 +65730 4 +65731 1 +65731 1 +65731 1 +65731 3 +65732 1 +65732 2 +65732 2 +65733 1 +65733 1 +65733 2 +65733 3 +65733 3 +65733 6 +65734 2 +65734 2 +65735 1 +65735 4 +65735 4 +65736 1 +65736 2 +65736 2 +65736 3 +65737 1 +65737 1 +65737 2 +65737 3 +65737 4 +65738 3 +65738 3 +65738 4 +65738 4 +65739 1 +65739 2 +65739 3 +65740 2 +65740 2 +65740 3 +65741 1 +65742 1 +65742 2 +65742 3 +65743 1 +65743 1 +65743 1 +65743 2 +65744 1 +65744 2 +65745 2 +65745 2 +65745 3 +65745 6 +65746 
1 +65746 2 +65746 2 +65746 2 +65746 2 +65747 1 +65747 1 +65747 1 +65747 2 +65747 2 +65747 3 +65747 3 +65748 1 +65748 3 +65749 2 +65749 3 +65749 3 +65749 3 +65749 4 +65750 1 +65750 1 +65750 2 +65750 3 +65750 3 +65751 1 +65751 2 +65751 2 +65751 3 +65751 3 +65751 4 +65752 1 +65752 3 +65753 2 +65753 3 +65754 2 +65754 4 +65755 1 +65755 2 +65755 2 +65755 3 +65755 3 +65755 3 +65755 3 +65755 5 +65755 5 +65756 2 +65756 3 +65756 3 +65756 5 +65757 1 +65757 1 +65757 1 +65757 2 +65757 3 +65758 1 +65758 2 +65758 2 +65758 3 +65758 4 +65759 2 +65759 2 +65759 2 +65759 4 +65759 4 +65760 2 +65760 4 +65760 5 +65760 7 +65761 1 +65762 1 +65762 1 +65762 2 +65762 4 +65762 5 +65763 1 +65763 2 +65763 2 +65763 2 +65763 4 +65764 3 +65764 3 +65764 4 +65765 2 +65765 2 +65765 3 +65766 1 +65766 1 +65766 3 +65766 3 +65766 3 +65767 2 +65767 3 +65767 3 +65768 3 +65769 1 +65769 2 +65769 2 +65769 2 +65769 3 +65769 5 +65770 2 +65770 3 +65771 2 +65771 3 +65771 3 +65772 2 +65773 1 +65773 2 +65773 2 +65773 2 +65773 2 +65773 3 +65773 3 +65773 4 +65774 2 +65774 2 +65774 2 +65774 2 +65774 3 +65774 3 +65775 1 +65775 2 +65775 3 +65775 3 +65775 5 +65776 1 +65776 3 +65776 3 +65776 5 +65776 6 +65777 2 +65777 3 +65777 4 +65777 4 +65778 1 +65778 2 +65778 2 +65778 2 +65778 3 +65778 3 +65778 4 +65778 5 +65779 3 +65779 3 +65779 3 +65779 3 +65779 4 +65779 4 +65779 5 +65780 1 +65780 2 +65780 3 +65781 1 +65781 3 +65781 3 +65782 2 +65782 2 +65782 3 +65782 4 +65782 5 +65783 2 +65783 3 +65783 3 +65783 3 +65783 3 +65783 3 +65784 2 +65784 2 +65784 2 +65784 4 +65784 6 +65785 2 +65785 7 +65786 1 +65786 2 +65786 3 +65786 3 +65786 4 +65787 1 +65787 2 +65787 3 +65787 3 +65788 1 +65788 1 +65788 2 +65788 4 +65789 1 +65789 1 +65789 2 +65789 2 +65789 2 +65789 2 +65789 4 +65790 2 +65790 2 +65790 4 +65791 2 +65791 2 diff --git a/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-1-a3d352560ac835993001665db6954965 b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-1-a3d352560ac835993001665db6954965 new file mode 100644 index 0000000000000..dc72606a83db6 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-1-a3d352560ac835993001665db6954965 @@ -0,0 +1,1049 @@ + 1 + 1 + 1 +alice allen 1 +alice allen 1 +alice allen 1 +alice brown 1 +alice carson 1 +alice davidson 1 +alice falkner 1 +alice garcia 1 +alice hernandez 1 +alice hernandez 1 +alice johnson 1 +alice king 1 +alice king 1 +alice king 1 +alice laertes 1 +alice laertes 2 +alice miller 1 +alice nixon 1 +alice nixon 1 +alice nixon 1 +alice ovid 2 +alice polk 1 +alice quirinius 1 +alice quirinius 1 +alice robinson 1 +alice robinson 1 +alice steinbeck 1 +alice steinbeck 1 +alice steinbeck 1 +alice underhill 1 +alice van buren 1 +alice xylophone 1 +alice xylophone 1 +alice xylophone 2 +alice zipper 1 +alice zipper 1 +alice zipper 2 +bob brown 1 +bob brown 1 +bob brown 1 +bob carson 1 +bob davidson 1 +bob davidson 1 +bob davidson 1 +bob ellison 1 +bob ellison 1 +bob ellison 1 +bob ellison 1 +bob falkner 2 +bob garcia 1 +bob garcia 1 +bob garcia 1 +bob garcia 2 +bob garcia 2 +bob hernandez 1 +bob ichabod 1 +bob king 1 +bob king 1 +bob king 2 +bob laertes 1 +bob laertes 1 +bob miller 1 +bob ovid 1 +bob ovid 1 +bob ovid 1 +bob ovid 1 +bob polk 1 +bob quirinius 1 +bob steinbeck 1 +bob van buren 1 +bob white 1 +bob white 1 +bob xylophone 1 +bob xylophone 1 +bob young 1 +bob zipper 1 +bob zipper 2 +bob zipper 2 +calvin allen 3 +calvin brown 1 +calvin brown 1 +calvin brown 1 +calvin carson 2 +calvin davidson 1 +calvin davidson 2 +calvin ellison 1 
+calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 2 +calvin falkner 2 +calvin falkner 2 +calvin garcia 2 +calvin hernandez 3 +calvin johnson 1 +calvin laertes 1 +calvin laertes 1 +calvin nixon 1 +calvin nixon 1 +calvin nixon 1 +calvin ovid 1 +calvin ovid 1 +calvin ovid 2 +calvin ovid 2 +calvin polk 2 +calvin quirinius 1 +calvin quirinius 1 +calvin robinson 1 +calvin steinbeck 1 +calvin steinbeck 1 +calvin steinbeck 2 +calvin thompson 1 +calvin thompson 2 +calvin underhill 1 +calvin van buren 1 +calvin van buren 1 +calvin white 1 +calvin white 2 +calvin xylophone 1 +calvin xylophone 2 +calvin xylophone 2 +calvin young 1 +calvin young 2 +calvin zipper 3 +calvin zipper 4 +david allen 1 +david allen 1 +david brown 2 +david brown 3 +david davidson 1 +david davidson 2 +david davidson 3 +david davidson 3 +david ellison 1 +david ellison 2 +david ellison 3 +david hernandez 1 +david ichabod 1 +david ichabod 3 +david laertes 3 +david nixon 1 +david ovid 1 +david ovid 1 +david quirinius 1 +david quirinius 1 +david quirinius 3 +david robinson 1 +david robinson 4 +david thompson 1 +david underhill 1 +david underhill 2 +david underhill 3 +david van buren 1 +david van buren 2 +david white 1 +david xylophone 1 +david xylophone 1 +david xylophone 2 +david young 1 +david young 1 +ethan allen 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan brown 2 +ethan brown 2 +ethan carson 1 +ethan ellison 1 +ethan ellison 2 +ethan falkner 1 +ethan falkner 1 +ethan garcia 1 +ethan hernandez 1 +ethan johnson 2 +ethan king 2 +ethan laertes 1 +ethan laertes 1 +ethan laertes 2 +ethan laertes 2 +ethan laertes 2 +ethan laertes 2 +ethan laertes 3 +ethan miller 1 +ethan nixon 2 +ethan ovid 2 +ethan polk 1 +ethan polk 1 +ethan polk 1 +ethan polk 2 +ethan quirinius 1 +ethan quirinius 1 +ethan quirinius 2 +ethan robinson 1 +ethan robinson 2 +ethan underhill 2 +ethan van buren 1 +ethan white 1 +ethan white 2 +ethan xylophone 2 +ethan zipper 1 +ethan zipper 3 +fred davidson 2 +fred davidson 2 +fred davidson 3 +fred ellison 1 +fred ellison 2 +fred ellison 2 +fred falkner 1 +fred falkner 3 +fred falkner 4 +fred hernandez 2 +fred ichabod 2 +fred ichabod 3 +fred johnson 2 +fred king 2 +fred king 2 +fred laertes 2 +fred miller 3 +fred nixon 1 +fred nixon 1 +fred nixon 1 +fred nixon 3 +fred polk 1 +fred polk 1 +fred polk 1 +fred polk 2 +fred quirinius 2 +fred quirinius 3 +fred robinson 2 +fred steinbeck 1 +fred steinbeck 1 +fred steinbeck 1 +fred underhill 1 +fred van buren 2 +fred van buren 3 +fred van buren 3 +fred van buren 4 +fred white 2 +fred young 1 +fred young 2 +fred zipper 3 +gabriella allen 1 +gabriella allen 3 +gabriella brown 1 +gabriella brown 1 +gabriella carson 2 +gabriella davidson 1 +gabriella ellison 1 +gabriella ellison 3 +gabriella falkner 1 +gabriella falkner 1 +gabriella falkner 3 +gabriella garcia 1 +gabriella hernandez 1 +gabriella hernandez 1 +gabriella ichabod 1 +gabriella ichabod 2 +gabriella ichabod 2 +gabriella ichabod 2 +gabriella ichabod 4 +gabriella king 1 +gabriella king 2 +gabriella laertes 1 +gabriella miller 2 +gabriella ovid 1 +gabriella ovid 2 +gabriella polk 1 +gabriella polk 2 +gabriella steinbeck 1 +gabriella steinbeck 1 +gabriella thompson 1 +gabriella thompson 2 +gabriella thompson 3 +gabriella van buren 1 +gabriella van buren 2 +gabriella white 1 +gabriella young 1 +gabriella young 2 +gabriella zipper 1 +gabriella zipper 2 +holly allen 3 +holly brown 2 +holly brown 2 +holly falkner 2 +holly hernandez 2 +holly hernandez 2 +holly hernandez 2 +holly hernandez 
3 +holly ichabod 1 +holly ichabod 2 +holly ichabod 2 +holly johnson 1 +holly johnson 3 +holly johnson 4 +holly king 2 +holly king 2 +holly laertes 3 +holly miller 2 +holly nixon 1 +holly nixon 2 +holly polk 1 +holly polk 2 +holly robinson 3 +holly thompson 1 +holly thompson 3 +holly thompson 4 +holly underhill 2 +holly underhill 2 +holly underhill 3 +holly underhill 3 +holly van buren 1 +holly white 4 +holly white 4 +holly xylophone 2 +holly young 1 +holly young 2 +holly zipper 1 +holly zipper 4 +irene allen 3 +irene brown 1 +irene brown 2 +irene brown 3 +irene carson 2 +irene ellison 2 +irene ellison 2 +irene falkner 1 +irene falkner 1 +irene garcia 1 +irene garcia 2 +irene garcia 3 +irene ichabod 1 +irene ichabod 2 +irene johnson 2 +irene laertes 1 +irene laertes 3 +irene laertes 4 +irene miller 1 +irene nixon 1 +irene nixon 3 +irene nixon 3 +irene ovid 2 +irene ovid 2 +irene ovid 2 +irene polk 1 +irene polk 1 +irene polk 2 +irene polk 2 +irene polk 4 +irene quirinius 2 +irene quirinius 3 +irene quirinius 4 +irene robinson 2 +irene steinbeck 1 +irene thompson 1 +irene underhill 2 +irene underhill 3 +irene van buren 2 +irene van buren 3 +irene xylophone 2 +jessica brown 2 +jessica carson 1 +jessica carson 2 +jessica carson 4 +jessica davidson 1 +jessica davidson 2 +jessica davidson 3 +jessica davidson 3 +jessica ellison 1 +jessica ellison 3 +jessica falkner 2 +jessica garcia 1 +jessica garcia 5 +jessica ichabod 2 +jessica johnson 2 +jessica johnson 3 +jessica miller 2 +jessica nixon 2 +jessica nixon 3 +jessica ovid 2 +jessica ovid 3 +jessica polk 5 +jessica quirinius 2 +jessica quirinius 2 +jessica quirinius 3 +jessica quirinius 3 +jessica robinson 1 +jessica thompson 2 +jessica thompson 3 +jessica underhill 2 +jessica underhill 2 +jessica underhill 4 +jessica van buren 2 +jessica white 3 +jessica white 3 +jessica white 3 +jessica white 3 +jessica white 4 +jessica xylophone 4 +jessica young 4 +jessica young 4 +jessica zipper 1 +jessica zipper 2 +jessica zipper 4 +katie allen 2 +katie brown 4 +katie davidson 3 +katie ellison 3 +katie ellison 3 +katie falkner 2 +katie garcia 2 +katie garcia 3 +katie hernandez 2 +katie ichabod 2 +katie ichabod 2 +katie ichabod 2 +katie king 1 +katie king 1 +katie king 2 +katie miller 2 +katie miller 3 +katie nixon 5 +katie ovid 1 +katie polk 2 +katie polk 3 +katie robinson 4 +katie van buren 2 +katie van buren 4 +katie white 1 +katie white 2 +katie xylophone 3 +katie young 2 +katie young 2 +katie young 3 +katie zipper 1 +katie zipper 3 +luke allen 2 +luke allen 2 +luke allen 2 +luke allen 3 +luke allen 3 +luke brown 2 +luke davidson 1 +luke davidson 3 +luke ellison 3 +luke ellison 5 +luke ellison 5 +luke falkner 2 +luke falkner 4 +luke garcia 1 +luke garcia 5 +luke ichabod 3 +luke ichabod 3 +luke johnson 1 +luke johnson 2 +luke johnson 3 +luke laertes 2 +luke laertes 3 +luke laertes 3 +luke laertes 3 +luke laertes 3 +luke miller 2 +luke ovid 1 +luke ovid 2 +luke polk 2 +luke polk 3 +luke quirinius 2 +luke robinson 1 +luke robinson 4 +luke thompson 1 +luke underhill 2 +luke underhill 3 +luke underhill 5 +luke van buren 2 +luke white 3 +luke xylophone 2 +luke zipper 1 +mike allen 4 +mike brown 4 +mike carson 1 +mike carson 2 +mike carson 4 +mike davidson 3 +mike davidson 4 +mike ellison 2 +mike ellison 3 +mike ellison 3 +mike ellison 4 +mike ellison 4 +mike falkner 1 +mike garcia 1 +mike garcia 2 +mike garcia 3 +mike hernandez 2 +mike hernandez 3 +mike ichabod 1 +mike king 1 +mike king 1 +mike king 3 +mike king 3 +mike king 4 +mike king 4 +mike miller 4 +mike 
nixon 3 +mike nixon 4 +mike polk 4 +mike polk 5 +mike polk 5 +mike quirinius 3 +mike steinbeck 2 +mike steinbeck 3 +mike steinbeck 3 +mike steinbeck 4 +mike van buren 2 +mike van buren 3 +mike white 3 +mike white 4 +mike white 5 +mike white 6 +mike young 2 +mike young 2 +mike young 4 +mike zipper 1 +mike zipper 4 +mike zipper 6 +nick allen 2 +nick allen 2 +nick brown 5 +nick davidson 1 +nick ellison 3 +nick ellison 4 +nick falkner 2 +nick falkner 3 +nick garcia 2 +nick garcia 4 +nick garcia 4 +nick ichabod 3 +nick ichabod 3 +nick ichabod 3 +nick johnson 4 +nick johnson 4 +nick laertes 2 +nick miller 3 +nick nixon 2 +nick ovid 3 +nick polk 4 +nick quirinius 1 +nick quirinius 3 +nick robinson 4 +nick robinson 4 +nick steinbeck 1 +nick thompson 2 +nick underhill 6 +nick van buren 2 +nick xylophone 3 +nick young 3 +nick young 5 +nick zipper 3 +nick zipper 4 +oscar allen 4 +oscar brown 3 +oscar carson 3 +oscar carson 4 +oscar carson 4 +oscar carson 5 +oscar carson 5 +oscar davidson 4 +oscar ellison 1 +oscar ellison 2 +oscar falkner 1 +oscar garcia 5 +oscar hernandez 1 +oscar hernandez 2 +oscar ichabod 2 +oscar ichabod 5 +oscar ichabod 5 +oscar ichabod 6 +oscar johnson 3 +oscar johnson 7 +oscar king 3 +oscar king 4 +oscar king 4 +oscar laertes 1 +oscar laertes 2 +oscar laertes 3 +oscar laertes 3 +oscar nixon 4 +oscar ovid 3 +oscar ovid 3 +oscar ovid 6 +oscar polk 5 +oscar polk 5 +oscar quirinius 2 +oscar quirinius 4 +oscar quirinius 5 +oscar quirinius 6 +oscar robinson 2 +oscar robinson 3 +oscar robinson 5 +oscar robinson 6 +oscar steinbeck 1 +oscar thompson 2 +oscar thompson 3 +oscar thompson 3 +oscar thompson 4 +oscar underhill 2 +oscar van buren 1 +oscar van buren 2 +oscar van buren 5 +oscar white 1 +oscar white 2 +oscar white 5 +oscar white 5 +oscar xylophone 3 +oscar xylophone 3 +oscar xylophone 4 +oscar zipper 2 +oscar zipper 2 +oscar zipper 2 +priscilla brown 2 +priscilla brown 2 +priscilla brown 4 +priscilla carson 3 +priscilla carson 5 +priscilla carson 7 +priscilla ichabod 1 +priscilla ichabod 4 +priscilla johnson 1 +priscilla johnson 2 +priscilla johnson 4 +priscilla johnson 4 +priscilla johnson 6 +priscilla king 3 +priscilla nixon 3 +priscilla nixon 6 +priscilla ovid 3 +priscilla ovid 7 +priscilla polk 4 +priscilla quirinius 3 +priscilla thompson 6 +priscilla underhill 1 +priscilla underhill 4 +priscilla van buren 3 +priscilla van buren 5 +priscilla van buren 5 +priscilla white 1 +priscilla xylophone 2 +priscilla xylophone 3 +priscilla xylophone 3 +priscilla young 5 +priscilla young 8 +priscilla zipper 3 +priscilla zipper 3 +quinn allen 1 +quinn allen 4 +quinn brown 3 +quinn brown 4 +quinn brown 4 +quinn davidson 2 +quinn davidson 4 +quinn davidson 6 +quinn davidson 7 +quinn ellison 3 +quinn ellison 8 +quinn garcia 2 +quinn garcia 3 +quinn garcia 3 +quinn garcia 5 +quinn ichabod 7 +quinn king 1 +quinn king 1 +quinn laertes 2 +quinn laertes 4 +quinn laertes 5 +quinn nixon 4 +quinn ovid 4 +quinn quirinius 5 +quinn robinson 3 +quinn steinbeck 4 +quinn steinbeck 5 +quinn thompson 4 +quinn thompson 6 +quinn underhill 2 +quinn underhill 3 +quinn underhill 7 +quinn van buren 1 +quinn young 2 +quinn zipper 3 +quinn zipper 4 +rachel allen 2 +rachel allen 3 +rachel brown 2 +rachel brown 3 +rachel brown 4 +rachel brown 4 +rachel brown 5 +rachel carson 2 +rachel carson 4 +rachel davidson 7 +rachel ellison 1 +rachel falkner 1 +rachel falkner 3 +rachel falkner 5 +rachel falkner 6 +rachel johnson 9 +rachel king 3 +rachel king 7 +rachel laertes 4 +rachel laertes 6 +rachel ovid 3 +rachel ovid 4 
+rachel polk 3 +rachel quirinius 4 +rachel robinson 4 +rachel robinson 4 +rachel robinson 6 +rachel thompson 4 +rachel thompson 5 +rachel thompson 5 +rachel underhill 2 +rachel white 4 +rachel white 5 +rachel young 4 +rachel zipper 1 +rachel zipper 5 +sarah carson 1 +sarah carson 4 +sarah carson 7 +sarah ellison 1 +sarah falkner 4 +sarah falkner 5 +sarah garcia 2 +sarah garcia 2 +sarah garcia 4 +sarah ichabod 3 +sarah ichabod 3 +sarah johnson 3 +sarah johnson 5 +sarah johnson 5 +sarah johnson 6 +sarah king 3 +sarah king 5 +sarah miller 2 +sarah ovid 5 +sarah robinson 5 +sarah robinson 5 +sarah steinbeck 5 +sarah white 4 +sarah white 6 +sarah xylophone 3 +sarah young 5 +sarah zipper 6 +tom brown 2 +tom brown 5 +tom carson 1 +tom carson 3 +tom carson 5 +tom davidson 2 +tom ellison 3 +tom ellison 4 +tom ellison 6 +tom falkner 3 +tom falkner 4 +tom hernandez 1 +tom hernandez 3 +tom ichabod 4 +tom johnson 6 +tom johnson 7 +tom king 3 +tom laertes 3 +tom laertes 3 +tom miller 3 +tom miller 4 +tom miller 5 +tom nixon 4 +tom ovid 3 +tom polk 2 +tom polk 2 +tom quirinius 3 +tom quirinius 5 +tom robinson 2 +tom robinson 3 +tom robinson 3 +tom robinson 5 +tom steinbeck 2 +tom van buren 2 +tom van buren 3 +tom van buren 6 +tom white 5 +tom young 1 +tom young 5 +tom zipper 7 +ulysses brown 2 +ulysses carson 2 +ulysses carson 5 +ulysses carson 6 +ulysses carson 8 +ulysses davidson 3 +ulysses ellison 4 +ulysses garcia 3 +ulysses hernandez 3 +ulysses hernandez 3 +ulysses hernandez 4 +ulysses ichabod 1 +ulysses ichabod 3 +ulysses johnson 5 +ulysses king 2 +ulysses laertes 2 +ulysses laertes 5 +ulysses laertes 6 +ulysses miller 5 +ulysses miller 7 +ulysses nixon 4 +ulysses ovid 4 +ulysses polk 2 +ulysses polk 2 +ulysses polk 3 +ulysses polk 4 +ulysses quirinius 4 +ulysses robinson 1 +ulysses steinbeck 2 +ulysses steinbeck 5 +ulysses thompson 3 +ulysses underhill 2 +ulysses underhill 2 +ulysses underhill 3 +ulysses underhill 4 +ulysses underhill 4 +ulysses underhill 4 +ulysses underhill 5 +ulysses van buren 2 +ulysses white 6 +ulysses white 7 +ulysses xylophone 2 +ulysses xylophone 3 +ulysses xylophone 6 +ulysses young 1 +ulysses young 4 +ulysses young 7 +victor allen 2 +victor allen 3 +victor brown 1 +victor brown 4 +victor brown 5 +victor brown 7 +victor davidson 4 +victor davidson 4 +victor davidson 6 +victor ellison 4 +victor ellison 4 +victor hernandez 2 +victor hernandez 3 +victor hernandez 4 +victor hernandez 4 +victor hernandez 6 +victor johnson 4 +victor johnson 5 +victor johnson 6 +victor king 2 +victor king 6 +victor laertes 3 +victor laertes 5 +victor miller 5 +victor nixon 2 +victor nixon 3 +victor ovid 3 +victor polk 4 +victor quirinius 5 +victor quirinius 5 +victor robinson 5 +victor robinson 5 +victor steinbeck 3 +victor steinbeck 4 +victor steinbeck 5 +victor thompson 6 +victor van buren 5 +victor van buren 6 +victor white 2 +victor white 7 +victor xylophone 4 +victor xylophone 6 +victor xylophone 6 +victor xylophone 8 +victor xylophone 8 +victor young 5 +victor zipper 3 +wendy allen 5 +wendy allen 6 +wendy allen 6 +wendy brown 3 +wendy brown 5 +wendy ellison 3 +wendy ellison 5 +wendy falkner 2 +wendy falkner 4 +wendy falkner 6 +wendy garcia 4 +wendy garcia 4 +wendy garcia 7 +wendy garcia 7 +wendy hernandez 4 +wendy ichabod 4 +wendy king 4 +wendy king 5 +wendy king 7 +wendy laertes 2 +wendy laertes 3 +wendy laertes 5 +wendy miller 4 +wendy miller 4 +wendy nixon 3 +wendy nixon 5 +wendy ovid 5 +wendy ovid 9 +wendy polk 2 +wendy polk 5 +wendy quirinius 3 +wendy quirinius 4 +wendy robinson 5 
+wendy robinson 6 +wendy robinson 6 +wendy steinbeck 3 +wendy thompson 4 +wendy thompson 5 +wendy underhill 4 +wendy underhill 5 +wendy underhill 6 +wendy van buren 6 +wendy van buren 6 +wendy white 4 +wendy xylophone 4 +wendy xylophone 6 +wendy young 1 +wendy young 6 +xavier allen 3 +xavier allen 4 +xavier allen 5 +xavier brown 2 +xavier brown 4 +xavier brown 6 +xavier carson 4 +xavier carson 5 +xavier davidson 7 +xavier davidson 8 +xavier davidson 9 +xavier ellison 8 +xavier ellison 8 +xavier garcia 5 +xavier hernandez 5 +xavier hernandez 6 +xavier hernandez 9 +xavier ichabod 3 +xavier ichabod 4 +xavier johnson 2 +xavier johnson 9 +xavier king 3 +xavier king 5 +xavier laertes 4 +xavier ovid 4 +xavier polk 3 +xavier polk 4 +xavier polk 4 +xavier polk 8 +xavier quirinius 3 +xavier quirinius 5 +xavier quirinius 6 +xavier quirinius 6 +xavier thompson 4 +xavier underhill 2 +xavier white 3 +xavier white 3 +xavier xylophone 4 +xavier zipper 3 +yuri allen 2 +yuri allen 3 +yuri brown 2 +yuri brown 3 +yuri carson 5 +yuri carson 6 +yuri ellison 6 +yuri ellison 6 +yuri falkner 6 +yuri falkner 10 +yuri garcia 1 +yuri hernandez 5 +yuri johnson 5 +yuri johnson 5 +yuri johnson 6 +yuri king 7 +yuri laertes 7 +yuri laertes 8 +yuri nixon 3 +yuri nixon 3 +yuri polk 3 +yuri polk 5 +yuri polk 6 +yuri quirinius 3 +yuri quirinius 4 +yuri quirinius 7 +yuri steinbeck 1 +yuri steinbeck 2 +yuri thompson 3 +yuri underhill 4 +yuri underhill 4 +yuri white 8 +yuri xylophone 3 +zach allen 4 +zach brown 5 +zach brown 5 +zach brown 5 +zach brown 5 +zach brown 7 +zach carson 5 +zach ellison 2 +zach falkner 4 +zach falkner 6 +zach garcia 4 +zach garcia 5 +zach garcia 7 +zach garcia 8 +zach ichabod 4 +zach ichabod 4 +zach king 4 +zach king 5 +zach king 8 +zach miller 1 +zach miller 3 +zach miller 4 +zach ovid 4 +zach ovid 5 +zach ovid 5 +zach ovid 7 +zach quirinius 8 +zach robinson 5 +zach steinbeck 4 +zach steinbeck 6 +zach thompson 3 +zach thompson 4 +zach underhill 3 +zach white 6 +zach xylophone 3 +zach xylophone 5 +zach young 4 +zach zipper 4 +zach zipper 4 +zach zipper 5 diff --git a/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-2-fafa16c0f7697ca28aeb6f2698799562 b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-2-fafa16c0f7697ca28aeb6f2698799562 new file mode 100644 index 0000000000000..76cbeb254c0e9 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-2-fafa16c0f7697ca28aeb6f2698799562 @@ -0,0 +1,1049 @@ +0.08 1 +0.1 1 +0.13 1 +0.15 1 +0.27 1 +0.28 1 +0.43 1 +0.52 1 +0.56 1 +0.6 1 +0.61 1 +0.79 1 +0.84 1 +0.98 1 +1.02 1 +1.08 1 +1.08 1 +1.12 1 +1.21 1 +1.25 1 +1.27 1 +1.29 1 +1.31 1 +1.58 1 +1.87 1 +1.91 1 +1.92 1 +2.07 1 +2.18 1 +2.2 1 +2.35 1 +2.6 1 +2.79 1 +2.92 1 +2.96 1 +2.96 1 +2.97 1 +3.0 1 +3.21 1 +3.28 1 +3.33 1 +3.61 1 +3.62 1 +3.82 1 +3.86 1 +3.96 1 +3.97 1 +4.17 1 +4.32 1 +4.35 1 +4.41 1 +4.46 1 +4.47 1 +4.57 1 +4.59 1 +4.71 1 +4.72 1 +4.79 1 +4.8 1 +4.92 1 +5.08 1 +5.24 1 +5.28 1 +5.4 1 +5.44 1 +5.45 1 +5.51 1 +5.54 1 +5.62 1 +5.67 1 +5.85 1 +5.88 1 +6.29 1 +6.55 1 +6.57 1 +6.63 1 +6.67 1 +6.72 1 +6.74 1 +6.84 1 +6.87 1 +7.05 1 +7.06 1 +7.11 1 +7.54 1 +7.56 1 +7.79 1 +7.82 1 +7.96 1 +7.96 1 +7.98 1 +8.07 1 +8.07 1 +8.32 1 +8.37 1 +8.42 1 +8.45 1 +8.45 1 +8.45 1 +8.45 1 +8.57 1 +8.61 1 +8.67 2 +8.71 1 +8.79 1 +8.91 1 +9.04 1 +9.13 1 +9.19 1 +9.22 1 +9.25 1 +9.26 1 +9.35 1 +9.48 1 +9.56 1 +9.57 1 +9.57 1 +9.68 1 +9.7 1 +9.71 1 +9.74 1 +9.8 1 +9.81 1 +9.93 1 +10.09 1 +10.09 1 +10.13 1 +10.16 1 +10.17 1 +10.19 1 +10.2 1 +10.22 1 
+10.25 1 +10.26 1 +10.29 1 +10.6 1 +10.66 1 +10.67 1 +10.73 1 +11.15 1 +11.18 1 +11.19 1 +11.22 1 +11.34 1 +11.55 1 +11.57 1 +11.68 1 +11.82 1 +11.89 1 +11.91 1 +12.02 1 +12.16 1 +12.19 1 +12.32 1 +12.42 1 +12.44 1 +12.45 1 +12.46 1 +12.5 1 +12.54 1 +12.85 1 +12.9 1 +13.01 1 +13.1 1 +13.15 1 +13.35 1 +13.87 1 +13.89 1 +13.94 1 +13.99 1 +14.13 1 +14.21 2 +14.3 1 +14.44 1 +14.84 2 +14.92 1 +14.92 1 +14.93 1 +15.1 1 +15.15 1 +15.18 1 +15.22 1 +15.26 2 +15.3 1 +15.37 1 +15.45 1 +15.63 2 +15.75 1 +15.81 1 +15.86 1 +15.9 1 +15.92 1 +16.08 1 +16.09 1 +16.24 1 +16.25 1 +16.48 1 +16.69 1 +16.99 1 +16.99 1 +17.16 1 +17.37 1 +17.74 1 +17.79 1 +17.87 1 +18.2 1 +18.5 1 +18.56 1 +18.63 1 +18.63 1 +18.86 1 +18.89 1 +18.93 1 +19.0 1 +19.03 1 +19.06 1 +19.06 1 +19.13 1 +19.14 1 +19.28 1 +19.69 1 +20.07 1 +20.38 1 +20.64 1 +20.67 1 +20.79 1 +20.81 1 +20.82 1 +20.82 1 +21.18 1 +21.19 1 +21.23 1 +21.28 1 +21.32 1 +21.45 1 +21.49 1 +21.61 1 +21.7 1 +21.8 1 +21.94 1 +22.01 1 +22.08 1 +22.12 1 +22.12 1 +22.25 1 +22.27 1 +22.36 1 +22.68 1 +22.78 1 +22.85 1 +22.85 1 +22.94 1 +23.07 1 +23.13 1 +23.17 1 +23.19 1 +23.44 1 +23.45 1 +23.6 1 +23.77 1 +23.96 1 +24.02 1 +24.28 1 +24.49 1 +24.52 1 +24.73 1 +24.79 1 +24.8 1 +24.83 1 +24.86 1 +25.11 1 +25.28 1 +25.37 1 +25.42 1 +25.55 1 +25.67 1 +25.88 1 +26.08 1 +26.39 1 +26.43 1 +26.47 1 +26.49 1 +26.49 1 +26.64 1 +26.71 2 +26.73 1 +26.76 1 +27.07 1 +27.12 1 +27.3 1 +27.31 1 +27.63 1 +27.66 1 +27.72 2 +27.87 1 +28.11 1 +28.31 1 +28.45 1 +28.5 1 +28.56 1 +28.69 1 +28.71 1 +28.79 1 +28.89 1 +28.95 1 +29.02 1 +29.24 1 +29.36 1 +29.4 1 +29.41 2 +29.54 1 +29.59 1 +29.78 1 +30.25 2 +30.36 1 +30.37 1 +30.61 1 +30.62 1 +30.63 1 +30.65 1 +30.71 1 +30.81 1 +31.01 1 +31.15 1 +31.4 1 +31.61 1 +31.67 1 +31.77 1 +31.86 1 +31.91 1 +32.01 1 +32.18 1 +32.2 1 +32.23 1 +32.25 1 +32.37 1 +32.41 1 +32.47 1 +32.52 1 +32.75 1 +32.89 2 +32.92 1 +33.36 1 +33.52 1 +33.55 1 +33.58 1 +33.67 1 +33.76 1 +33.83 1 +33.85 2 +33.87 1 +34.03 1 +34.21 1 +34.35 1 +34.41 2 +34.58 1 +34.73 1 +34.97 1 +35.0 2 +35.08 1 +35.13 1 +35.17 1 +35.17 1 +35.56 1 +35.62 1 +35.65 1 +35.68 1 +35.72 1 +35.8 1 +35.89 1 +36.22 1 +36.26 1 +36.58 1 +36.7 1 +36.79 1 +36.89 1 +36.95 1 +37.07 2 +37.1 1 +37.14 1 +37.14 1 +37.24 1 +37.59 1 +37.6 1 +37.72 2 +37.78 1 +37.8 1 +37.85 1 +37.9 1 +38.05 1 +38.05 1 +38.3 2 +38.33 1 +38.57 1 +38.62 1 +38.79 1 +38.85 1 +38.88 1 +38.94 1 +39.01 1 +39.03 1 +39.18 1 +39.21 1 +39.34 1 +39.69 1 +39.81 1 +39.82 1 +39.83 1 +39.87 1 +39.9 1 +39.98 1 +40.0 1 +40.04 1 +40.17 1 +40.24 1 +40.42 1 +40.44 1 +40.78 1 +40.8 1 +40.98 1 +41.2 2 +41.29 1 +41.29 1 +41.31 2 +41.34 1 +41.34 1 +41.36 1 +41.44 1 +41.45 2 +41.62 1 +41.68 1 +41.71 1 +41.81 1 +41.85 1 +41.87 1 +41.89 2 +42.24 1 +42.31 1 +42.42 2 +42.48 1 +42.51 1 +42.55 1 +42.56 1 +42.67 2 +42.76 1 +42.85 1 +43.01 1 +43.02 1 +43.13 1 +43.16 1 +43.17 1 +43.19 1 +43.31 1 +43.37 1 +43.57 1 +43.71 1 +43.73 1 +43.92 1 +44.1 1 +44.22 1 +44.27 1 +44.43 1 +44.57 1 +45.06 2 +45.1 1 +45.19 1 +45.19 1 +45.24 1 +45.34 1 +45.35 1 +45.42 1 +45.45 1 +45.56 1 +45.59 1 +45.68 1 +45.92 1 +45.99 2 +46.02 1 +46.09 1 +46.1 1 +46.15 1 +46.18 1 +46.21 1 +46.27 1 +46.43 1 +46.45 1 +46.62 1 +46.8 1 +46.86 1 +46.87 1 +46.88 1 +46.97 2 +47.08 2 +47.27 1 +47.32 2 +47.57 1 +47.59 1 +47.69 1 +47.88 1 +48.01 1 +48.08 1 +48.11 1 +48.15 1 +48.22 1 +48.23 1 +48.25 1 +48.28 1 +48.37 1 +48.45 1 +48.45 1 +48.52 1 +48.59 1 +49.12 1 +49.28 2 +49.44 1 +49.68 1 +49.77 2 +49.78 1 +50.02 3 +50.08 1 +50.09 1 +50.26 1 +50.28 1 +50.31 1 +50.32 1 +50.4 1 +50.41 1 +50.66 1 +50.7 1 +50.83 2 +50.92 2 
+50.96 1 +51.25 1 +51.29 1 +51.29 2 +51.72 1 +51.79 2 +51.84 1 +51.85 1 +52.17 1 +52.23 1 +52.44 1 +52.5 1 +52.53 2 +52.72 1 +52.73 1 +52.85 2 +52.87 1 +53.02 1 +53.06 1 +53.18 2 +53.27 1 +53.59 2 +53.78 1 +53.93 1 +53.94 1 +54.1 2 +54.31 1 +54.34 1 +54.43 1 +54.44 1 +54.47 1 +54.73 1 +54.75 1 +54.83 1 +54.99 1 +55.1 1 +55.18 1 +55.2 1 +55.39 1 +55.51 1 +55.63 1 +55.99 1 +56.04 1 +56.07 1 +56.1 1 +56.15 1 +56.33 1 +56.62 1 +56.68 2 +56.81 1 +57.08 1 +57.11 1 +57.12 2 +57.23 1 +57.25 1 +57.29 1 +57.35 1 +57.37 1 +57.46 1 +57.64 1 +57.67 1 +57.89 2 +57.93 1 +58.0 1 +58.08 2 +58.09 2 +58.13 1 +58.43 1 +58.52 1 +58.66 1 +58.67 1 +58.75 1 +58.86 2 +59.07 1 +59.16 1 +59.21 1 +59.34 1 +59.43 1 +59.45 1 +59.45 2 +59.5 1 +59.55 1 +59.61 1 +59.62 1 +59.68 1 +59.68 1 +59.7 1 +59.71 1 +59.83 1 +59.87 1 +59.99 1 +60.02 1 +60.06 1 +60.12 1 +60.13 1 +60.22 2 +60.26 1 +60.26 1 +60.53 1 +60.6 2 +60.71 1 +60.85 1 +61.21 1 +61.7 1 +61.86 1 +61.88 1 +61.92 2 +61.94 1 +62.14 1 +62.2 1 +62.23 1 +62.3 1 +62.39 1 +62.52 1 +62.72 1 +62.74 1 +62.85 2 +62.9 3 +62.92 1 +63.12 1 +63.33 1 +63.35 2 +63.42 1 +63.51 1 +63.9 1 +64.0 1 +64.22 1 +64.25 1 +64.3 1 +64.36 2 +64.46 1 +64.65 2 +64.67 1 +64.77 1 +64.87 1 +64.95 1 +65.02 1 +65.02 1 +65.38 1 +65.43 1 +65.43 1 +65.44 2 +65.55 1 +65.62 1 +65.7 1 +65.72 1 +66.17 1 +66.17 2 +66.36 1 +66.51 1 +66.61 1 +66.61 1 +66.67 1 +66.89 1 +67.12 1 +67.18 1 +67.26 1 +67.38 1 +67.45 1 +67.48 1 +67.59 1 +67.94 1 +67.98 1 +68.01 2 +68.04 1 +68.22 1 +68.25 1 +68.25 1 +68.32 1 +68.41 1 +68.5 1 +68.81 1 +68.85 2 +68.89 1 +68.95 1 +68.96 1 +69.32 2 +69.53 1 +69.74 3 +69.8 2 +69.88 1 +69.96 1 +69.97 1 +70.0 2 +70.04 1 +70.06 1 +70.24 1 +70.35 1 +70.38 1 +70.39 1 +70.52 1 +70.53 1 +70.56 1 +70.85 1 +70.89 1 +70.93 1 +71.01 1 +71.07 2 +71.13 1 +71.19 1 +71.26 1 +71.31 1 +71.32 1 +71.35 1 +71.5 1 +71.54 1 +71.55 3 +71.68 1 +71.68 2 +71.78 2 +71.8 1 +71.89 2 +72.04 1 +72.18 1 +72.51 1 +72.53 2 +72.56 1 +72.62 1 +72.79 1 +72.98 1 +73.18 1 +73.32 1 +73.48 2 +73.63 2 +73.65 1 +73.68 1 +73.88 1 +73.93 1 +74.0 1 +74.02 1 +74.15 1 +74.19 1 +74.19 1 +74.3 1 +74.42 1 +74.45 1 +74.52 1 +74.53 1 +74.59 1 +74.62 1 +74.72 1 +74.78 1 +75.03 2 +75.1 3 +75.19 1 +75.29 1 +75.35 2 +75.42 2 +75.66 2 +75.73 2 +75.83 1 +75.88 1 +76.05 3 +76.1 1 +76.28 1 +76.28 1 +76.33 1 +76.52 1 +76.69 1 +76.7 1 +76.71 1 +76.72 2 +76.72 2 +76.74 1 +76.92 1 +76.93 1 +77.02 3 +77.1 1 +77.36 2 +77.42 1 +77.57 2 +77.66 1 +77.81 1 +77.84 2 +77.89 1 +77.97 1 +78.21 2 +78.26 1 +78.28 1 +78.3 2 +78.31 1 +78.62 1 +78.64 1 +78.73 1 +78.89 1 +78.98 2 +79.12 1 +79.19 1 +79.21 1 +79.38 1 +79.42 1 +79.48 1 +79.48 2 +79.49 2 +79.54 1 +79.55 2 +79.75 1 +79.83 1 +79.96 1 +79.97 2 +79.99 1 +80.23 2 +80.3 1 +80.3 2 +80.46 1 +80.52 1 +80.58 1 +80.6 1 +80.71 1 +80.74 1 +80.84 2 +80.92 1 +80.96 1 +80.97 1 +80.99 1 +81.17 1 +81.32 1 +81.32 1 +81.47 1 +81.58 1 +81.64 1 +81.66 1 +82.24 1 +82.3 1 +82.34 2 +82.41 1 +82.52 1 +82.55 1 +82.56 1 +82.72 1 +82.97 1 +83.08 1 +83.27 1 +83.33 1 +83.4 1 +83.54 2 +83.57 1 +83.58 1 +83.87 1 +83.92 1 +83.93 1 +84.03 1 +84.23 2 +84.31 1 +84.38 1 +84.4 1 +84.69 1 +84.72 1 +84.83 1 +85.0 1 +85.03 2 +85.1 1 +85.14 1 +85.23 1 +85.49 1 +85.49 2 +85.51 2 +85.74 1 +85.76 1 +85.87 1 +85.9 1 +86.0 1 +86.22 1 +86.23 1 +86.63 1 +86.69 1 +86.92 2 +86.93 1 +86.93 1 +87.14 2 +87.22 1 +87.4 1 +87.48 1 +87.57 1 +87.61 1 +87.67 1 +87.83 2 +87.94 1 +87.99 1 +88.02 1 +88.05 1 +88.07 2 +88.17 1 +88.22 1 +88.36 1 +88.47 1 +88.48 1 +88.55 1 +88.77 1 +88.78 1 +88.8 1 +88.91 2 +89.01 2 +89.03 1 +89.1 3 +89.15 2 +89.28 1 +89.38 1 +89.53 1 
+89.55 1 +89.55 1 +89.55 1 +89.8 1 +89.81 1 +89.93 1 +90.05 1 +90.05 1 +90.07 1 +90.12 1 +90.2 1 +90.25 1 +90.28 2 +90.35 1 +90.38 1 +90.51 1 +90.56 2 +90.69 1 +90.69 1 +90.73 1 +90.77 1 +91.05 1 +91.16 1 +91.42 1 +91.48 1 +91.53 1 +91.61 1 +91.63 1 +91.78 1 +91.88 1 +91.97 1 +92.05 2 +92.11 2 +92.33 2 +92.37 1 +92.4 2 +92.55 1 +92.61 1 +92.82 1 +92.96 2 +92.98 1 +93.03 1 +93.09 1 +93.11 1 +93.61 1 +93.64 1 +93.73 1 +94.08 1 +94.15 1 +94.25 1 +94.27 1 +94.31 1 +94.33 1 +94.34 1 +94.38 1 +94.43 1 +94.54 1 +94.66 1 +94.68 1 +94.68 1 +94.72 1 +95.07 1 +95.11 1 +95.28 1 +95.33 1 +95.34 1 +95.38 2 +95.48 1 +95.53 1 +95.53 1 +95.81 1 +95.81 2 +95.84 1 +96.09 1 +96.23 1 +96.25 1 +96.29 1 +96.38 1 +96.62 1 +96.68 1 +96.73 1 +96.78 1 +96.91 2 +96.94 1 +97.09 1 +97.24 1 +97.26 1 +97.39 1 +97.46 1 +97.51 2 +97.56 1 +97.57 1 +97.65 2 +97.68 1 +97.71 4 +97.81 1 +97.83 1 +97.87 1 +98.18 2 +98.22 1 +98.23 1 +98.31 1 +98.48 1 +98.51 1 +98.57 1 +98.72 1 +98.96 1 +99.13 1 +99.15 1 +99.21 1 +99.24 1 +99.29 1 +99.36 1 +99.62 1 +99.65 1 +99.67 1 +99.68 1 +99.91 1 +99.92 1 diff --git a/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-3-bda0e7c77d6f4712a03389cb5032bc6d b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-3-bda0e7c77d6f4712a03389cb5032bc6d new file mode 100644 index 0000000000000..a9ec53c0cb21c --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_ntile.q (deterministic)-3-bda0e7c77d6f4712a03389cb5032bc6d @@ -0,0 +1,1049 @@ +0.02 1 +0.21 1 +0.27 1 +0.37 1 +0.37 1 +0.47 1 +0.48 1 +0.52 1 +0.6 1 +0.63 1 +0.63 1 +0.66 1 +0.73 1 +0.74 1 +0.74 1 +0.78 1 +0.8 1 +0.86 1 +0.86 1 +0.98 1 +1.17 1 +1.29 1 +1.3 1 +1.31 1 +1.37 1 +1.42 1 +1.45 1 +1.5 1 +1.53 1 +1.61 1 +1.62 1 +1.69 1 +1.71 1 +1.79 1 +1.98 1 +2.07 1 +2.09 1 +2.13 1 +2.16 1 +2.26 1 +2.29 1 +2.34 1 +2.34 1 +2.36 1 +2.43 1 +2.43 1 +2.52 1 +2.53 1 +2.53 1 +2.61 1 +2.63 1 +2.74 1 +2.75 1 +2.75 1 +2.79 1 +2.8 1 +2.82 1 +2.89 1 +2.89 1 +2.89 1 +2.9 1 +2.92 1 +3.03 1 +3.07 1 +3.07 1 +3.12 1 +3.15 1 +3.17 1 +3.27 1 +3.27 1 +3.29 1 +3.4 1 +3.4 1 +3.4 1 +3.4 1 +3.42 1 +3.66 1 +3.67 1 +3.69 1 +3.71 1 +3.78 1 +3.83 1 +3.86 1 +3.92 1 +3.98 1 +3.99 1 +4.04 1 +4.16 1 +4.25 1 +4.27 1 +4.44 1 +4.48 1 +4.53 1 +4.57 1 +4.58 1 +4.62 1 +4.81 1 +4.82 1 +4.83 1 +4.92 1 +4.95 1 +4.96 1 +4.97 1 +4.98 1 +5.09 1 +5.11 1 +5.19 1 +5.23 1 +5.3 1 +5.31 1 +5.31 1 +5.35 1 +5.42 1 +5.51 1 +5.55 1 +5.58 1 +5.74 1 +5.82 1 +5.84 1 +5.93 1 +5.93 1 +5.96 1 +6.06 1 +6.06 1 +6.21 1 +6.28 1 +6.46 1 +6.52 2 +6.54 1 +6.56 1 +6.57 1 +6.58 1 +6.61 1 +6.62 1 +6.76 1 +6.81 1 +6.81 1 +6.96 1 +6.98 1 +7.02 1 +7.03 1 +7.14 1 +7.18 1 +7.24 1 +7.24 1 +7.31 1 +7.36 1 +7.37 1 +7.45 1 +7.53 1 +7.62 1 +7.66 1 +7.71 1 +7.71 1 +7.8 1 +7.92 1 +8.05 1 +8.09 1 +8.21 1 +8.33 1 +8.33 1 +8.49 1 +8.49 1 +8.52 1 +8.56 1 +8.61 1 +8.62 2 +8.72 1 +8.76 1 +8.79 1 +8.82 1 +8.84 1 +8.95 1 +8.98 1 +9.14 1 +9.19 1 +9.21 1 +9.22 1 +9.26 1 +9.27 1 +9.39 2 +9.4 1 +9.42 1 +9.5 1 +9.51 1 +9.56 1 +9.6 1 +9.61 1 +9.62 1 +9.64 1 +9.81 1 +9.87 1 +9.88 1 +9.93 1 +9.94 1 +9.96 1 +9.99 1 +10.15 1 +10.21 1 +10.22 1 +10.23 1 +10.24 1 +10.36 1 +10.38 1 +10.38 1 +10.41 1 +10.47 1 +10.49 1 +10.49 1 +10.51 1 +10.52 1 +10.7 1 +10.71 1 +10.85 1 +10.99 1 +11.02 1 +11.12 1 +11.12 1 +11.16 1 +11.2 1 +11.26 1 +11.27 1 +11.35 1 +11.35 1 +11.4 1 +11.43 1 +11.44 1 +11.44 1 +11.46 1 +11.48 1 +11.5 1 +11.54 1 +11.63 1 +11.66 1 +11.69 1 +11.83 1 +11.9 1 +11.91 1 +11.96 1 +12.02 1 +12.13 1 +12.14 1 +12.15 1 +12.3 1 +12.3 1 +12.3 2 +12.34 1 +12.35 1 +12.43 1 +12.43 1 +12.64 1 +12.66 1 +12.7 1 +12.72 1 
+12.73 1 +12.74 2 +12.82 1 +12.85 1 +13.02 1 +13.04 1 +13.08 1 +13.14 1 +13.2 1 +13.2 1 +13.22 1 +13.23 1 +13.3 1 +13.3 1 +13.44 1 +13.44 1 +13.44 1 +13.49 1 +13.6 1 +13.66 1 +13.71 1 +13.72 1 +13.8 1 +13.83 1 +13.84 1 +13.88 1 +13.95 1 +14.07 1 +14.16 1 +14.17 1 +14.22 1 +14.24 1 +14.26 1 +14.29 1 +14.33 1 +14.39 1 +14.44 1 +14.51 1 +14.51 1 +14.52 1 +14.62 1 +14.69 1 +14.72 1 +14.75 1 +14.83 1 +14.83 1 +14.84 1 +14.9 1 +14.91 1 +14.92 1 +14.99 1 +15.0 1 +15.01 1 +15.09 1 +15.09 1 +15.09 1 +15.1 1 +15.12 1 +15.13 1 +15.16 1 +15.18 1 +15.22 1 +15.27 1 +15.28 1 +15.32 1 +15.38 1 +15.46 1 +15.46 1 +15.51 1 +15.54 1 +15.87 1 +15.94 1 +15.97 1 +15.98 1 +16.04 2 +16.1 1 +16.12 1 +16.13 1 +16.15 1 +16.29 1 +16.35 1 +16.36 1 +16.38 1 +16.4 1 +16.42 1 +16.47 1 +16.49 1 +16.54 1 +16.61 1 +16.66 1 +16.79 1 +16.79 1 +16.82 1 +16.87 1 +16.87 1 +16.9 1 +16.9 1 +16.91 1 +16.92 1 +17.03 1 +17.03 2 +17.08 1 +17.15 1 +17.19 1 +17.29 1 +17.33 1 +17.44 1 +17.46 1 +17.47 1 +17.51 1 +17.52 2 +17.55 1 +17.59 1 +17.63 1 +17.69 1 +17.76 1 +17.86 1 +17.89 1 +17.99 1 +18.09 1 +18.19 1 +18.2 1 +18.28 1 +18.29 1 +18.31 1 +18.34 1 +18.35 1 +18.36 1 +18.38 1 +18.38 1 +18.41 1 +18.47 1 +18.48 1 +18.79 1 +18.82 1 +18.83 1 +18.86 1 +18.86 1 +19.03 1 +19.12 1 +19.15 1 +19.2 1 +19.31 1 +19.32 1 +19.41 1 +19.47 1 +19.47 2 +19.56 1 +19.59 1 +19.63 1 +19.65 1 +19.72 1 +19.72 1 +19.79 1 +19.79 1 +19.85 1 +19.87 1 +19.9 1 +19.92 1 +19.93 1 +19.98 1 +20.02 1 +20.02 1 +20.17 1 +20.19 1 +20.22 1 +20.3 1 +20.3 1 +20.34 1 +20.39 1 +20.42 1 +20.42 1 +20.44 1 +20.55 1 +20.55 1 +20.56 1 +20.56 1 +20.58 1 +20.58 1 +20.64 1 +20.65 1 +20.75 1 +20.76 1 +20.76 1 +20.8 1 +20.82 1 +20.91 1 +20.93 1 +20.94 1 +20.94 1 +20.97 1 +21.0 1 +21.01 1 +21.01 1 +21.02 1 +21.02 1 +21.11 1 +21.11 1 +21.14 1 +21.16 1 +21.26 1 +21.27 1 +21.3 1 +21.3 1 +21.33 1 +21.33 1 +21.38 1 +21.42 1 +21.52 1 +21.53 1 +21.57 1 +21.66 1 +21.67 1 +21.69 1 +21.77 1 +21.81 1 +21.86 1 +21.91 1 +21.95 1 +22.15 1 +22.19 1 +22.19 1 +22.22 1 +22.27 1 +22.27 1 +22.33 1 +22.48 1 +22.6 1 +22.61 1 +22.64 1 +22.68 1 +22.73 1 +22.75 1 +22.94 1 +22.95 1 +23.03 1 +23.07 1 +23.15 1 +23.15 1 +23.18 1 +23.18 1 +23.25 1 +23.27 1 +23.3 1 +23.31 1 +23.45 1 +23.48 1 +23.53 1 +23.55 1 +23.59 1 +23.61 1 +23.63 1 +23.73 1 +23.77 1 +23.78 2 +23.88 1 +23.91 1 +24.03 1 +24.03 1 +24.13 1 +24.17 1 +24.18 1 +24.25 1 +24.35 1 +24.35 2 +24.42 1 +24.46 1 +24.53 2 +24.59 1 +24.61 1 +24.61 1 +24.84 1 +24.85 1 +24.86 1 +24.95 1 +25.01 1 +25.02 1 +25.03 1 +25.08 1 +25.11 1 +25.17 1 +25.32 2 +25.36 1 +25.36 1 +25.43 1 +25.49 1 +25.51 1 +25.51 1 +25.58 1 +25.59 1 +25.63 1 +25.71 1 +25.75 1 +25.8 1 +25.92 1 +25.92 1 +25.95 1 +25.97 2 +26.0 1 +26.17 2 +26.21 1 +26.22 1 +26.24 1 +26.28 1 +26.44 1 +26.55 1 +26.55 1 +26.65 1 +26.67 1 +26.71 1 +26.71 1 +26.73 1 +26.74 1 +26.79 1 +26.84 1 +26.87 1 +27.02 1 +27.14 1 +27.2 1 +27.27 1 +27.29 1 +27.36 1 +27.39 1 +27.4 1 +27.42 1 +27.46 1 +27.54 1 +27.54 1 +27.61 1 +27.62 1 +27.89 1 +28.02 1 +28.1 1 +28.13 1 +28.14 1 +28.15 1 +28.17 1 +28.19 1 +28.29 1 +28.36 1 +28.4 1 +28.42 2 +28.44 1 +28.52 1 +28.52 1 +28.61 1 +28.64 1 +28.68 1 +28.69 1 +28.69 1 +28.71 1 +28.71 2 +28.77 1 +28.77 1 +28.85 1 +28.86 1 +28.91 1 +28.96 1 +28.96 1 +28.98 1 +29.0 1 +29.11 1 +29.19 1 +29.22 1 +29.24 1 +29.25 1 +29.36 1 +29.41 1 +29.46 1 +29.49 1 +29.52 2 +29.62 1 +29.63 1 +29.66 1 +29.73 1 +29.76 1 +29.78 1 +29.88 1 +29.96 1 +30.04 1 +30.04 1 +30.09 1 +30.12 1 +30.13 1 +30.16 1 +30.17 1 +30.21 1 +30.22 1 +30.28 1 +30.35 1 +30.37 2 +30.41 1 +30.41 1 +30.49 1 +30.55 1 +30.55 1 +30.58 1 +30.61 1 
+30.62 1 +30.66 1 +30.67 1 +30.78 1 +30.78 1 +30.87 1 +30.89 1 +30.9 1 +30.92 1 +30.98 2 +30.99 1 +31.01 1 +31.16 1 +31.23 1 +31.3 1 +31.33 1 +31.36 1 +31.45 1 +31.46 1 +31.5 1 +31.61 1 +31.63 1 +31.64 1 +31.66 1 +31.68 1 +31.74 1 +31.75 1 +31.76 1 +31.84 1 +32.04 1 +32.06 1 +32.13 1 +32.17 1 +32.2 1 +32.25 1 +32.25 1 +32.31 1 +32.33 1 +32.39 1 +32.56 1 +32.56 1 +32.61 1 +32.74 1 +32.85 1 +32.89 1 +32.98 1 +33.0 1 +33.01 1 +33.02 1 +33.02 1 +33.02 1 +33.11 1 +33.12 1 +33.18 1 +33.18 1 +33.19 1 +33.24 1 +33.3 1 +33.36 1 +33.36 1 +33.38 1 +33.49 1 +33.52 2 +33.6 2 +33.64 1 +33.64 1 +33.66 1 +33.67 1 +33.72 1 +33.76 1 +33.9 1 +34.05 1 +34.06 1 +34.11 1 +34.14 1 +34.15 1 +34.17 1 +34.2 1 +34.41 2 +34.48 2 +34.49 1 +34.52 1 +34.53 1 +34.54 1 +34.62 1 +34.68 1 +34.68 1 +34.72 1 +34.81 1 +34.83 1 +34.84 1 +34.9 1 +34.95 1 +34.95 1 +34.97 1 +34.97 1 +34.98 1 +35.01 1 +35.02 1 +35.1 1 +35.15 1 +35.16 1 +35.23 1 +35.24 1 +35.36 1 +35.36 1 +35.49 1 +35.62 1 +35.68 1 +35.72 1 +35.84 1 +35.85 1 +35.9 1 +36.05 2 +36.09 1 +36.11 1 +36.12 1 +36.13 1 +36.22 1 +36.56 1 +36.57 1 +36.57 1 +36.62 1 +36.7 1 +36.72 1 +36.73 1 +36.84 1 +36.86 1 +36.93 1 +36.93 1 +37.02 1 +37.08 1 +37.12 1 +37.23 2 +37.32 1 +37.34 1 +37.37 1 +37.76 1 +37.77 1 +37.8 1 +37.93 1 +37.94 1 +37.96 1 +38.0 1 +38.04 1 +38.04 1 +38.04 1 +38.05 1 +38.07 1 +38.14 1 +38.22 1 +38.28 1 +38.37 2 +38.39 1 +38.43 1 +38.53 1 +38.6 1 +38.62 1 +38.66 1 +38.67 1 +38.73 1 +38.74 1 +38.92 1 +38.94 1 +39.01 1 +39.04 1 +39.05 1 +39.11 1 +39.27 1 +39.29 1 +39.41 1 +39.46 1 +39.49 1 +39.55 1 +39.57 1 +39.6 1 +39.63 1 +39.73 1 +39.74 1 +39.78 2 +39.8 1 +39.84 1 +39.84 1 +39.85 1 +39.92 1 +40.01 1 +40.04 1 +40.15 1 +40.21 2 +40.22 1 +40.24 1 +40.26 1 +40.27 1 +40.39 2 +40.43 1 +40.46 1 +40.5 1 +40.59 1 +40.6 1 +40.63 1 +40.76 1 +40.79 1 +40.84 1 +40.89 1 +40.91 1 +40.94 1 +40.96 1 +41.02 1 +41.08 1 +41.2 1 +41.24 1 +41.33 1 +41.34 1 +41.34 2 +41.36 1 +41.36 1 +41.37 1 +41.54 1 +41.56 1 +41.69 1 +41.73 1 +41.75 1 +41.75 1 +41.83 1 +41.86 1 +41.9 1 +42.0 2 +42.02 1 +42.03 1 +42.04 1 +42.22 1 +42.3 1 +42.37 1 +42.41 2 +42.47 1 +42.55 1 +42.56 1 +42.57 1 +42.76 1 +42.89 1 +42.96 1 +42.96 1 +43.0 1 +43.04 1 +43.04 3 +43.18 1 +43.23 1 +43.3 1 +43.34 1 +43.34 2 +43.4 1 +43.42 1 +43.58 1 +43.64 1 +43.67 1 +43.76 2 +43.84 1 +43.85 1 +43.92 1 +43.95 1 +43.95 1 +43.96 1 +43.96 1 +44.04 1 +44.11 1 +44.12 1 +44.12 1 +44.22 3 +44.24 1 +44.27 1 +44.32 1 +44.36 1 +44.4 1 +44.57 1 +44.6 1 +44.63 1 +44.66 1 +44.73 1 +44.75 1 +44.8 1 +44.83 1 +44.9 1 +44.92 1 +44.93 1 +45.0 1 +45.02 1 +45.06 1 +45.06 1 +45.09 1 +45.1 1 +45.1 1 +45.11 1 +45.12 2 +45.14 1 +45.14 2 +45.28 2 +45.29 1 +45.46 1 +45.49 1 +45.53 1 +45.53 2 +45.54 1 +45.69 1 +45.71 1 +45.78 1 +45.81 2 +45.86 1 +45.9 1 +45.94 1 +46.03 1 +46.03 1 +46.09 1 +46.18 1 +46.19 1 +46.28 1 +46.3 1 +46.31 1 +46.33 1 +46.36 1 +46.39 1 +46.52 1 +46.53 1 +46.54 1 +46.57 1 +46.59 1 +46.67 1 +46.69 1 +46.73 1 +46.73 1 +46.73 2 +46.74 1 +46.81 1 +46.87 1 +46.88 1 +46.9 1 +46.93 1 +46.98 1 +47.0 1 +47.03 1 +47.03 1 +47.06 1 +47.15 1 +47.22 1 +47.3 1 +47.31 2 +47.37 2 +47.4 1 +47.46 1 +47.49 1 +47.55 1 +47.6 1 +47.66 1 +47.68 1 +47.71 1 +47.72 1 +47.82 1 +47.86 2 +47.91 1 +47.91 2 +47.95 1 +47.98 1 +48.0 1 +48.08 1 +48.5 1 +48.52 1 +48.71 1 +48.78 1 +48.8 1 +48.85 1 +48.89 1 +48.96 1 +48.98 1 +49.04 1 +49.05 1 +49.16 1 +49.21 1 +49.32 1 +49.34 1 +49.34 1 +49.38 1 +49.44 2 +49.45 1 +49.45 1 +49.46 1 +49.46 1 +49.52 1 +49.56 1 +49.59 1 +49.63 1 +49.67 1 +49.69 1 +49.71 2 +49.72 1 +49.73 1 +49.79 1 +49.84 1 +49.85 2 diff --git 
a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-0-2e0cbc2d7c5f16657edacd9e7209e6e7 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-0-2e0cbc2d7c5f16657edacd9e7209e6e7 new file mode 100644 index 0000000000000..612bdf44c0cdf --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-0-2e0cbc2d7c5f16657edacd9e7209e6e7 @@ -0,0 +1,1049 @@ + 1 + 1 + 1 +alice allen 1 +alice allen 1 +alice allen 1 +alice brown 1 +alice carson 1 +alice davidson 1 +alice falkner 1 +alice garcia 2 +alice hernandez 1 +alice hernandez 1 +alice johnson 1 +alice king 1 +alice king 1 +alice king 1 +alice laertes 1 +alice laertes 1 +alice miller 1 +alice nixon 1 +alice nixon 1 +alice nixon 1 +alice ovid 1 +alice polk 1 +alice quirinius 1 +alice quirinius 1 +alice robinson 1 +alice robinson 1 +alice steinbeck 1 +alice steinbeck 1 +alice steinbeck 1 +alice underhill 1 +alice van buren 1 +alice xylophone 1 +alice xylophone 1 +alice xylophone 1 +alice zipper 1 +alice zipper 1 +alice zipper 1 +bob brown 1 +bob brown 1 +bob brown 2 +bob carson 1 +bob davidson 1 +bob davidson 1 +bob davidson 1 +bob ellison 1 +bob ellison 1 +bob ellison 1 +bob ellison 2 +bob falkner 1 +bob garcia 1 +bob garcia 1 +bob garcia 1 +bob garcia 1 +bob garcia 2 +bob hernandez 1 +bob ichabod 1 +bob king 1 +bob king 1 +bob king 1 +bob laertes 1 +bob laertes 1 +bob miller 1 +bob ovid 1 +bob ovid 1 +bob ovid 1 +bob ovid 1 +bob polk 1 +bob quirinius 1 +bob steinbeck 1 +bob van buren 1 +bob white 1 +bob white 1 +bob xylophone 1 +bob xylophone 1 +bob young 1 +bob zipper 1 +bob zipper 1 +bob zipper 1 +calvin allen 1 +calvin brown 1 +calvin brown 1 +calvin brown 1 +calvin carson 1 +calvin davidson 1 +calvin davidson 1 +calvin ellison 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin falkner 1 +calvin garcia 1 +calvin hernandez 1 +calvin johnson 2 +calvin laertes 1 +calvin laertes 1 +calvin nixon 1 +calvin nixon 1 +calvin nixon 1 +calvin ovid 1 +calvin ovid 1 +calvin ovid 1 +calvin ovid 1 +calvin polk 1 +calvin quirinius 1 +calvin quirinius 1 +calvin robinson 1 +calvin steinbeck 1 +calvin steinbeck 1 +calvin steinbeck 1 +calvin thompson 1 +calvin thompson 1 +calvin underhill 1 +calvin van buren 1 +calvin van buren 1 +calvin white 1 +calvin white 1 +calvin xylophone 1 +calvin xylophone 1 +calvin xylophone 1 +calvin young 1 +calvin young 1 +calvin zipper 1 +calvin zipper 1 +david allen 1 +david allen 1 +david brown 1 +david brown 1 +david davidson 1 +david davidson 1 +david davidson 1 +david davidson 2 +david ellison 1 +david ellison 1 +david ellison 1 +david hernandez 1 +david ichabod 1 +david ichabod 1 +david laertes 1 +david nixon 1 +david ovid 1 +david ovid 1 +david quirinius 1 +david quirinius 1 +david quirinius 1 +david robinson 1 +david robinson 1 +david thompson 1 +david underhill 1 +david underhill 1 +david underhill 1 +david van buren 1 +david van buren 1 +david white 2 +david xylophone 1 +david xylophone 1 +david xylophone 1 +david young 1 +david young 1 +ethan allen 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan carson 1 +ethan ellison 1 +ethan ellison 1 +ethan falkner 1 +ethan falkner 1 +ethan garcia 1 +ethan hernandez 1 +ethan johnson 1 +ethan king 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan laertes 1 +ethan miller 1 +ethan nixon 1 +ethan ovid 1 +ethan polk 1 +ethan polk 1 +ethan polk 1 +ethan polk 1 +ethan 
quirinius 1 +ethan quirinius 1 +ethan quirinius 1 +ethan robinson 1 +ethan robinson 1 +ethan underhill 1 +ethan van buren 1 +ethan white 1 +ethan white 1 +ethan xylophone 1 +ethan zipper 1 +ethan zipper 1 +fred davidson 1 +fred davidson 1 +fred davidson 1 +fred ellison 1 +fred ellison 1 +fred ellison 1 +fred falkner 1 +fred falkner 1 +fred falkner 1 +fred hernandez 1 +fred ichabod 1 +fred ichabod 2 +fred johnson 1 +fred king 1 +fred king 1 +fred laertes 1 +fred miller 1 +fred nixon 1 +fred nixon 1 +fred nixon 1 +fred nixon 2 +fred polk 1 +fred polk 1 +fred polk 1 +fred polk 1 +fred quirinius 1 +fred quirinius 1 +fred robinson 1 +fred steinbeck 1 +fred steinbeck 1 +fred steinbeck 1 +fred underhill 1 +fred van buren 1 +fred van buren 1 +fred van buren 1 +fred van buren 1 +fred white 1 +fred young 1 +fred young 1 +fred zipper 1 +gabriella allen 1 +gabriella allen 1 +gabriella brown 1 +gabriella brown 1 +gabriella carson 1 +gabriella davidson 1 +gabriella ellison 1 +gabriella ellison 1 +gabriella falkner 1 +gabriella falkner 1 +gabriella falkner 1 +gabriella garcia 1 +gabriella hernandez 1 +gabriella hernandez 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella ichabod 1 +gabriella king 1 +gabriella king 1 +gabriella laertes 1 +gabriella miller 1 +gabriella ovid 1 +gabriella ovid 1 +gabriella polk 1 +gabriella polk 1 +gabriella steinbeck 1 +gabriella steinbeck 1 +gabriella thompson 1 +gabriella thompson 1 +gabriella thompson 1 +gabriella van buren 1 +gabriella van buren 1 +gabriella white 1 +gabriella young 1 +gabriella young 1 +gabriella zipper 1 +gabriella zipper 1 +holly allen 1 +holly brown 1 +holly brown 1 +holly falkner 1 +holly hernandez 1 +holly hernandez 1 +holly hernandez 1 +holly hernandez 2 +holly ichabod 1 +holly ichabod 1 +holly ichabod 1 +holly johnson 1 +holly johnson 1 +holly johnson 1 +holly king 1 +holly king 1 +holly laertes 1 +holly miller 1 +holly nixon 1 +holly nixon 1 +holly polk 1 +holly polk 1 +holly robinson 1 +holly thompson 1 +holly thompson 1 +holly thompson 1 +holly underhill 1 +holly underhill 1 +holly underhill 1 +holly underhill 1 +holly van buren 1 +holly white 1 +holly white 2 +holly xylophone 1 +holly young 1 +holly young 1 +holly zipper 1 +holly zipper 1 +irene allen 1 +irene brown 1 +irene brown 1 +irene brown 1 +irene carson 1 +irene ellison 1 +irene ellison 1 +irene falkner 1 +irene falkner 1 +irene garcia 1 +irene garcia 1 +irene garcia 1 +irene ichabod 1 +irene ichabod 1 +irene johnson 1 +irene laertes 1 +irene laertes 1 +irene laertes 1 +irene miller 1 +irene nixon 1 +irene nixon 1 +irene nixon 1 +irene ovid 1 +irene ovid 1 +irene ovid 1 +irene polk 1 +irene polk 1 +irene polk 1 +irene polk 1 +irene polk 1 +irene quirinius 1 +irene quirinius 1 +irene quirinius 1 +irene robinson 1 +irene steinbeck 1 +irene thompson 1 +irene underhill 1 +irene underhill 1 +irene van buren 1 +irene van buren 1 +irene xylophone 2 +jessica brown 2 +jessica carson 1 +jessica carson 1 +jessica carson 1 +jessica davidson 1 +jessica davidson 1 +jessica davidson 1 +jessica davidson 1 +jessica ellison 1 +jessica ellison 1 +jessica falkner 1 +jessica garcia 1 +jessica garcia 1 +jessica ichabod 1 +jessica johnson 1 +jessica johnson 1 +jessica miller 1 +jessica nixon 1 +jessica nixon 1 +jessica ovid 1 +jessica ovid 2 +jessica polk 1 +jessica quirinius 1 +jessica quirinius 1 +jessica quirinius 1 +jessica quirinius 1 +jessica robinson 1 +jessica thompson 1 +jessica thompson 3 +jessica underhill 1 +jessica underhill 1 +jessica underhill 1 
+jessica van buren 1 +jessica white 1 +jessica white 1 +jessica white 1 +jessica white 1 +jessica white 1 +jessica xylophone 1 +jessica young 1 +jessica young 1 +jessica zipper 1 +jessica zipper 1 +jessica zipper 1 +katie allen 1 +katie brown 1 +katie davidson 1 +katie ellison 1 +katie ellison 1 +katie falkner 1 +katie garcia 1 +katie garcia 1 +katie hernandez 1 +katie ichabod 1 +katie ichabod 1 +katie ichabod 1 +katie king 1 +katie king 1 +katie king 1 +katie miller 1 +katie miller 1 +katie nixon 1 +katie ovid 1 +katie polk 1 +katie polk 1 +katie robinson 1 +katie van buren 1 +katie van buren 1 +katie white 1 +katie white 1 +katie xylophone 1 +katie young 1 +katie young 1 +katie young 1 +katie zipper 1 +katie zipper 1 +luke allen 1 +luke allen 1 +luke allen 1 +luke allen 1 +luke allen 2 +luke brown 1 +luke davidson 1 +luke davidson 1 +luke ellison 1 +luke ellison 1 +luke ellison 1 +luke falkner 1 +luke falkner 1 +luke garcia 1 +luke garcia 1 +luke ichabod 1 +luke ichabod 1 +luke johnson 1 +luke johnson 1 +luke johnson 1 +luke laertes 1 +luke laertes 1 +luke laertes 1 +luke laertes 1 +luke laertes 1 +luke miller 1 +luke ovid 1 +luke ovid 1 +luke polk 1 +luke polk 1 +luke quirinius 1 +luke robinson 1 +luke robinson 1 +luke thompson 1 +luke underhill 1 +luke underhill 1 +luke underhill 2 +luke van buren 1 +luke white 1 +luke xylophone 1 +luke zipper 1 +mike allen 1 +mike brown 1 +mike carson 1 +mike carson 1 +mike carson 1 +mike davidson 1 +mike davidson 1 +mike ellison 1 +mike ellison 1 +mike ellison 1 +mike ellison 1 +mike ellison 1 +mike falkner 1 +mike garcia 1 +mike garcia 1 +mike garcia 1 +mike hernandez 1 +mike hernandez 2 +mike ichabod 1 +mike king 1 +mike king 1 +mike king 1 +mike king 1 +mike king 1 +mike king 2 +mike miller 1 +mike nixon 1 +mike nixon 1 +mike polk 1 +mike polk 1 +mike polk 1 +mike quirinius 1 +mike steinbeck 1 +mike steinbeck 1 +mike steinbeck 1 +mike steinbeck 1 +mike van buren 1 +mike van buren 1 +mike white 1 +mike white 1 +mike white 1 +mike white 2 +mike young 1 +mike young 1 +mike young 1 +mike zipper 1 +mike zipper 1 +mike zipper 1 +nick allen 1 +nick allen 1 +nick brown 1 +nick davidson 1 +nick ellison 1 +nick ellison 1 +nick falkner 1 +nick falkner 1 +nick garcia 1 +nick garcia 1 +nick garcia 1 +nick ichabod 1 +nick ichabod 1 +nick ichabod 1 +nick johnson 1 +nick johnson 1 +nick laertes 1 +nick miller 1 +nick nixon 1 +nick ovid 1 +nick polk 1 +nick quirinius 1 +nick quirinius 1 +nick robinson 1 +nick robinson 1 +nick steinbeck 1 +nick thompson 1 +nick underhill 1 +nick van buren 1 +nick xylophone 1 +nick young 1 +nick young 1 +nick zipper 1 +nick zipper 1 +oscar allen 2 +oscar brown 1 +oscar carson 1 +oscar carson 1 +oscar carson 1 +oscar carson 1 +oscar carson 1 +oscar davidson 1 +oscar ellison 1 +oscar ellison 1 +oscar falkner 1 +oscar garcia 1 +oscar hernandez 1 +oscar hernandez 1 +oscar ichabod 1 +oscar ichabod 1 +oscar ichabod 1 +oscar ichabod 1 +oscar johnson 1 +oscar johnson 1 +oscar king 1 +oscar king 1 +oscar king 2 +oscar laertes 1 +oscar laertes 1 +oscar laertes 1 +oscar laertes 1 +oscar nixon 1 +oscar ovid 1 +oscar ovid 1 +oscar ovid 2 +oscar polk 1 +oscar polk 1 +oscar quirinius 1 +oscar quirinius 1 +oscar quirinius 1 +oscar quirinius 1 +oscar robinson 1 +oscar robinson 1 +oscar robinson 1 +oscar robinson 1 +oscar steinbeck 1 +oscar thompson 1 +oscar thompson 1 +oscar thompson 1 +oscar thompson 2 +oscar underhill 1 +oscar van buren 1 +oscar van buren 1 +oscar van buren 1 +oscar white 1 +oscar white 1 +oscar white 1 +oscar white 1 +oscar 
xylophone 1 +oscar xylophone 1 +oscar xylophone 1 +oscar zipper 1 +oscar zipper 1 +oscar zipper 1 +priscilla brown 1 +priscilla brown 1 +priscilla brown 1 +priscilla carson 1 +priscilla carson 1 +priscilla carson 1 +priscilla ichabod 1 +priscilla ichabod 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla johnson 1 +priscilla king 1 +priscilla nixon 1 +priscilla nixon 2 +priscilla ovid 1 +priscilla ovid 1 +priscilla polk 1 +priscilla quirinius 1 +priscilla thompson 1 +priscilla underhill 1 +priscilla underhill 1 +priscilla van buren 1 +priscilla van buren 1 +priscilla van buren 1 +priscilla white 1 +priscilla xylophone 1 +priscilla xylophone 1 +priscilla xylophone 1 +priscilla young 1 +priscilla young 1 +priscilla zipper 1 +priscilla zipper 1 +quinn allen 1 +quinn allen 1 +quinn brown 1 +quinn brown 1 +quinn brown 1 +quinn davidson 1 +quinn davidson 1 +quinn davidson 1 +quinn davidson 1 +quinn ellison 1 +quinn ellison 1 +quinn garcia 1 +quinn garcia 1 +quinn garcia 1 +quinn garcia 1 +quinn ichabod 1 +quinn king 1 +quinn king 1 +quinn laertes 1 +quinn laertes 1 +quinn laertes 1 +quinn nixon 1 +quinn ovid 1 +quinn quirinius 1 +quinn robinson 1 +quinn steinbeck 1 +quinn steinbeck 4 +quinn thompson 1 +quinn thompson 2 +quinn underhill 1 +quinn underhill 1 +quinn underhill 2 +quinn van buren 1 +quinn young 1 +quinn zipper 1 +quinn zipper 1 +rachel allen 1 +rachel allen 1 +rachel brown 1 +rachel brown 1 +rachel brown 1 +rachel brown 1 +rachel brown 2 +rachel carson 1 +rachel carson 1 +rachel davidson 1 +rachel ellison 1 +rachel falkner 1 +rachel falkner 1 +rachel falkner 1 +rachel falkner 1 +rachel johnson 1 +rachel king 1 +rachel king 1 +rachel laertes 1 +rachel laertes 1 +rachel ovid 1 +rachel ovid 1 +rachel polk 1 +rachel quirinius 1 +rachel robinson 1 +rachel robinson 1 +rachel robinson 1 +rachel thompson 1 +rachel thompson 1 +rachel thompson 1 +rachel underhill 1 +rachel white 1 +rachel white 1 +rachel young 1 +rachel zipper 1 +rachel zipper 1 +sarah carson 1 +sarah carson 1 +sarah carson 1 +sarah ellison 2 +sarah falkner 1 +sarah falkner 1 +sarah garcia 1 +sarah garcia 1 +sarah garcia 2 +sarah ichabod 1 +sarah ichabod 1 +sarah johnson 1 +sarah johnson 1 +sarah johnson 1 +sarah johnson 1 +sarah king 1 +sarah king 1 +sarah miller 1 +sarah ovid 1 +sarah robinson 1 +sarah robinson 1 +sarah steinbeck 1 +sarah white 1 +sarah white 1 +sarah xylophone 1 +sarah young 1 +sarah zipper 1 +tom brown 1 +tom brown 1 +tom carson 1 +tom carson 1 +tom carson 1 +tom davidson 1 +tom ellison 1 +tom ellison 1 +tom ellison 1 +tom falkner 1 +tom falkner 1 +tom hernandez 1 +tom hernandez 1 +tom ichabod 1 +tom johnson 1 +tom johnson 2 +tom king 1 +tom laertes 1 +tom laertes 1 +tom miller 1 +tom miller 1 +tom miller 2 +tom nixon 1 +tom ovid 1 +tom polk 1 +tom polk 1 +tom quirinius 1 +tom quirinius 1 +tom robinson 1 +tom robinson 1 +tom robinson 2 +tom robinson 2 +tom steinbeck 2 +tom van buren 1 +tom van buren 1 +tom van buren 1 +tom white 1 +tom young 1 +tom young 2 +tom zipper 1 +ulysses brown 1 +ulysses carson 1 +ulysses carson 1 +ulysses carson 1 +ulysses carson 1 +ulysses davidson 1 +ulysses ellison 1 +ulysses garcia 1 +ulysses hernandez 1 +ulysses hernandez 1 +ulysses hernandez 2 +ulysses ichabod 1 +ulysses ichabod 1 +ulysses johnson 1 +ulysses king 1 +ulysses laertes 1 +ulysses laertes 1 +ulysses laertes 1 +ulysses miller 1 +ulysses miller 1 +ulysses nixon 1 +ulysses ovid 1 +ulysses polk 1 +ulysses polk 1 +ulysses polk 1 +ulysses polk 2 +ulysses quirinius 1 
+ulysses robinson 1 +ulysses steinbeck 1 +ulysses steinbeck 1 +ulysses thompson 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses underhill 1 +ulysses van buren 1 +ulysses white 1 +ulysses white 1 +ulysses xylophone 1 +ulysses xylophone 1 +ulysses xylophone 1 +ulysses young 1 +ulysses young 1 +ulysses young 1 +victor allen 1 +victor allen 1 +victor brown 1 +victor brown 1 +victor brown 1 +victor brown 1 +victor davidson 1 +victor davidson 1 +victor davidson 2 +victor ellison 1 +victor ellison 1 +victor hernandez 1 +victor hernandez 1 +victor hernandez 1 +victor hernandez 1 +victor hernandez 1 +victor johnson 1 +victor johnson 1 +victor johnson 1 +victor king 1 +victor king 1 +victor laertes 1 +victor laertes 1 +victor miller 2 +victor nixon 1 +victor nixon 1 +victor ovid 1 +victor polk 1 +victor quirinius 1 +victor quirinius 1 +victor robinson 1 +victor robinson 1 +victor steinbeck 1 +victor steinbeck 1 +victor steinbeck 1 +victor thompson 1 +victor van buren 1 +victor van buren 1 +victor white 1 +victor white 1 +victor xylophone 1 +victor xylophone 1 +victor xylophone 1 +victor xylophone 1 +victor xylophone 2 +victor young 1 +victor zipper 1 +wendy allen 1 +wendy allen 1 +wendy allen 1 +wendy brown 1 +wendy brown 1 +wendy ellison 1 +wendy ellison 1 +wendy falkner 1 +wendy falkner 1 +wendy falkner 1 +wendy garcia 1 +wendy garcia 1 +wendy garcia 1 +wendy garcia 1 +wendy hernandez 1 +wendy ichabod 1 +wendy king 1 +wendy king 1 +wendy king 1 +wendy laertes 1 +wendy laertes 1 +wendy laertes 1 +wendy miller 1 +wendy miller 1 +wendy nixon 1 +wendy nixon 1 +wendy ovid 1 +wendy ovid 1 +wendy polk 1 +wendy polk 1 +wendy quirinius 1 +wendy quirinius 1 +wendy robinson 1 +wendy robinson 1 +wendy robinson 1 +wendy steinbeck 1 +wendy thompson 1 +wendy thompson 1 +wendy underhill 1 +wendy underhill 1 +wendy underhill 1 +wendy van buren 1 +wendy van buren 1 +wendy white 1 +wendy xylophone 1 +wendy xylophone 1 +wendy young 1 +wendy young 3 +xavier allen 1 +xavier allen 1 +xavier allen 1 +xavier brown 1 +xavier brown 1 +xavier brown 1 +xavier carson 1 +xavier carson 1 +xavier davidson 1 +xavier davidson 1 +xavier davidson 1 +xavier ellison 1 +xavier ellison 1 +xavier garcia 1 +xavier hernandez 1 +xavier hernandez 1 +xavier hernandez 1 +xavier ichabod 1 +xavier ichabod 1 +xavier johnson 1 +xavier johnson 1 +xavier king 1 +xavier king 1 +xavier laertes 1 +xavier ovid 1 +xavier polk 1 +xavier polk 1 +xavier polk 1 +xavier polk 1 +xavier quirinius 1 +xavier quirinius 1 +xavier quirinius 1 +xavier quirinius 1 +xavier thompson 1 +xavier underhill 1 +xavier white 1 +xavier white 1 +xavier xylophone 1 +xavier zipper 2 +yuri allen 1 +yuri allen 1 +yuri brown 1 +yuri brown 1 +yuri carson 1 +yuri carson 1 +yuri ellison 1 +yuri ellison 1 +yuri falkner 1 +yuri falkner 1 +yuri garcia 1 +yuri hernandez 1 +yuri johnson 1 +yuri johnson 1 +yuri johnson 1 +yuri king 1 +yuri laertes 1 +yuri laertes 1 +yuri nixon 1 +yuri nixon 1 +yuri polk 1 +yuri polk 1 +yuri polk 1 +yuri quirinius 1 +yuri quirinius 1 +yuri quirinius 1 +yuri steinbeck 1 +yuri steinbeck 1 +yuri thompson 1 +yuri underhill 1 +yuri underhill 1 +yuri white 1 +yuri xylophone 1 +zach allen 2 +zach brown 1 +zach brown 1 +zach brown 1 +zach brown 1 +zach brown 1 +zach carson 1 +zach ellison 1 +zach falkner 1 +zach falkner 1 +zach garcia 1 +zach garcia 1 +zach garcia 1 +zach garcia 1 +zach ichabod 1 +zach ichabod 1 +zach king 1 +zach king 1 +zach king 2 +zach miller 1 +zach miller 1 
+zach miller 1 +zach ovid 1 +zach ovid 1 +zach ovid 1 +zach ovid 1 +zach quirinius 1 +zach robinson 2 +zach steinbeck 2 +zach steinbeck 2 +zach thompson 1 +zach thompson 1 +zach underhill 1 +zach white 1 +zach xylophone 1 +zach xylophone 1 +zach young 1 +zach zipper 1 +zach zipper 1 +zach zipper 1 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-1-5c5f373e325115d710a7a23fcb1626f1 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-1-5c5f373e325115d710a7a23fcb1626f1 new file mode 100644 index 0000000000000..22a6f27253dcf --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-1-5c5f373e325115d710a7a23fcb1626f1 @@ -0,0 +1,1049 @@ +zach zipper 4 +zach zipper 3 +zach zipper 1 +zach young 4 +zach xylophone 4 +zach xylophone 1 +zach white 1 +zach underhill 1 +zach thompson 2 +zach thompson 2 +zach steinbeck 5 +zach steinbeck 1 +zach robinson 1 +zach quirinius 3 +zach ovid 5 +zach ovid 4 +zach ovid 3 +zach ovid 1 +zach miller 5 +zach miller 3 +zach miller 1 +zach king 6 +zach king 4 +zach king 1 +zach ichabod 3 +zach ichabod 2 +zach garcia 6 +zach garcia 3 +zach garcia 1 +zach garcia 1 +zach falkner 2 +zach falkner 1 +zach ellison 4 +zach carson 3 +zach brown 5 +zach brown 4 +zach brown 3 +zach brown 2 +zach brown 1 +zach allen 4 +yuri xylophone 3 +yuri white 2 +yuri underhill 6 +yuri underhill 4 +yuri thompson 4 +yuri steinbeck 6 +yuri steinbeck 2 +yuri quirinius 4 +yuri quirinius 3 +yuri quirinius 1 +yuri polk 4 +yuri polk 3 +yuri polk 2 +yuri nixon 3 +yuri nixon 2 +yuri laertes 3 +yuri laertes 1 +yuri king 5 +yuri johnson 4 +yuri johnson 3 +yuri johnson 1 +yuri hernandez 4 +yuri garcia 3 +yuri falkner 7 +yuri falkner 3 +yuri ellison 1 +yuri ellison 1 +yuri carson 7 +yuri carson 4 +yuri brown 3 +yuri brown 1 +yuri allen 3 +yuri allen 2 +xavier zipper 1 +xavier xylophone 1 +xavier white 3 +xavier white 3 +xavier underhill 2 +xavier thompson 3 +xavier quirinius 6 +xavier quirinius 5 +xavier quirinius 2 +xavier quirinius 1 +xavier polk 5 +xavier polk 3 +xavier polk 3 +xavier polk 3 +xavier ovid 5 +xavier laertes 4 +xavier king 3 +xavier king 1 +xavier johnson 3 +xavier johnson 1 +xavier ichabod 2 +xavier ichabod 2 +xavier hernandez 3 +xavier hernandez 1 +xavier hernandez 1 +xavier garcia 4 +xavier ellison 1 +xavier ellison 1 +xavier davidson 5 +xavier davidson 4 +xavier davidson 1 +xavier carson 5 +xavier carson 3 +xavier brown 4 +xavier brown 2 +xavier brown 2 +xavier allen 6 +xavier allen 3 +xavier allen 1 +wendy young 8 +wendy young 2 +wendy xylophone 6 +wendy xylophone 4 +wendy white 5 +wendy van buren 2 +wendy van buren 2 +wendy underhill 6 +wendy underhill 5 +wendy underhill 4 +wendy thompson 5 +wendy thompson 2 +wendy steinbeck 1 +wendy robinson 5 +wendy robinson 3 +wendy robinson 2 +wendy quirinius 6 +wendy quirinius 4 +wendy polk 2 +wendy polk 2 +wendy ovid 4 +wendy ovid 1 +wendy nixon 3 +wendy nixon 1 +wendy miller 2 +wendy miller 1 +wendy laertes 3 +wendy laertes 3 +wendy laertes 1 +wendy king 5 +wendy king 4 +wendy king 1 +wendy ichabod 3 +wendy hernandez 1 +wendy garcia 7 +wendy garcia 5 +wendy garcia 4 +wendy garcia 1 +wendy falkner 3 +wendy falkner 1 +wendy falkner 1 +wendy ellison 2 +wendy ellison 1 +wendy brown 5 +wendy brown 2 +wendy allen 6 +wendy allen 2 +wendy allen 2 +victor zipper 3 +victor young 1 +victor xylophone 6 +victor xylophone 6 +victor xylophone 2 +victor xylophone 1 +victor xylophone 1 +victor white 2 +victor white 1 +victor van buren 4 +victor van buren 4 +victor thompson 2 
+victor steinbeck 5 +victor steinbeck 2 +victor steinbeck 1 +victor robinson 4 +victor robinson 2 +victor quirinius 3 +victor quirinius 1 +victor polk 3 +victor ovid 2 +victor nixon 6 +victor nixon 4 +victor miller 1 +victor laertes 4 +victor laertes 3 +victor king 6 +victor king 1 +victor johnson 2 +victor johnson 2 +victor johnson 1 +victor hernandez 6 +victor hernandez 4 +victor hernandez 3 +victor hernandez 1 +victor hernandez 1 +victor ellison 7 +victor ellison 4 +victor davidson 6 +victor davidson 2 +victor davidson 2 +victor brown 4 +victor brown 3 +victor brown 2 +victor brown 1 +victor allen 4 +victor allen 2 +ulysses young 7 +ulysses young 6 +ulysses young 3 +ulysses xylophone 6 +ulysses xylophone 3 +ulysses xylophone 2 +ulysses white 6 +ulysses white 2 +ulysses van buren 3 +ulysses underhill 8 +ulysses underhill 4 +ulysses underhill 3 +ulysses underhill 2 +ulysses underhill 2 +ulysses underhill 1 +ulysses underhill 1 +ulysses thompson 5 +ulysses steinbeck 3 +ulysses steinbeck 1 +ulysses robinson 5 +ulysses quirinius 8 +ulysses polk 6 +ulysses polk 4 +ulysses polk 1 +ulysses polk 1 +ulysses ovid 3 +ulysses nixon 1 +ulysses miller 3 +ulysses miller 2 +ulysses laertes 5 +ulysses laertes 4 +ulysses laertes 2 +ulysses king 2 +ulysses johnson 5 +ulysses ichabod 1 +ulysses ichabod 1 +ulysses hernandez 6 +ulysses hernandez 3 +ulysses hernandez 2 +ulysses garcia 2 +ulysses ellison 2 +ulysses davidson 8 +ulysses carson 4 +ulysses carson 3 +ulysses carson 2 +ulysses carson 1 +ulysses brown 3 +tom zipper 5 +tom young 2 +tom young 1 +tom white 1 +tom van buren 5 +tom van buren 2 +tom van buren 1 +tom steinbeck 4 +tom robinson 8 +tom robinson 4 +tom robinson 3 +tom robinson 2 +tom quirinius 5 +tom quirinius 1 +tom polk 3 +tom polk 2 +tom ovid 2 +tom nixon 5 +tom miller 1 +tom miller 1 +tom miller 1 +tom laertes 4 +tom laertes 2 +tom king 1 +tom johnson 8 +tom johnson 1 +tom ichabod 1 +tom hernandez 3 +tom hernandez 2 +tom falkner 3 +tom falkner 2 +tom ellison 5 +tom ellison 3 +tom ellison 1 +tom davidson 7 +tom carson 3 +tom carson 3 +tom carson 1 +tom brown 4 +tom brown 2 +sarah zipper 1 +sarah young 1 +sarah xylophone 2 +sarah white 4 +sarah white 3 +sarah steinbeck 6 +sarah robinson 3 +sarah robinson 2 +sarah ovid 1 +sarah miller 1 +sarah king 3 +sarah king 2 +sarah johnson 7 +sarah johnson 6 +sarah johnson 4 +sarah johnson 2 +sarah ichabod 4 +sarah ichabod 3 +sarah garcia 2 +sarah garcia 2 +sarah garcia 2 +sarah falkner 7 +sarah falkner 1 +sarah ellison 1 +sarah carson 6 +sarah carson 4 +sarah carson 4 +rachel zipper 8 +rachel zipper 5 +rachel young 3 +rachel white 2 +rachel white 2 +rachel underhill 2 +rachel thompson 5 +rachel thompson 4 +rachel thompson 3 +rachel robinson 10 +rachel robinson 3 +rachel robinson 1 +rachel quirinius 5 +rachel polk 4 +rachel ovid 5 +rachel ovid 4 +rachel laertes 1 +rachel laertes 1 +rachel king 3 +rachel king 1 +rachel johnson 1 +rachel falkner 8 +rachel falkner 5 +rachel falkner 5 +rachel falkner 2 +rachel ellison 6 +rachel davidson 6 +rachel carson 7 +rachel carson 2 +rachel brown 5 +rachel brown 4 +rachel brown 3 +rachel brown 3 +rachel brown 1 +rachel allen 5 +rachel allen 1 +quinn zipper 2 +quinn zipper 2 +quinn young 2 +quinn van buren 2 +quinn underhill 7 +quinn underhill 6 +quinn underhill 2 +quinn thompson 5 +quinn thompson 2 +quinn steinbeck 3 +quinn steinbeck 2 +quinn robinson 2 +quinn quirinius 5 +quinn ovid 6 +quinn nixon 3 +quinn laertes 2 +quinn laertes 2 +quinn laertes 1 +quinn king 2 +quinn king 1 +quinn ichabod 1 +quinn garcia 6 +quinn 
garcia 3 +quinn garcia 2 +quinn garcia 1 +quinn ellison 7 +quinn ellison 5 +quinn davidson 7 +quinn davidson 4 +quinn davidson 3 +quinn davidson 2 +quinn brown 5 +quinn brown 3 +quinn brown 2 +quinn allen 5 +quinn allen 2 +priscilla zipper 5 +priscilla zipper 2 +priscilla young 4 +priscilla young 1 +priscilla xylophone 7 +priscilla xylophone 2 +priscilla xylophone 1 +priscilla white 4 +priscilla van buren 3 +priscilla van buren 3 +priscilla van buren 2 +priscilla underhill 5 +priscilla underhill 4 +priscilla thompson 2 +priscilla quirinius 4 +priscilla polk 5 +priscilla ovid 4 +priscilla ovid 1 +priscilla nixon 2 +priscilla nixon 1 +priscilla king 4 +priscilla johnson 4 +priscilla johnson 2 +priscilla johnson 2 +priscilla johnson 2 +priscilla johnson 1 +priscilla ichabod 3 +priscilla ichabod 2 +priscilla carson 6 +priscilla carson 5 +priscilla carson 4 +priscilla brown 5 +priscilla brown 5 +priscilla brown 3 +oscar zipper 4 +oscar zipper 4 +oscar zipper 2 +oscar xylophone 7 +oscar xylophone 5 +oscar xylophone 3 +oscar white 5 +oscar white 5 +oscar white 3 +oscar white 2 +oscar van buren 5 +oscar van buren 3 +oscar van buren 2 +oscar underhill 1 +oscar thompson 6 +oscar thompson 3 +oscar thompson 3 +oscar thompson 2 +oscar steinbeck 7 +oscar robinson 7 +oscar robinson 3 +oscar robinson 3 +oscar robinson 1 +oscar quirinius 3 +oscar quirinius 3 +oscar quirinius 2 +oscar quirinius 1 +oscar polk 2 +oscar polk 2 +oscar ovid 4 +oscar ovid 2 +oscar ovid 1 +oscar nixon 1 +oscar laertes 6 +oscar laertes 4 +oscar laertes 3 +oscar laertes 2 +oscar king 4 +oscar king 2 +oscar king 1 +oscar johnson 6 +oscar johnson 3 +oscar ichabod 3 +oscar ichabod 3 +oscar ichabod 1 +oscar ichabod 1 +oscar hernandez 6 +oscar hernandez 6 +oscar garcia 4 +oscar falkner 2 +oscar ellison 2 +oscar ellison 1 +oscar davidson 1 +oscar carson 4 +oscar carson 2 +oscar carson 2 +oscar carson 1 +oscar carson 1 +oscar brown 4 +oscar allen 2 +nick zipper 7 +nick zipper 5 +nick young 4 +nick young 2 +nick xylophone 2 +nick van buren 2 +nick underhill 2 +nick thompson 2 +nick steinbeck 4 +nick robinson 3 +nick robinson 1 +nick quirinius 5 +nick quirinius 1 +nick polk 5 +nick ovid 6 +nick nixon 4 +nick miller 2 +nick laertes 3 +nick johnson 4 +nick johnson 4 +nick ichabod 3 +nick ichabod 3 +nick ichabod 1 +nick garcia 5 +nick garcia 4 +nick garcia 4 +nick falkner 3 +nick falkner 1 +nick ellison 3 +nick ellison 2 +nick davidson 4 +nick brown 3 +nick allen 5 +nick allen 4 +mike zipper 4 +mike zipper 4 +mike zipper 1 +mike young 3 +mike young 1 +mike young 1 +mike white 9 +mike white 7 +mike white 5 +mike white 2 +mike van buren 2 +mike van buren 1 +mike steinbeck 4 +mike steinbeck 2 +mike steinbeck 2 +mike steinbeck 1 +mike quirinius 7 +mike polk 4 +mike polk 2 +mike polk 2 +mike nixon 3 +mike nixon 2 +mike miller 1 +mike king 6 +mike king 5 +mike king 4 +mike king 3 +mike king 1 +mike king 1 +mike ichabod 3 +mike hernandez 2 +mike hernandez 1 +mike garcia 3 +mike garcia 2 +mike garcia 1 +mike falkner 2 +mike ellison 6 +mike ellison 5 +mike ellison 3 +mike ellison 1 +mike ellison 1 +mike davidson 5 +mike davidson 5 +mike carson 9 +mike carson 4 +mike carson 3 +mike brown 2 +mike allen 3 +luke zipper 2 +luke xylophone 1 +luke white 1 +luke van buren 2 +luke underhill 2 +luke underhill 2 +luke underhill 1 +luke thompson 3 +luke robinson 6 +luke robinson 1 +luke quirinius 3 +luke polk 3 +luke polk 1 +luke ovid 3 +luke ovid 1 +luke miller 4 +luke laertes 4 +luke laertes 2 +luke laertes 2 +luke laertes 2 +luke laertes 1 +luke johnson 4 +luke 
johnson 2 +luke johnson 1 +luke ichabod 4 +luke ichabod 1 +luke garcia 5 +luke garcia 2 +luke falkner 4 +luke falkner 2 +luke ellison 3 +luke ellison 2 +luke ellison 1 +luke davidson 2 +luke davidson 2 +luke brown 5 +luke allen 5 +luke allen 2 +luke allen 1 +luke allen 1 +luke allen 1 +katie zipper 1 +katie zipper 1 +katie young 11 +katie young 6 +katie young 1 +katie xylophone 1 +katie white 5 +katie white 3 +katie van buren 6 +katie van buren 4 +katie robinson 2 +katie polk 5 +katie polk 2 +katie ovid 3 +katie nixon 1 +katie miller 1 +katie miller 1 +katie king 7 +katie king 5 +katie king 4 +katie ichabod 6 +katie ichabod 2 +katie ichabod 1 +katie hernandez 1 +katie garcia 4 +katie garcia 3 +katie falkner 4 +katie ellison 5 +katie ellison 4 +katie davidson 1 +katie brown 6 +katie allen 1 +jessica zipper 7 +jessica zipper 6 +jessica zipper 1 +jessica young 4 +jessica young 3 +jessica xylophone 3 +jessica white 8 +jessica white 6 +jessica white 3 +jessica white 1 +jessica white 1 +jessica van buren 1 +jessica underhill 5 +jessica underhill 3 +jessica underhill 2 +jessica thompson 3 +jessica thompson 2 +jessica robinson 2 +jessica quirinius 4 +jessica quirinius 4 +jessica quirinius 3 +jessica quirinius 1 +jessica polk 4 +jessica ovid 2 +jessica ovid 1 +jessica nixon 3 +jessica nixon 2 +jessica miller 5 +jessica johnson 4 +jessica johnson 3 +jessica ichabod 5 +jessica garcia 4 +jessica garcia 3 +jessica falkner 2 +jessica ellison 5 +jessica ellison 2 +jessica davidson 5 +jessica davidson 2 +jessica davidson 2 +jessica davidson 1 +jessica carson 4 +jessica carson 2 +jessica carson 1 +jessica brown 3 +irene xylophone 3 +irene van buren 2 +irene van buren 1 +irene underhill 5 +irene underhill 1 +irene thompson 6 +irene steinbeck 1 +irene robinson 1 +irene quirinius 6 +irene quirinius 5 +irene quirinius 5 +irene polk 3 +irene polk 2 +irene polk 2 +irene polk 1 +irene polk 1 +irene ovid 6 +irene ovid 6 +irene ovid 5 +irene nixon 4 +irene nixon 4 +irene nixon 1 +irene miller 6 +irene laertes 5 +irene laertes 3 +irene laertes 3 +irene johnson 2 +irene ichabod 7 +irene ichabod 1 +irene garcia 4 +irene garcia 2 +irene garcia 2 +irene falkner 5 +irene falkner 2 +irene ellison 4 +irene ellison 3 +irene carson 1 +irene brown 4 +irene brown 4 +irene brown 1 +irene allen 2 +holly zipper 3 +holly zipper 3 +holly young 2 +holly young 2 +holly xylophone 1 +holly white 3 +holly white 1 +holly van buren 4 +holly underhill 6 +holly underhill 3 +holly underhill 3 +holly underhill 2 +holly thompson 2 +holly thompson 1 +holly thompson 1 +holly robinson 2 +holly polk 7 +holly polk 4 +holly nixon 5 +holly nixon 1 +holly miller 4 +holly laertes 5 +holly king 4 +holly king 1 +holly johnson 2 +holly johnson 2 +holly johnson 2 +holly ichabod 4 +holly ichabod 4 +holly ichabod 2 +holly hernandez 9 +holly hernandez 3 +holly hernandez 3 +holly hernandez 2 +holly falkner 6 +holly brown 3 +holly brown 2 +holly allen 1 +gabriella zipper 5 +gabriella zipper 1 +gabriella young 3 +gabriella young 1 +gabriella white 3 +gabriella van buren 3 +gabriella van buren 1 +gabriella thompson 5 +gabriella thompson 5 +gabriella thompson 5 +gabriella steinbeck 4 +gabriella steinbeck 1 +gabriella polk 4 +gabriella polk 4 +gabriella ovid 2 +gabriella ovid 1 +gabriella miller 1 +gabriella laertes 4 +gabriella king 3 +gabriella king 3 +gabriella ichabod 3 +gabriella ichabod 3 +gabriella ichabod 3 +gabriella ichabod 2 +gabriella ichabod 1 +gabriella hernandez 9 +gabriella hernandez 5 +gabriella garcia 2 +gabriella falkner 4 +gabriella falkner 3 
+gabriella falkner 2 +gabriella ellison 3 +gabriella ellison 1 +gabriella davidson 2 +gabriella carson 1 +gabriella brown 6 +gabriella brown 3 +gabriella allen 5 +gabriella allen 3 +fred zipper 1 +fred young 2 +fred young 1 +fred white 3 +fred van buren 7 +fred van buren 1 +fred van buren 1 +fred van buren 1 +fred underhill 4 +fred steinbeck 4 +fred steinbeck 2 +fred steinbeck 1 +fred robinson 3 +fred quirinius 7 +fred quirinius 4 +fred polk 7 +fred polk 6 +fred polk 4 +fred polk 2 +fred nixon 7 +fred nixon 5 +fred nixon 1 +fred nixon 1 +fred miller 1 +fred laertes 4 +fred king 6 +fred king 3 +fred johnson 4 +fred ichabod 3 +fred ichabod 2 +fred hernandez 1 +fred falkner 4 +fred falkner 3 +fred falkner 3 +fred ellison 5 +fred ellison 2 +fred ellison 1 +fred davidson 2 +fred davidson 2 +fred davidson 1 +ethan zipper 2 +ethan zipper 1 +ethan xylophone 3 +ethan white 5 +ethan white 2 +ethan van buren 1 +ethan underhill 1 +ethan robinson 3 +ethan robinson 1 +ethan quirinius 6 +ethan quirinius 2 +ethan quirinius 1 +ethan polk 3 +ethan polk 1 +ethan polk 1 +ethan polk 1 +ethan ovid 2 +ethan nixon 7 +ethan miller 5 +ethan laertes 4 +ethan laertes 4 +ethan laertes 3 +ethan laertes 2 +ethan laertes 2 +ethan laertes 2 +ethan laertes 1 +ethan king 1 +ethan johnson 1 +ethan hernandez 3 +ethan garcia 8 +ethan falkner 2 +ethan falkner 1 +ethan ellison 6 +ethan ellison 4 +ethan carson 6 +ethan brown 4 +ethan brown 3 +ethan brown 3 +ethan brown 1 +ethan brown 1 +ethan brown 1 +ethan allen 4 +david young 4 +david young 1 +david xylophone 6 +david xylophone 4 +david xylophone 1 +david white 2 +david van buren 3 +david van buren 2 +david underhill 7 +david underhill 4 +david underhill 1 +david thompson 1 +david robinson 3 +david robinson 2 +david quirinius 4 +david quirinius 4 +david quirinius 2 +david ovid 4 +david ovid 3 +david nixon 1 +david laertes 4 +david ichabod 6 +david ichabod 3 +david hernandez 7 +david ellison 5 +david ellison 3 +david ellison 3 +david davidson 4 +david davidson 3 +david davidson 1 +david davidson 1 +david brown 6 +david brown 2 +david allen 5 +david allen 2 +calvin zipper 9 +calvin zipper 3 +calvin young 3 +calvin young 1 +calvin xylophone 6 +calvin xylophone 3 +calvin xylophone 1 +calvin white 1 +calvin white 1 +calvin van buren 9 +calvin van buren 1 +calvin underhill 4 +calvin thompson 3 +calvin thompson 2 +calvin steinbeck 6 +calvin steinbeck 3 +calvin steinbeck 3 +calvin robinson 2 +calvin quirinius 4 +calvin quirinius 3 +calvin polk 2 +calvin ovid 5 +calvin ovid 4 +calvin ovid 3 +calvin ovid 1 +calvin nixon 7 +calvin nixon 3 +calvin nixon 2 +calvin laertes 3 +calvin laertes 1 +calvin johnson 2 +calvin hernandez 1 +calvin garcia 3 +calvin falkner 8 +calvin falkner 4 +calvin falkner 4 +calvin falkner 3 +calvin falkner 2 +calvin falkner 1 +calvin ellison 3 +calvin davidson 1 +calvin davidson 1 +calvin carson 1 +calvin brown 5 +calvin brown 3 +calvin brown 1 +calvin allen 1 +bob zipper 4 +bob zipper 1 +bob zipper 1 +bob young 1 +bob xylophone 3 +bob xylophone 2 +bob white 3 +bob white 1 +bob van buren 3 +bob steinbeck 2 +bob quirinius 4 +bob polk 2 +bob ovid 7 +bob ovid 2 +bob ovid 2 +bob ovid 1 +bob miller 1 +bob laertes 5 +bob laertes 1 +bob king 3 +bob king 3 +bob king 2 +bob ichabod 1 +bob hernandez 1 +bob garcia 4 +bob garcia 3 +bob garcia 2 +bob garcia 1 +bob garcia 1 +bob falkner 6 +bob ellison 3 +bob ellison 2 +bob ellison 1 +bob ellison 1 +bob davidson 5 +bob davidson 2 +bob davidson 2 +bob carson 3 +bob brown 8 +bob brown 6 +bob brown 2 +alice zipper 2 +alice zipper 1 
+alice zipper 1 +alice xylophone 2 +alice xylophone 2 +alice xylophone 1 +alice van buren 2 +alice underhill 2 +alice steinbeck 7 +alice steinbeck 3 +alice steinbeck 1 +alice robinson 4 +alice robinson 1 +alice quirinius 6 +alice quirinius 4 +alice polk 1 +alice ovid 2 +alice nixon 2 +alice nixon 2 +alice nixon 1 +alice miller 2 +alice laertes 3 +alice laertes 2 +alice king 8 +alice king 4 +alice king 2 +alice johnson 5 +alice hernandez 8 +alice hernandez 8 +alice garcia 1 +alice falkner 5 +alice davidson 2 +alice carson 1 +alice brown 5 +alice allen 5 +alice allen 5 +alice allen 4 + 5 + 4 + 3 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-2-ac487cc1b94130bf9ce00e07c7075f65 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-2-ac487cc1b94130bf9ce00e07c7075f65 new file mode 100644 index 0000000000000..c38e7bbabc21e --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-2-ac487cc1b94130bf9ce00e07c7075f65 @@ -0,0 +1,1049 @@ + 0.25047801147227533 + 0.47992351816443596 + 0.6197718631178707 +alice allen 0.7300380228136882 +alice allen 0.8954372623574145 +alice allen 0.9216061185468452 +alice brown 0.22053231939163498 +alice carson 0.2889733840304182 +alice davidson 0.8593155893536122 +alice falkner 0.08604206500956023 +alice garcia 0.2870722433460076 +alice hernandez 0.011472275334608031 +alice hernandez 0.07604562737642585 +alice johnson 0.5181644359464627 +alice king 0.3652007648183556 +alice king 0.8536121673003803 +alice king 0.9771863117870723 +alice laertes 0.870722433460076 +alice laertes 0.870722433460076 +alice miller 0.12045889101338432 +alice nixon 0.4372623574144487 +alice nixon 0.47036328871892924 +alice nixon 0.768642447418738 +alice ovid 0.4665391969407266 +alice polk 0.279467680608365 +alice quirinius 0.8432122370936902 +alice quirinius 0.9923518164435946 +alice robinson 0.5722433460076045 +alice robinson 0.7984790874524715 +alice steinbeck 0.27151051625239003 +alice steinbeck 0.739961759082218 +alice steinbeck 0.9923954372623575 +alice underhill 0.5513307984790875 +alice van buren 0.4923954372623574 +alice xylophone 0.2376425855513308 +alice xylophone 0.26806083650190116 +alice xylophone 0.8776290630975143 +alice zipper 0.33460076045627374 +alice zipper 0.8814531548757171 +alice zipper 0.9445506692160612 +bob brown 0.5038022813688213 +bob brown 0.5066921606118547 +bob brown 0.5372848948374761 +bob carson 0.43346007604562736 +bob davidson 0.21673003802281368 +bob davidson 0.5285171102661597 +bob davidson 0.8413001912045889 +bob ellison 0.2045889101338432 +bob ellison 0.26577437858508607 +bob ellison 0.5793499043977055 +bob ellison 0.9144486692015209 +bob falkner 0.6940726577437859 +bob garcia 0.08555133079847908 +bob garcia 0.17680608365019013 +bob garcia 0.2887189292543021 +bob garcia 0.5418250950570342 +bob garcia 0.5736137667304015 +bob hernandez 0.7813688212927756 +bob ichabod 0.5200764818355641 +bob king 0.0076481835564053535 +bob king 0.5627376425855514 +bob king 0.9524714828897338 +bob laertes 0.32887189292543023 +bob laertes 0.6825095057034221 +bob miller 0.19771863117870722 +bob ovid 0.40304182509505704 +bob ovid 0.40344168260038243 +bob ovid 0.42065009560229444 +bob ovid 0.8403041825095057 +bob polk 0.15019011406844107 +bob quirinius 0.1844106463878327 +bob steinbeck 0.16920152091254753 +bob van buren 0.5086042065009561 +bob white 0.26045627376425856 +bob white 0.7623574144486692 +bob xylophone 0.4474187380497132 +bob xylophone 0.6539923954372624 +bob young 0.4722753346080306 +bob 
zipper 0.009505703422053232 +bob zipper 0.24091778202676864 +bob zipper 0.4600760456273764 +calvin allen 0.30975143403441685 +calvin brown 0.4448669201520912 +calvin brown 0.5361216730038023 +calvin brown 0.9196940726577438 +calvin carson 0.9315589353612167 +calvin davidson 0.5869980879541109 +calvin davidson 0.6653992395437263 +calvin ellison 0.6977186311787072 +calvin falkner 0.02091254752851711 +calvin falkner 0.03824091778202677 +calvin falkner 0.21223709369024857 +calvin falkner 0.46577946768060835 +calvin falkner 0.5114068441064639 +calvin falkner 0.5950570342205324 +calvin garcia 0.7896749521988528 +calvin hernandez 0.16730038022813687 +calvin johnson 0.9790874524714829 +calvin laertes 0.5487571701720841 +calvin laertes 0.8145315487571702 +calvin nixon 0.019120458891013385 +calvin nixon 0.4467680608365019 +calvin nixon 0.7395437262357415 +calvin ovid 0.14531548757170173 +calvin ovid 0.17490494296577946 +calvin ovid 0.19961977186311788 +calvin ovid 0.9407265774378585 +calvin polk 0.4619771863117871 +calvin quirinius 0.8802281368821293 +calvin quirinius 0.9254302103250478 +calvin robinson 0.13193116634799235 +calvin steinbeck 0.4818355640535373 +calvin steinbeck 0.7418738049713193 +calvin steinbeck 0.8060836501901141 +calvin thompson 0.2179732313575526 +calvin thompson 0.8422053231939164 +calvin underhill 0.7495219885277247 +calvin van buren 0.022813688212927757 +calvin van buren 0.8508604206500956 +calvin white 0.04182509505703422 +calvin white 0.9674952198852772 +calvin xylophone 0.011406844106463879 +calvin xylophone 0.3193116634799235 +calvin xylophone 0.6634799235181644 +calvin young 0.1988527724665392 +calvin young 0.4391634980988593 +calvin zipper 0.5532319391634981 +calvin zipper 0.8726235741444867 +david allen 0.30019120458891013 +david allen 0.3326959847036329 +david brown 0.1338432122370937 +david brown 0.9694072657743786 +david davidson 0.21414913957934992 +david davidson 0.655893536121673 +david davidson 0.7319391634980988 +david davidson 0.8878326996197718 +david ellison 0.6863117870722434 +david ellison 0.6883365200764818 +david ellison 0.7243346007604563 +david hernandez 0.12237093690248566 +david ichabod 0.35564053537284895 +david ichabod 0.7338403041825095 +david laertes 0.3575525812619503 +david nixon 0.33460803059273425 +david ovid 0.3916349809885932 +david ovid 0.6022944550669216 +david quirinius 0.3155893536121673 +david quirinius 0.6577437858508605 +david quirinius 0.9163498098859315 +david robinson 0.6673003802281369 +david robinson 0.6998087954110899 +david thompson 0.25285171102661597 +david underhill 0.1586998087954111 +david underhill 0.35181644359464626 +david underhill 0.7189292543021033 +david van buren 0.05927342256214149 +david van buren 0.5889101338432122 +david white 0.49429657794676807 +david xylophone 0.4875717017208413 +david xylophone 0.6901140684410646 +david xylophone 0.7571701720841301 +david young 0.0019011406844106464 +david young 0.040152963671128104 +ethan allen 0.20532319391634982 +ethan brown 0.10707456978967496 +ethan brown 0.13307984790874525 +ethan brown 0.4340344168260038 +ethan brown 0.4752851711026616 +ethan brown 0.5219885277246654 +ethan brown 0.745697896749522 +ethan carson 0.20912547528517111 +ethan ellison 0.45124282982791586 +ethan ellison 0.8680688336520076 +ethan falkner 0.0994263862332696 +ethan falkner 0.6845124282982792 +ethan garcia 0.06653992395437262 +ethan hernandez 0.2237093690248566 +ethan johnson 0.2300380228136882 +ethan king 0.47418738049713194 +ethan laertes 0.022944550669216062 +ethan laertes 
0.2908745247148289 +ethan laertes 0.42638623326959846 +ethan laertes 0.48098859315589354 +ethan laertes 0.6596558317399618 +ethan laertes 0.7839388145315488 +ethan laertes 0.9201520912547528 +ethan miller 0.23709369024856597 +ethan nixon 0.8164435946462715 +ethan ovid 0.6121673003802282 +ethan polk 0.12167300380228137 +ethan polk 0.3384321223709369 +ethan polk 0.6920152091254753 +ethan polk 0.9619771863117871 +ethan quirinius 0.19391634980988592 +ethan quirinius 0.23135755258126195 +ethan quirinius 0.7908745247148289 +ethan robinson 0.24282982791587 +ethan robinson 0.8003802281368821 +ethan underhill 0.6615969581749049 +ethan van buren 0.8365019011406845 +ethan white 0.48859315589353614 +ethan white 0.5741444866920152 +ethan xylophone 0.9695817490494296 +ethan zipper 0.21102661596958175 +ethan zipper 0.6425855513307985 +fred davidson 0.5239005736137667 +fred davidson 0.7414448669201521 +fred davidson 0.8604206500956023 +fred ellison 0.3977055449330784 +fred ellison 0.5506692160611855 +fred ellison 0.7208413001912046 +fred falkner 0.024714828897338403 +fred falkner 0.19120458891013384 +fred falkner 0.9809885931558935 +fred hernandez 0.2734225621414914 +fred ichabod 0.17110266159695817 +fred ichabod 0.780114722753346 +fred johnson 0.30038022813688214 +fred king 0.2198852772466539 +fred king 0.47718631178707227 +fred laertes 0.2332695984703633 +fred miller 0.7858508604206501 +fred nixon 0.005703422053231939 +fred nixon 0.31749049429657794 +fred nixon 0.7648183556405354 +fred nixon 0.8460076045627376 +fred polk 0.16252390057361377 +fred polk 0.564638783269962 +fred polk 0.6273764258555133 +fred polk 0.8155893536121673 +fred quirinius 0.4866920152091255 +fred quirinius 0.8973384030418251 +fred robinson 0.6387832699619772 +fred steinbeck 0.14722753346080306 +fred steinbeck 0.4627151051625239 +fred steinbeck 0.7265774378585086 +fred underhill 0.35361216730038025 +fred van buren 0.3365200764818356 +fred van buren 0.5057034220532319 +fred van buren 0.6463878326996197 +fred van buren 0.904397705544933 +fred white 0.5171102661596958 +fred young 0.7705544933078394 +fred young 0.7992351816443595 +fred zipper 0.615678776290631 +gabriella allen 0.4435946462715105 +gabriella allen 0.9334600760456274 +gabriella brown 0.4359464627151052 +gabriella brown 0.9636711281070746 +gabriella carson 0.9562737642585551 +gabriella davidson 0.8174904942965779 +gabriella ellison 0.1931166347992352 +gabriella ellison 0.38022813688212925 +gabriella falkner 0.3231939163498099 +gabriella falkner 0.5659655831739961 +gabriella falkner 0.8948374760994264 +gabriella garcia 0.4695817490494297 +gabriella hernandez 0.6444866920152091 +gabriella hernandez 0.7015209125475285 +gabriella ichabod 0.09125475285171103 +gabriella ichabod 0.1520912547528517 +gabriella ichabod 0.1835564053537285 +gabriella ichabod 0.372848948374761 +gabriella ichabod 0.8107074569789675 +gabriella king 0.39961759082217974 +gabriella king 0.5190114068441065 +gabriella laertes 0.4569789674952199 +gabriella miller 0.26996197718631176 +gabriella ovid 0.7091254752851711 +gabriella ovid 0.8897338403041825 +gabriella polk 0.030418250950570342 +gabriella polk 0.44106463878326996 +gabriella steinbeck 0.5755258126195029 +gabriella steinbeck 0.8221797323135756 +gabriella thompson 0.013307984790874524 +gabriella thompson 0.44866920152091255 +gabriella thompson 0.7224334600760456 +gabriella van buren 0.6216730038022814 +gabriella van buren 0.6730038022813688 +gabriella white 0.17208413001912046 +gabriella young 0.5076045627376425 +gabriella young 0.7934990439770554 
+gabriella zipper 0.23193916349809887 +gabriella zipper 0.8565965583173997 +holly allen 0.11596958174904944 +holly brown 0.11281070745697896 +holly brown 0.155893536121673 +holly falkner 0.124282982791587 +holly hernandez 0.055449330783938815 +holly hernandez 0.32509505703422054 +holly hernandez 0.97131931166348 +holly hernandez 0.9714828897338403 +holly ichabod 0.12357414448669202 +holly ichabod 0.17300380228136883 +holly ichabod 0.629277566539924 +holly johnson 0.33078393881453155 +holly johnson 0.8612167300380228 +holly johnson 0.9391634980988594 +holly king 0.25475285171102663 +holly king 0.3745247148288973 +holly laertes 0.42775665399239543 +holly miller 0.37476099426386233 +holly nixon 0.10076045627376426 +holly nixon 0.34608030592734224 +holly polk 0.40535372848948376 +holly polk 0.5209125475285171 +holly robinson 0.9273422562141491 +holly thompson 0.1596958174904943 +holly thompson 0.311787072243346 +holly thompson 0.9125475285171103 +holly underhill 0.3479923518164436 +holly underhill 0.5812619502868069 +holly underhill 0.8384030418250951 +holly underhill 0.903041825095057 +holly van buren 0.9464627151051626 +holly white 0.1089866156787763 +holly white 0.4780114722753346 +holly xylophone 0.5304182509505704 +holly young 0.7357414448669202 +holly young 0.8240917782026769 +holly zipper 0.15399239543726237 +holly zipper 0.8546845124282982 +irene allen 0.8738049713193117 +irene brown 0.4588910133843212 +irene brown 0.49619771863117873 +irene brown 0.5678776290630975 +irene carson 0.6844106463878327 +irene ellison 0.32504780114722753 +irene ellison 0.48565965583174 +irene falkner 0.41825095057034223 +irene falkner 0.9866920152091255 +irene garcia 0.11663479923518165 +irene garcia 0.29277566539923955 +irene garcia 0.8126195028680688 +irene ichabod 0.8307984790874525 +irene ichabod 0.9177820267686424 +irene johnson 0.7112810707456979 +irene laertes 0.01338432122370937 +irene laertes 0.1482889733840304 +irene laertes 0.7034220532319392 +irene miller 0.367112810707457 +irene nixon 0.11854684512428298 +irene nixon 0.7927756653992395 +irene nixon 0.9426386233269598 +irene ovid 0.24714828897338403 +irene ovid 0.30210325047801145 +irene ovid 0.779467680608365 +irene polk 0.0038022813688212928 +irene polk 0.45315487571701724 +irene polk 0.6577946768060836 +irene polk 0.8891013384321224 +irene polk 0.9789674952198852 +irene quirinius 0.27533460803059273 +irene quirinius 0.35946462715105165 +irene quirinius 0.384321223709369 +irene robinson 0.18631178707224336 +irene steinbeck 0.9942965779467681 +irene thompson 0.6939163498098859 +irene underhill 0.30401529636711283 +irene underhill 0.3403041825095057 +irene van buren 0.5908221797323135 +irene van buren 0.6634980988593155 +irene xylophone 0.5342205323193916 +jessica brown 0.7680608365019012 +jessica carson 0.3574144486692015 +jessica carson 0.6195028680688337 +jessica carson 0.8269961977186312 +jessica davidson 0.10646387832699619 +jessica davidson 0.34790874524714827 +jessica davidson 0.3593155893536122 +jessica davidson 0.6768060836501901 +jessica ellison 0.0779467680608365 +jessica ellison 0.42015209125475284 +jessica falkner 0.994263862332696 +jessica garcia 0.8279158699808795 +jessica garcia 0.9581749049429658 +jessica ichabod 0.45627376425855515 +jessica johnson 0.30228136882129275 +jessica johnson 0.8049713193116634 +jessica miller 0.8011472275334608 +jessica nixon 0.06500956022944551 +jessica nixon 0.6042065009560229 +jessica ovid 0.15105162523900573 +jessica ovid 0.8992395437262357 +jessica polk 0.4378585086042065 +jessica quirinius 
0.058935361216730035 +jessica quirinius 0.4714828897338403 +jessica quirinius 0.5760456273764258 +jessica quirinius 0.8935361216730038 +jessica robinson 0.9638783269961977 +jessica thompson 0.08221797323135756 +jessica thompson 0.5893536121673004 +jessica underhill 0.034220532319391636 +jessica underhill 0.06118546845124283 +jessica underhill 0.9541108986615678 +jessica van buren 0.20650095602294455 +jessica white 0.06273764258555133 +jessica white 0.4149139579349904 +jessica white 0.5798479087452472 +jessica white 0.591254752851711 +jessica white 0.7667304015296367 +jessica xylophone 0.5009560229445507 +jessica young 0.3403441682600382 +jessica young 0.8821292775665399 +jessica zipper 0.14068441064638784 +jessica zipper 0.2984790874524715 +jessica zipper 0.6007604562737643 +katie allen 0.5665399239543726 +katie brown 0.49521988527724664 +katie davidson 0.6730401529636711 +katie ellison 0.3173996175908222 +katie ellison 0.7262357414448669 +katie falkner 0.2676864244741874 +katie garcia 0.049429657794676805 +katie garcia 0.3135755258126195 +katie hernandez 0.6026615969581749 +katie ichabod 0.15296367112810708 +katie ichabod 0.4684512428298279 +katie ichabod 0.7055449330783938 +katie king 0.16159695817490494 +katie king 0.502868068833652 +katie king 0.5927342256214149 +katie miller 0.5228136882129277 +katie miller 0.5296367112810707 +katie nixon 0.7832699619771863 +katie ovid 0.8795411089866156 +katie polk 0.35372848948374763 +katie polk 0.9657794676806084 +katie robinson 0.06844106463878327 +katie van buren 0.06883365200764818 +katie van buren 0.1739961759082218 +katie white 0.045889101338432124 +katie white 0.18546845124282982 +katie xylophone 0.7281368821292775 +katie young 0.16443594646271512 +katie young 0.20152091254752852 +katie young 0.9732313575525813 +katie zipper 0.21863117870722434 +katie zipper 0.4505703422053232 +luke allen 0.03612167300380228 +luke allen 0.21606118546845124 +luke allen 0.8346007604562737 +luke allen 0.8631178707224335 +luke allen 0.9311663479923518 +luke brown 0.7304015296367112 +luke davidson 0.25239005736137665 +luke davidson 0.9961977186311787 +luke ellison 0.1147227533460803 +luke ellison 0.2447418738049713 +luke ellison 0.49809885931558934 +luke falkner 0.24524714828897337 +luke falkner 0.5124282982791587 +luke garcia 0.03441682600382409 +luke garcia 0.32695984703632885 +luke ichabod 0.10266159695817491 +luke ichabod 0.5551330798479087 +luke johnson 0.25430210325047803 +luke johnson 0.6787762906309751 +luke johnson 0.9082217973231358 +luke laertes 0.06309751434034416 +luke laertes 0.3690248565965583 +luke laertes 0.7743785850860421 +luke laertes 0.8079847908745247 +luke laertes 0.811787072243346 +luke miller 0.8068833652007649 +luke ovid 0.435361216730038 +luke ovid 0.7547528517110266 +luke polk 0.13957934990439771 +luke polk 0.9770554493307839 +luke quirinius 0.09315589353612168 +luke robinson 0.015209125475285171 +luke robinson 0.053231939163498096 +luke thompson 0.8840304182509505 +luke underhill 0.08745247148288973 +luke underhill 0.40152963671128106 +luke underhill 0.4608030592734226 +luke van buren 0.4847908745247148 +luke white 0.8098859315589354 +luke xylophone 0.34220532319391633 +luke zipper 0.21292775665399238 +mike allen 0.7036328871892925 +mike brown 0.29063097514340347 +mike carson 0.623574144486692 +mike carson 0.7476099426386233 +mike carson 0.9885931558935361 +mike davidson 0.6520912547528517 +mike davidson 0.8298279158699808 +mike ellison 0.24665391969407266 +mike ellison 0.3821292775665399 +mike ellison 0.8355640535372849 +mike ellison 
0.8986615678776291 +mike ellison 0.94106463878327 +mike falkner 0.0248565965583174 +mike garcia 0.39543726235741444 +mike garcia 0.5391969407265774 +mike garcia 0.6482889733840305 +mike hernandez 0.07984790874524715 +mike hernandez 0.7186311787072244 +mike ichabod 0.7642585551330798 +mike king 0.09695817490494296 +mike king 0.188212927756654 +mike king 0.4049429657794677 +mike king 0.5544933078393881 +mike king 0.6045627376425855 +mike king 0.9011406844106464 +mike miller 0.621414913957935 +mike nixon 0.688212927756654 +mike nixon 0.9068441064638784 +mike polk 0.3612167300380228 +mike polk 0.6749521988527725 +mike polk 0.8374760994263862 +mike quirinius 0.5105162523900574 +mike steinbeck 0.05736137667304015 +mike steinbeck 0.747148288973384 +mike steinbeck 0.8745247148288974 +mike steinbeck 0.9330783938814532 +mike van buren 0.8650190114068441 +mike van buren 0.973384030418251 +mike white 0.17782026768642448 +mike white 0.7151051625239006 +mike white 0.7566539923954373 +mike white 0.9808795411089866 +mike young 0.20722433460076045 +mike young 0.3840304182509506 +mike young 0.6405353728489483 +mike zipper 0.12810707456978968 +mike zipper 0.42829827915869984 +mike zipper 0.7946768060836502 +nick allen 0.021032504780114723 +nick allen 0.847036328871893 +nick brown 0.14258555133079848 +nick davidson 0.26003824091778205 +nick ellison 0.028680688336520075 +nick ellison 0.3935361216730038 +nick falkner 0.5684410646387833 +nick falkner 0.7590822179732314 +nick garcia 0.34980988593155893 +nick garcia 0.45817490494296575 +nick garcia 0.892925430210325 +nick ichabod 0.2944550669216061 +nick ichabod 0.37667304015296366 +nick ichabod 0.7074569789674953 +nick johnson 0.3973384030418251 +nick johnson 0.4646271510516252 +nick laertes 0.36311787072243346 +nick miller 0.9961759082217974 +nick nixon 0.7110266159695817 +nick ovid 0.7762906309751434 +nick polk 1.0 +nick quirinius 0.0019120458891013384 +nick quirinius 0.08795411089866156 +nick robinson 0.09505703422053231 +nick robinson 0.45506692160611856 +nick steinbeck 0.2224334600760456 +nick thompson 0.4225621414913958 +nick underhill 0.9101338432122371 +nick van buren 0.03802281368821293 +nick xylophone 0.6806883365200764 +nick young 0.4220532319391635 +nick young 0.8623326959847036 +nick zipper 0.2829827915869981 +nick zipper 0.5468451242829828 +oscar allen 0.785171102661597 +oscar brown 0.13498098859315588 +oscar carson 0.07224334600760456 +oscar carson 0.25665399239543724 +oscar carson 0.3422562141491396 +oscar carson 0.6061185468451242 +oscar carson 0.6826003824091779 +oscar davidson 0.7129277566539924 +oscar ellison 0.036328871892925434 +oscar ellison 0.5831739961759083 +oscar falkner 0.9049429657794676 +oscar garcia 0.02676864244741874 +oscar hernandez 0.20076481835564053 +oscar hernandez 0.7870722433460076 +oscar ichabod 0.12619502868068833 +oscar ichabod 0.14149139579349904 +oscar ichabod 0.4416826003824092 +oscar ichabod 0.8661567877629063 +oscar johnson 0.1806083650190114 +oscar johnson 0.467680608365019 +oscar king 0.6596958174904943 +oscar king 0.6787072243346007 +oscar king 0.9258555133079848 +oscar laertes 0.24904942965779467 +oscar laertes 0.5315487571701721 +oscar laertes 0.6328871892925431 +oscar laertes 0.9980988593155894 +oscar nixon 0.9292543021032504 +oscar ovid 0.43021032504780116 +oscar ovid 0.8288973384030418 +oscar ovid 0.8527724665391969 +oscar polk 0.10836501901140684 +oscar polk 0.37858508604206503 +oscar quirinius 0.3041825095057034 +oscar quirinius 0.46387832699619774 +oscar quirinius 0.6311787072243346 +oscar quirinius 
0.8555133079847909 +oscar robinson 0.11216730038022814 +oscar robinson 0.22433460076045628 +oscar robinson 0.2294455066921606 +oscar robinson 0.2390057361376673 +oscar steinbeck 0.9904942965779467 +oscar thompson 0.015296367112810707 +oscar thompson 0.2946768060836502 +oscar thompson 0.3060836501901141 +oscar thompson 0.6140684410646388 +oscar underhill 0.31368821292775667 +oscar van buren 0.722753346080306 +oscar van buren 0.7889733840304183 +oscar van buren 0.8833652007648184 +oscar white 0.055133079847908745 +oscar white 0.22562141491395793 +oscar white 0.4321223709369025 +oscar white 0.6443594646271511 +oscar xylophone 0.10133843212237094 +oscar xylophone 0.4187380497131931 +oscar xylophone 0.4296577946768061 +oscar zipper 0.6233269598470363 +oscar zipper 0.7490494296577946 +oscar zipper 0.8783269961977186 +priscilla brown 0.2925430210325048 +priscilla brown 0.6501901140684411 +priscilla brown 0.9120458891013384 +priscilla carson 0.22753346080305928 +priscilla carson 0.5564053537284895 +priscilla carson 0.7820267686424475 +priscilla ichabod 0.3269961977186312 +priscilla ichabod 0.9828897338403042 +priscilla johnson 0.04206500956022945 +priscilla johnson 0.4011406844106464 +priscilla johnson 0.6368821292775665 +priscilla johnson 0.7131931166347992 +priscilla johnson 0.9429657794676806 +priscilla king 0.3517110266159696 +priscilla nixon 0.38049713193116635 +priscilla nixon 0.6864244741873805 +priscilla ovid 0.8193916349809885 +priscilla ovid 0.9139579349904398 +priscilla polk 0.5697896749521989 +priscilla quirinius 0.22179732313575526 +priscilla thompson 0.7737642585551331 +priscilla underhill 0.1682600382409178 +priscilla underhill 0.8852772466539197 +priscilla van buren 0.10325047801147227 +priscilla van buren 0.7877629063097514 +priscilla van buren 0.9598470363288719 +priscilla white 0.4894837476099426 +priscilla xylophone 0.596958174904943 +priscilla xylophone 0.6159695817490495 +priscilla xylophone 0.8393881453154876 +priscilla young 0.41064638783269963 +priscilla young 0.9182509505703422 +priscilla zipper 0.5247148288973384 +priscilla zipper 0.8574144486692015 +quinn allen 0.1634980988593156 +quinn allen 0.9617590822179732 +quinn brown 0.08986615678776291 +quinn brown 0.17590822179732313 +quinn brown 0.5836501901140685 +quinn davidson 0.11787072243346007 +quinn davidson 0.30592734225621415 +quinn davidson 0.3650190114068441 +quinn davidson 0.751434034416826 +quinn ellison 0.376425855513308 +quinn ellison 0.8517110266159695 +quinn garcia 0.17870722433460076 +quinn garcia 0.7323135755258127 +quinn garcia 0.844106463878327 +quinn garcia 0.9486692015209125 +quinn ichabod 0.42395437262357416 +quinn king 0.6653919694072657 +quinn king 0.9505703422053232 +quinn laertes 0.6080305927342257 +quinn laertes 0.9277566539923955 +quinn laertes 0.9847908745247148 +quinn nixon 0.5133079847908745 +quinn ovid 0.16539923954372623 +quinn quirinius 0.19011406844106463 +quinn robinson 0.27756653992395436 +quinn steinbeck 0.23954372623574144 +quinn steinbeck 0.6367112810707457 +quinn thompson 0.4068441064638783 +quinn thompson 0.7782026768642447 +quinn underhill 0.05353728489483748 +quinn underhill 0.5380228136882129 +quinn underhill 0.9349904397705545 +quinn van buren 0.2623574144486692 +quinn young 0.37832699619771865 +quinn zipper 0.51434034416826 +quinn zipper 0.8859315589353612 +rachel allen 0.1701720841300191 +rachel allen 0.3288973384030418 +rachel brown 0.04780114722753346 +rachel brown 0.057034220532319393 +rachel brown 0.532319391634981 +rachel brown 0.5946462715105163 +rachel brown 
0.6064638783269962 +rachel carson 0.09177820267686425 +rachel carson 0.6406844106463878 +rachel davidson 0.37093690248565964 +rachel ellison 0.5162523900573613 +rachel falkner 0.1958174904942966 +rachel falkner 0.6330798479087453 +rachel falkner 0.6768642447418738 +rachel falkner 0.9751434034416826 +rachel johnson 0.9560229445506692 +rachel king 0.12547528517110265 +rachel king 0.6003824091778203 +rachel laertes 0.2638623326959847 +rachel laertes 0.5779467680608364 +rachel ovid 0.23518164435946462 +rachel ovid 0.7053231939163498 +rachel polk 0.14638783269961977 +rachel quirinius 0.0076045627376425855 +rachel robinson 0.14340344168260039 +rachel robinson 0.2084130019120459 +rachel robinson 0.6902485659655831 +rachel thompson 0.2718631178707224 +rachel thompson 0.5334608030592735 +rachel thompson 0.875717017208413 +rachel underhill 0.344106463878327 +rachel white 0.17973231357552583 +rachel white 0.6615678776290631 +rachel young 0.3862332695984704 +rachel zipper 0.33079847908745247 +rachel zipper 0.5717017208413002 +sarah carson 0.08935361216730038 +sarah carson 0.18250950570342206 +sarah carson 0.8041825095057035 +sarah ellison 0.967680608365019 +sarah falkner 0.6252390057361377 +sarah falkner 0.9024856596558317 +sarah garcia 0.3881453154875717 +sarah garcia 0.4072657743785851 +sarah garcia 0.6673040152963671 +sarah ichabod 0.29636711281070743 +sarah ichabod 0.9483747609942639 +sarah johnson 0.06463878326996197 +sarah johnson 0.10456273764258556 +sarah johnson 0.5640535372848948 +sarah johnson 0.7954110898661568 +sarah king 0.8030592734225621 +sarah king 0.9655831739961759 +sarah miller 0.6692160611854685 +sarah ovid 0.20342205323193915 +sarah robinson 0.47338403041825095 +sarah robinson 0.7775665399239544 +sarah steinbeck 0.6520076481835564 +sarah white 0.28517110266159695 +sarah white 0.8479087452471483 +sarah xylophone 0.25621414913957935 +sarah young 0.5570342205323194 +sarah zipper 0.5583173996175909 +tom brown 0.5602294455066922 +tom brown 0.8669201520912547 +tom carson 0.045627376425855515 +tom carson 0.35551330798479086 +tom carson 0.935361216730038 +tom davidson 0.8212927756653993 +tom ellison 0.21032504780114722 +tom ellison 0.26195028680688337 +tom ellison 0.7376425855513308 +tom falkner 0.3441682600382409 +tom falkner 0.6481835564053537 +tom hernandez 0.0038240917782026767 +tom hernandez 0.5399239543726235 +tom ichabod 0.6137667304015296 +tom johnson 0.5525812619502868 +tom johnson 0.7915869980879541 +tom king 0.16061185468451242 +tom laertes 0.0745697896749522 +tom laertes 0.5095057034220533 +tom miller 0.2262357414448669 +tom miller 0.2338403041825095 +tom miller 0.2813688212927757 +tom nixon 0.8451242829827916 +tom ovid 0.864244741873805 +tom polk 0.1491395793499044 +tom polk 0.9521988527724665 +tom quirinius 0.09369024856596558 +tom quirinius 0.8489483747609943 +tom robinson 0.060836501901140684 +tom robinson 0.6254752851711026 +tom robinson 0.6462715105162524 +tom robinson 0.9980879541108987 +tom steinbeck 0.5817490494296578 +tom van buren 0.12737642585551331 +tom van buren 0.3154875717017208 +tom van buren 0.7585551330798479 +tom white 0.47609942638623326 +tom young 0.9369024856596558 +tom young 0.9543726235741445 +tom zipper 0.9063097514340345 +ulysses brown 0.9448669201520913 +ulysses carson 0.07034220532319392 +ulysses carson 0.09885931558935361 +ulysses carson 0.2414448669201521 +ulysses carson 0.7604562737642585 +ulysses davidson 0.7093690248565966 +ulysses ellison 0.55893536121673 +ulysses garcia 0.7246653919694073 +ulysses hernandez 0.4091778202676864 +ulysses 
hernandez 0.627151051625239 +ulysses hernandez 0.982791586998088 +ulysses ichabod 0.21482889733840305 +ulysses ichabod 0.3193916349809886 +ulysses johnson 0.5621414913957935 +ulysses king 0.9467680608365019 +ulysses laertes 0.390057361376673 +ulysses laertes 0.7973231357552581 +ulysses laertes 0.9866156787762906 +ulysses miller 0.31166347992351817 +ulysses miller 0.5774378585086042 +ulysses nixon 0.0057361376673040155 +ulysses ovid 0.38593155893536124 +ulysses polk 0.04752851711026616 +ulysses polk 0.6083650190114068 +ulysses polk 0.7609942638623327 +ulysses polk 0.8326996197718631 +ulysses quirinius 0.6290630975143403 +ulysses robinson 0.9235181644359465 +ulysses steinbeck 0.039923954372623575 +ulysses steinbeck 0.7724665391969407 +ulysses thompson 0.3824091778202677 +ulysses underhill 0.11406844106463879 +ulysses underhill 0.23574144486692014 +ulysses underhill 0.3365019011406844 +ulysses underhill 0.42585551330798477 +ulysses underhill 0.6102661596958175 +ulysses underhill 0.6959847036328872 +ulysses underhill 0.9752851711026616 +ulysses van buren 0.5437262357414449 +ulysses white 0.5 +ulysses white 0.5931558935361216 +ulysses xylophone 0.5855513307984791 +ulysses xylophone 0.8317399617590823 +ulysses xylophone 0.9005736137667304 +ulysses young 0.18164435946462715 +ulysses young 0.3919694072657744 +ulysses young 0.49049429657794674 +victor allen 0.13575525812619502 +victor allen 0.6309751434034416 +victor brown 0.0497131931166348 +victor brown 0.20267686424474188 +victor brown 0.6178707224334601 +victor brown 0.8910133843212237 +victor davidson 0.026615969581749048 +victor davidson 0.491395793499044 +victor davidson 0.5850860420650096 +victor ellison 0.26425855513307983 +victor ellison 0.6692015209125475 +victor hernandez 0.04397705544933078 +victor hernandez 0.12927756653992395 +victor hernandez 0.1950286806883365 +victor hernandez 0.5411089866156787 +victor hernandez 0.7284894837476099 +victor johnson 0.11977186311787072 +victor johnson 0.4828897338403042 +victor johnson 0.7699619771863118 +victor king 0.41254752851711024 +victor king 0.714828897338403 +victor laertes 0.43155893536121676 +victor laertes 0.6500956022944551 +victor miller 0.4429657794676806 +victor nixon 0.33269961977186313 +victor nixon 0.5258126195028681 +victor ovid 0.22813688212927757 +victor polk 0.13878326996197718 +victor quirinius 0.13766730401529637 +victor quirinius 0.887189292543021 +victor robinson 0.5494296577946768 +victor robinson 0.7509505703422054 +victor steinbeck 0.08365019011406843 +victor steinbeck 0.15487571701720843 +victor steinbeck 0.3669201520912547 +victor thompson 0.10516252390057361 +victor van buren 0.27724665391969405 +victor van buren 0.9579349904397706 +victor white 0.41634980988593157 +victor white 0.6349809885931559 +victor xylophone 0.13688212927756654 +victor xylophone 0.3078393881453155 +victor xylophone 0.4110898661567878 +victor xylophone 0.5449330783938815 +victor xylophone 0.9296577946768061 +victor young 0.18738049713193117 +victor zipper 0.5430210325047801 +wendy allen 0.3231357552581262 +wendy allen 0.734225621414914 +wendy allen 0.869980879541109 +wendy brown 0.18929254302103252 +wendy brown 0.6996197718631179 +wendy ellison 0.7437858508604207 +wendy ellison 0.8498098859315589 +wendy falkner 0.07648183556405354 +wendy falkner 0.5353728489483748 +wendy falkner 0.7756653992395437 +wendy garcia 0.07074569789674952 +wendy garcia 0.0741444866920152 +wendy garcia 0.33840304182509506 +wendy garcia 0.38783269961977185 +wendy hernandez 0.017110266159695818 +wendy ichabod 
0.8718929254302104 +wendy king 0.37072243346007605 +wendy king 0.497131931166348 +wendy king 0.5965583173996176 +wendy laertes 0.32122370936902483 +wendy laertes 0.49904397705544934 +wendy laertes 0.876425855513308 +wendy miller 0.7533460803059273 +wendy miller 0.7552581261950286 +wendy nixon 0.44933078393881454 +wendy nixon 0.7661596958174905 +wendy ovid 0.5019011406844106 +wendy ovid 0.6978967495219885 +wendy polk 0.3688212927756654 +wendy polk 0.526615969581749 +wendy quirinius 0.1444866920152091 +wendy quirinius 0.5874524714828897 +wendy robinson 0.030592734225621414 +wendy robinson 0.06692160611854685 +wendy robinson 0.27566539923954375 +wendy steinbeck 0.5703422053231939 +wendy thompson 0.028517110266159697 +wendy thompson 0.11089866156787763 +wendy underhill 0.4837476099426386 +wendy underhill 0.6424474187380497 +wendy underhill 0.9600760456273765 +wendy van buren 0.1920152091254753 +wendy van buren 0.7433460076045627 +wendy white 0.752851711026616 +wendy xylophone 0.6347992351816444 +wendy xylophone 0.7452471482889734 +wendy young 0.07839388145315487 +wendy young 0.3897338403041825 +xavier allen 0.043726235741444866 +xavier allen 0.361376673040153 +xavier allen 0.5456273764258555 +xavier brown 0.6711281070745698 +xavier brown 0.9158699808795411 +xavier brown 0.9847036328871893 +xavier carson 0.0841300191204589 +xavier carson 0.988527724665392 +xavier davidson 0.2585551330798479 +xavier davidson 0.4168260038240918 +xavier davidson 0.609942638623327 +xavier ellison 0.5984703632887189 +xavier ellison 0.7361376673040153 +xavier garcia 0.7017208413001912 +xavier hernandez 0.2509505703422053 +xavier hernandez 0.34990439770554493 +xavier hernandez 0.9220532319391636 +xavier ichabod 0.5475285171102662 +xavier ichabod 0.858508604206501 +xavier johnson 0.3938814531548757 +xavier johnson 0.8231939163498099 +xavier king 0.03231939163498099 +xavier king 0.6539196940726577 +xavier laertes 0.5988593155893536 +xavier ovid 0.4397705544933078 +xavier polk 0.4933078393881453 +xavier polk 0.762906309751434 +xavier polk 0.8136882129277566 +xavier polk 0.8260038240917782 +xavier quirinius 0.07265774378585087 +xavier quirinius 0.27915869980879543 +xavier quirinius 0.34600760456273766 +xavier quirinius 0.8022813688212928 +xavier thompson 0.6118546845124283 +xavier underhill 0.16634799235181644 +xavier white 0.6958174904942965 +xavier white 0.7380497131931166 +xavier xylophone 0.8183556405353728 +xavier zipper 0.9904397705544933 +yuri allen 0.9106463878326996 +yuri allen 1.0 +yuri brown 0.5152091254752852 +yuri brown 0.908745247148289 +yuri carson 0.09560229445506692 +yuri carson 0.9372623574144486 +yuri ellison 0.017208413001912046 +yuri ellison 0.39923954372623577 +yuri falkner 0.28680688336520077 +yuri falkner 0.8967495219885278 +yuri garcia 0.2661596958174905 +yuri hernandez 0.28489483747609945 +yuri johnson 0.5047801147227533 +yuri johnson 0.655831739961759 +yuri johnson 0.720532319391635 +yuri king 0.32129277566539927 +yuri laertes 0.4144486692015209 +yuri laertes 0.8916349809885932 +yuri nixon 0.05162523900573614 +yuri nixon 0.40874524714828897 +yuri polk 0.051330798479087454 +yuri polk 0.39579349904397704 +yuri polk 0.6749049429657795 +yuri quirinius 0.08030592734225621 +yuri quirinius 0.2982791586998088 +yuri quirinius 0.4130019120458891 +yuri steinbeck 0.15779467680608364 +yuri steinbeck 0.9388145315487572 +yuri thompson 0.6175908221797323 +yuri underhill 0.42447418738049714 +yuri underhill 0.8202676864244742 +yuri white 0.19694072657743786 +yuri xylophone 0.4790874524714829 +zach allen 
0.8250950570342205 +zach brown 0.0817490494296578 +zach brown 0.09751434034416825 +zach brown 0.248565965583174 +zach brown 0.2965779467680608 +zach brown 0.4524714828897338 +zach carson 0.6921606118546845 +zach ellison 0.6806083650190115 +zach falkner 0.25812619502868067 +zach falkner 0.2695984703632887 +zach garcia 0.30798479087452474 +zach garcia 0.3632887189292543 +zach garcia 0.7072243346007605 +zach garcia 0.7167300380228137 +zach ichabod 0.30988593155893535 +zach ichabod 0.9502868068833652 +zach king 0.5277246653919694 +zach king 0.8336520076481836 +zach king 0.9239543726235742 +zach miller 0.15678776290630975 +zach miller 0.3726235741444867 +zach miller 0.5608365019011406 +zach ovid 0.1311787072243346 +zach ovid 0.2737642585551331 +zach ovid 0.4543726235741445 +zach ovid 0.6711026615969582 +zach quirinius 0.019011406844106463 +zach robinson 0.11026615969581749 +zach steinbeck 0.28107074569789675 +zach steinbeck 0.7170172084130019 +zach thompson 0.13001912045889102 +zach thompson 0.44550669216061184 +zach underhill 0.7718631178707225 +zach white 0.7965779467680608 +zach xylophone 0.032504780114722756 +zach xylophone 0.638623326959847 +zach young 0.009560229445506692 +zach zipper 0.24334600760456274 +zach zipper 0.2832699619771863 +zach zipper 0.8087954110898662 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-3-b82dfa24123047be4b4e3c27c3997d34 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-3-b82dfa24123047be4b4e3c27c3997d34 new file mode 100644 index 0000000000000..1e0cf03db63a0 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 1-3-b82dfa24123047be4b4e3c27c3997d34 @@ -0,0 +1,1049 @@ +zach zipper 0.0 +zach zipper 0.0 +zach zipper 0.0 +zach young 0.0 +zach xylophone 0.0 +zach xylophone 0.0 +zach white 0.0 +zach underhill 0.0 +zach thompson 0.0 +zach thompson 0.0 +zach steinbeck 0.0 +zach steinbeck 0.0 +zach robinson 0.0 +zach quirinius 0.0 +zach ovid 0.0 +zach ovid 0.0 +zach ovid 0.0 +zach ovid 0.0 +zach miller 0.0 +zach miller 0.0 +zach miller 0.0 +zach king 0.0 +zach king 0.0 +zach king 0.0 +zach ichabod 0.0 +zach ichabod 0.0 +zach garcia 0.0 +zach garcia 0.0 +zach garcia 0.0 +zach garcia 0.0 +zach falkner 0.0 +zach falkner 0.0 +zach ellison 0.0 +zach carson 0.0 +zach brown 0.0 +zach brown 0.0 +zach brown 0.0 +zach brown 0.0 +zach brown 0.0 +zach allen 0.0 +yuri xylophone 0.0 +yuri white 0.0 +yuri underhill 0.0 +yuri underhill 0.0 +yuri thompson 0.0 +yuri steinbeck 0.0 +yuri steinbeck 0.0 +yuri quirinius 0.0 +yuri quirinius 0.0 +yuri quirinius 0.0 +yuri polk 0.0 +yuri polk 0.0 +yuri polk 0.0 +yuri nixon 0.0 +yuri nixon 0.0 +yuri laertes 0.0 +yuri laertes 0.0 +yuri king 0.0 +yuri johnson 0.0 +yuri johnson 0.0 +yuri johnson 0.0 +yuri hernandez 0.0 +yuri garcia 0.0 +yuri falkner 0.0 +yuri falkner 0.0 +yuri ellison 0.0 +yuri ellison 0.0 +yuri carson 0.0 +yuri carson 0.0 +yuri brown 0.0 +yuri brown 0.0 +yuri allen 0.0 +yuri allen 0.0 +xavier zipper 1.0 +xavier xylophone 0.0 +xavier white 0.0 +xavier white 0.0 +xavier underhill 0.0 +xavier thompson 0.0 +xavier quirinius 0.0 +xavier quirinius 0.0 +xavier quirinius 0.0 +xavier quirinius 0.0 +xavier polk 0.0 +xavier polk 0.0 +xavier polk 0.0 +xavier polk 0.0 +xavier ovid 0.0 +xavier laertes 0.0 +xavier king 0.0 +xavier king 0.0 +xavier johnson 0.0 +xavier johnson 0.0 +xavier ichabod 0.0 +xavier ichabod 0.0 +xavier hernandez 0.0 +xavier hernandez 0.0 +xavier hernandez 0.0 +xavier garcia 0.0 +xavier ellison 0.0 +xavier ellison 0.0 +xavier davidson 
0.0 +xavier davidson 0.0 +xavier davidson 0.0 +xavier carson 1.0 +xavier carson 0.0 +xavier brown 0.0 +xavier brown 0.0 +xavier brown 0.0 +xavier allen 0.0 +xavier allen 0.0 +xavier allen 0.0 +wendy young 1.0 +wendy young 0.0 +wendy xylophone 0.0 +wendy xylophone 0.0 +wendy white 0.0 +wendy van buren 0.0 +wendy van buren 0.0 +wendy underhill 0.0 +wendy underhill 0.0 +wendy underhill 0.0 +wendy thompson 0.0 +wendy thompson 0.0 +wendy steinbeck 0.0 +wendy robinson 0.0 +wendy robinson 0.0 +wendy robinson 0.0 +wendy quirinius 0.0 +wendy quirinius 0.0 +wendy polk 0.0 +wendy polk 0.0 +wendy ovid 0.0 +wendy ovid 0.0 +wendy nixon 0.0 +wendy nixon 0.0 +wendy miller 0.0 +wendy miller 0.0 +wendy laertes 0.0 +wendy laertes 0.0 +wendy laertes 0.0 +wendy king 0.0 +wendy king 0.0 +wendy king 0.0 +wendy ichabod 0.0 +wendy hernandez 0.0 +wendy garcia 0.0 +wendy garcia 0.0 +wendy garcia 0.0 +wendy garcia 0.0 +wendy falkner 0.0 +wendy falkner 0.0 +wendy falkner 0.0 +wendy ellison 0.0 +wendy ellison 0.0 +wendy brown 0.0 +wendy brown 0.0 +wendy allen 0.0 +wendy allen 0.0 +wendy allen 0.0 +victor zipper 0.0 +victor young 0.0 +victor xylophone 0.0 +victor xylophone 0.0 +victor xylophone 0.0 +victor xylophone 0.0 +victor xylophone 0.0 +victor white 1.0 +victor white 0.0 +victor van buren 0.0 +victor van buren 0.0 +victor thompson 0.0 +victor steinbeck 0.0 +victor steinbeck 0.0 +victor steinbeck 0.0 +victor robinson 0.0 +victor robinson 0.0 +victor quirinius 0.0 +victor quirinius 0.0 +victor polk 0.0 +victor ovid 0.0 +victor nixon 0.0 +victor nixon 0.0 +victor miller 0.0 +victor laertes 0.0 +victor laertes 0.0 +victor king 0.0 +victor king 0.0 +victor johnson 0.0 +victor johnson 0.0 +victor johnson 0.0 +victor hernandez 0.0 +victor hernandez 0.0 +victor hernandez 0.0 +victor hernandez 0.0 +victor hernandez 0.0 +victor ellison 0.0 +victor ellison 0.0 +victor davidson 0.0 +victor davidson 0.0 +victor davidson 0.0 +victor brown 0.0 +victor brown 0.0 +victor brown 0.0 +victor brown 0.0 +victor allen 0.0 +victor allen 0.0 +ulysses young 0.0 +ulysses young 0.0 +ulysses young 0.0 +ulysses xylophone 0.0 +ulysses xylophone 0.0 +ulysses xylophone 0.0 +ulysses white 0.0 +ulysses white 0.0 +ulysses van buren 0.0 +ulysses underhill 0.0 +ulysses underhill 0.0 +ulysses underhill 0.0 +ulysses underhill 0.0 +ulysses underhill 0.0 +ulysses underhill 0.0 +ulysses underhill 0.0 +ulysses thompson 1.0 +ulysses steinbeck 0.0 +ulysses steinbeck 0.0 +ulysses robinson 0.0 +ulysses quirinius 0.0 +ulysses polk 1.0 +ulysses polk 0.0 +ulysses polk 0.0 +ulysses polk 0.0 +ulysses ovid 0.0 +ulysses nixon 0.0 +ulysses miller 0.0 +ulysses miller 0.0 +ulysses laertes 0.0 +ulysses laertes 0.0 +ulysses laertes 0.0 +ulysses king 0.0 +ulysses johnson 0.0 +ulysses ichabod 0.0 +ulysses ichabod 0.0 +ulysses hernandez 1.0 +ulysses hernandez 0.0 +ulysses hernandez 0.0 +ulysses garcia 0.0 +ulysses ellison 1.0 +ulysses davidson 0.0 +ulysses carson 0.0 +ulysses carson 0.0 +ulysses carson 0.0 +ulysses carson 0.0 +ulysses brown 0.0 +tom zipper 0.0 +tom young 0.0 +tom young 0.0 +tom white 0.0 +tom van buren 0.0 +tom van buren 0.0 +tom van buren 0.0 +tom steinbeck 0.0 +tom robinson 0.0 +tom robinson 0.0 +tom robinson 0.0 +tom robinson 0.0 +tom quirinius 0.0 +tom quirinius 0.0 +tom polk 0.0 +tom polk 0.0 +tom ovid 0.0 +tom nixon 0.0 +tom miller 0.0 +tom miller 0.0 +tom miller 0.0 +tom laertes 0.0 +tom laertes 0.0 +tom king 0.0 +tom johnson 0.0 +tom johnson 0.0 +tom ichabod 0.0 +tom hernandez 0.0 +tom hernandez 0.0 +tom falkner 0.0 +tom falkner 0.0 +tom ellison 0.0 
+tom ellison 0.0 +tom ellison 0.0 +tom davidson 0.0 +tom carson 0.0 +tom carson 0.0 +tom carson 0.0 +tom brown 0.0 +tom brown 0.0 +sarah zipper 0.0 +sarah young 0.0 +sarah xylophone 0.0 +sarah white 0.0 +sarah white 0.0 +sarah steinbeck 0.0 +sarah robinson 0.0 +sarah robinson 0.0 +sarah ovid 0.0 +sarah miller 0.0 +sarah king 0.0 +sarah king 0.0 +sarah johnson 0.0 +sarah johnson 0.0 +sarah johnson 0.0 +sarah johnson 0.0 +sarah ichabod 0.0 +sarah ichabod 0.0 +sarah garcia 0.0 +sarah garcia 0.0 +sarah garcia 0.0 +sarah falkner 0.0 +sarah falkner 0.0 +sarah ellison 0.0 +sarah carson 0.0 +sarah carson 0.0 +sarah carson 0.0 +rachel zipper 0.0 +rachel zipper 0.0 +rachel young 0.0 +rachel white 0.0 +rachel white 0.0 +rachel underhill 0.0 +rachel thompson 0.0 +rachel thompson 0.0 +rachel thompson 0.0 +rachel robinson 1.0 +rachel robinson 0.0 +rachel robinson 0.0 +rachel quirinius 0.0 +rachel polk 0.0 +rachel ovid 0.0 +rachel ovid 0.0 +rachel laertes 0.0 +rachel laertes 0.0 +rachel king 0.0 +rachel king 0.0 +rachel johnson 0.0 +rachel falkner 0.0 +rachel falkner 0.0 +rachel falkner 0.0 +rachel falkner 0.0 +rachel ellison 0.0 +rachel davidson 0.0 +rachel carson 0.0 +rachel carson 0.0 +rachel brown 1.0 +rachel brown 0.0 +rachel brown 0.0 +rachel brown 0.0 +rachel brown 0.0 +rachel allen 0.0 +rachel allen 0.0 +quinn zipper 0.0 +quinn zipper 0.0 +quinn young 0.0 +quinn van buren 0.0 +quinn underhill 0.0 +quinn underhill 0.0 +quinn underhill 0.0 +quinn thompson 0.0 +quinn thompson 0.0 +quinn steinbeck 0.0 +quinn steinbeck 0.0 +quinn robinson 0.0 +quinn quirinius 0.0 +quinn ovid 0.0 +quinn nixon 0.0 +quinn laertes 1.0 +quinn laertes 0.0 +quinn laertes 0.0 +quinn king 1.0 +quinn king 0.0 +quinn ichabod 0.0 +quinn garcia 1.0 +quinn garcia 0.0 +quinn garcia 0.0 +quinn garcia 0.0 +quinn ellison 0.0 +quinn ellison 0.0 +quinn davidson 1.0 +quinn davidson 0.0 +quinn davidson 0.0 +quinn davidson 0.0 +quinn brown 0.0 +quinn brown 0.0 +quinn brown 0.0 +quinn allen 1.0 +quinn allen 0.0 +priscilla zipper 0.0 +priscilla zipper 0.0 +priscilla young 0.0 +priscilla young 0.0 +priscilla xylophone 0.0 +priscilla xylophone 0.0 +priscilla xylophone 0.0 +priscilla white 1.0 +priscilla van buren 0.0 +priscilla van buren 0.0 +priscilla van buren 0.0 +priscilla underhill 0.0 +priscilla underhill 0.0 +priscilla thompson 0.0 +priscilla quirinius 0.0 +priscilla polk 0.0 +priscilla ovid 0.0 +priscilla ovid 0.0 +priscilla nixon 0.0 +priscilla nixon 0.0 +priscilla king 0.0 +priscilla johnson 0.0 +priscilla johnson 0.0 +priscilla johnson 0.0 +priscilla johnson 0.0 +priscilla johnson 0.0 +priscilla ichabod 0.0 +priscilla ichabod 0.0 +priscilla carson 0.0 +priscilla carson 0.0 +priscilla carson 0.0 +priscilla brown 0.0 +priscilla brown 0.0 +priscilla brown 0.0 +oscar zipper 0.0 +oscar zipper 0.0 +oscar zipper 0.0 +oscar xylophone 0.0 +oscar xylophone 0.0 +oscar xylophone 0.0 +oscar white 0.0 +oscar white 0.0 +oscar white 0.0 +oscar white 0.0 +oscar van buren 1.0 +oscar van buren 0.0 +oscar van buren 0.0 +oscar underhill 0.0 +oscar thompson 0.0 +oscar thompson 0.0 +oscar thompson 0.0 +oscar thompson 0.0 +oscar steinbeck 0.0 +oscar robinson 0.0 +oscar robinson 0.0 +oscar robinson 0.0 +oscar robinson 0.0 +oscar quirinius 0.0 +oscar quirinius 0.0 +oscar quirinius 0.0 +oscar quirinius 0.0 +oscar polk 1.0 +oscar polk 0.0 +oscar ovid 0.0 +oscar ovid 0.0 +oscar ovid 0.0 +oscar nixon 0.0 +oscar laertes 0.0 +oscar laertes 0.0 +oscar laertes 0.0 +oscar laertes 0.0 +oscar king 0.0 +oscar king 0.0 +oscar king 0.0 +oscar johnson 0.0 +oscar johnson 0.0 
+oscar ichabod 0.0 +oscar ichabod 0.0 +oscar ichabod 0.0 +oscar ichabod 0.0 +oscar hernandez 0.0 +oscar hernandez 0.0 +oscar garcia 0.0 +oscar falkner 1.0 +oscar ellison 0.0 +oscar ellison 0.0 +oscar davidson 0.0 +oscar carson 0.0 +oscar carson 0.0 +oscar carson 0.0 +oscar carson 0.0 +oscar carson 0.0 +oscar brown 0.0 +oscar allen 0.0 +nick zipper 0.0 +nick zipper 0.0 +nick young 1.0 +nick young 0.0 +nick xylophone 0.0 +nick van buren 0.0 +nick underhill 0.0 +nick thompson 0.0 +nick steinbeck 0.0 +nick robinson 0.0 +nick robinson 0.0 +nick quirinius 0.0 +nick quirinius 0.0 +nick polk 0.0 +nick ovid 0.0 +nick nixon 0.0 +nick miller 0.0 +nick laertes 0.0 +nick johnson 0.0 +nick johnson 0.0 +nick ichabod 0.0 +nick ichabod 0.0 +nick ichabod 0.0 +nick garcia 0.0 +nick garcia 0.0 +nick garcia 0.0 +nick falkner 0.0 +nick falkner 0.0 +nick ellison 0.0 +nick ellison 0.0 +nick davidson 0.0 +nick brown 0.0 +nick allen 0.0 +nick allen 0.0 +mike zipper 0.0 +mike zipper 0.0 +mike zipper 0.0 +mike young 0.0 +mike young 0.0 +mike young 0.0 +mike white 0.0 +mike white 0.0 +mike white 0.0 +mike white 0.0 +mike van buren 0.0 +mike van buren 0.0 +mike steinbeck 0.0 +mike steinbeck 0.0 +mike steinbeck 0.0 +mike steinbeck 0.0 +mike quirinius 0.0 +mike polk 0.0 +mike polk 0.0 +mike polk 0.0 +mike nixon 0.0 +mike nixon 0.0 +mike miller 0.0 +mike king 0.0 +mike king 0.0 +mike king 0.0 +mike king 0.0 +mike king 0.0 +mike king 0.0 +mike ichabod 0.0 +mike hernandez 0.0 +mike hernandez 0.0 +mike garcia 0.0 +mike garcia 0.0 +mike garcia 0.0 +mike falkner 0.0 +mike ellison 0.0 +mike ellison 0.0 +mike ellison 0.0 +mike ellison 0.0 +mike ellison 0.0 +mike davidson 0.0 +mike davidson 0.0 +mike carson 0.0 +mike carson 0.0 +mike carson 0.0 +mike brown 0.0 +mike allen 0.0 +luke zipper 0.0 +luke xylophone 0.0 +luke white 0.0 +luke van buren 0.0 +luke underhill 1.0 +luke underhill 0.0 +luke underhill 0.0 +luke thompson 0.0 +luke robinson 0.0 +luke robinson 0.0 +luke quirinius 0.0 +luke polk 0.0 +luke polk 0.0 +luke ovid 0.0 +luke ovid 0.0 +luke miller 0.0 +luke laertes 0.0 +luke laertes 0.0 +luke laertes 0.0 +luke laertes 0.0 +luke laertes 0.0 +luke johnson 0.0 +luke johnson 0.0 +luke johnson 0.0 +luke ichabod 0.0 +luke ichabod 0.0 +luke garcia 0.0 +luke garcia 0.0 +luke falkner 0.0 +luke falkner 0.0 +luke ellison 0.0 +luke ellison 0.0 +luke ellison 0.0 +luke davidson 0.0 +luke davidson 0.0 +luke brown 0.0 +luke allen 1.0 +luke allen 0.0 +luke allen 0.0 +luke allen 0.0 +luke allen 0.0 +katie zipper 1.0 +katie zipper 0.0 +katie young 1.0 +katie young 0.0 +katie young 0.0 +katie xylophone 0.0 +katie white 0.0 +katie white 0.0 +katie van buren 0.0 +katie van buren 0.0 +katie robinson 0.0 +katie polk 0.0 +katie polk 0.0 +katie ovid 0.0 +katie nixon 1.0 +katie miller 0.0 +katie miller 0.0 +katie king 0.0 +katie king 0.0 +katie king 0.0 +katie ichabod 0.0 +katie ichabod 0.0 +katie ichabod 0.0 +katie hernandez 0.0 +katie garcia 0.0 +katie garcia 0.0 +katie falkner 0.0 +katie ellison 0.0 +katie ellison 0.0 +katie davidson 0.0 +katie brown 0.0 +katie allen 0.0 +jessica zipper 0.0 +jessica zipper 0.0 +jessica zipper 0.0 +jessica young 0.0 +jessica young 0.0 +jessica xylophone 0.0 +jessica white 0.0 +jessica white 0.0 +jessica white 0.0 +jessica white 0.0 +jessica white 0.0 +jessica van buren 0.0 +jessica underhill 1.0 +jessica underhill 0.0 +jessica underhill 0.0 +jessica thompson 0.0 +jessica thompson 0.0 +jessica robinson 0.0 +jessica quirinius 1.0 +jessica quirinius 0.0 +jessica quirinius 0.0 +jessica quirinius 0.0 +jessica polk 0.0 
+jessica ovid 0.0 +jessica ovid 0.0 +jessica nixon 0.0 +jessica nixon 0.0 +jessica miller 0.0 +jessica johnson 0.0 +jessica johnson 0.0 +jessica ichabod 1.0 +jessica garcia 0.0 +jessica garcia 0.0 +jessica falkner 0.0 +jessica ellison 0.0 +jessica ellison 0.0 +jessica davidson 0.0 +jessica davidson 0.0 +jessica davidson 0.0 +jessica davidson 0.0 +jessica carson 0.0 +jessica carson 0.0 +jessica carson 0.0 +jessica brown 0.0 +irene xylophone 0.0 +irene van buren 0.0 +irene van buren 0.0 +irene underhill 0.0 +irene underhill 0.0 +irene thompson 0.0 +irene steinbeck 0.0 +irene robinson 0.0 +irene quirinius 0.5 +irene quirinius 0.0 +irene quirinius 0.0 +irene polk 0.0 +irene polk 0.0 +irene polk 0.0 +irene polk 0.0 +irene polk 0.0 +irene ovid 0.0 +irene ovid 0.0 +irene ovid 0.0 +irene nixon 0.0 +irene nixon 0.0 +irene nixon 0.0 +irene miller 0.0 +irene laertes 0.0 +irene laertes 0.0 +irene laertes 0.0 +irene johnson 0.0 +irene ichabod 0.0 +irene ichabod 0.0 +irene garcia 0.0 +irene garcia 0.0 +irene garcia 0.0 +irene falkner 0.0 +irene falkner 0.0 +irene ellison 0.0 +irene ellison 0.0 +irene carson 0.0 +irene brown 0.0 +irene brown 0.0 +irene brown 0.0 +irene allen 0.0 +holly zipper 1.0 +holly zipper 0.0 +holly young 0.0 +holly young 0.0 +holly xylophone 0.0 +holly white 0.0 +holly white 0.0 +holly van buren 0.0 +holly underhill 1.0 +holly underhill 0.0 +holly underhill 0.0 +holly underhill 0.0 +holly thompson 1.0 +holly thompson 0.0 +holly thompson 0.0 +holly robinson 0.0 +holly polk 0.0 +holly polk 0.0 +holly nixon 0.0 +holly nixon 0.0 +holly miller 1.0 +holly laertes 0.0 +holly king 0.0 +holly king 0.0 +holly johnson 0.0 +holly johnson 0.0 +holly johnson 0.0 +holly ichabod 0.0 +holly ichabod 0.0 +holly ichabod 0.0 +holly hernandez 0.0 +holly hernandez 0.0 +holly hernandez 0.0 +holly hernandez 0.0 +holly falkner 0.0 +holly brown 0.0 +holly brown 0.0 +holly allen 0.0 +gabriella zipper 0.0 +gabriella zipper 0.0 +gabriella young 0.0 +gabriella young 0.0 +gabriella white 0.0 +gabriella van buren 0.0 +gabriella van buren 0.0 +gabriella thompson 0.0 +gabriella thompson 0.0 +gabriella thompson 0.0 +gabriella steinbeck 0.0 +gabriella steinbeck 0.0 +gabriella polk 0.0 +gabriella polk 0.0 +gabriella ovid 0.0 +gabriella ovid 0.0 +gabriella miller 0.0 +gabriella laertes 0.0 +gabriella king 0.0 +gabriella king 0.0 +gabriella ichabod 1.0 +gabriella ichabod 0.0 +gabriella ichabod 0.0 +gabriella ichabod 0.0 +gabriella ichabod 0.0 +gabriella hernandez 1.0 +gabriella hernandez 0.0 +gabriella garcia 0.0 +gabriella falkner 0.0 +gabriella falkner 0.0 +gabriella falkner 0.0 +gabriella ellison 0.0 +gabriella ellison 0.0 +gabriella davidson 0.0 +gabriella carson 0.0 +gabriella brown 0.0 +gabriella brown 0.0 +gabriella allen 0.0 +gabriella allen 0.0 +fred zipper 0.0 +fred young 0.0 +fred young 0.0 +fred white 0.0 +fred van buren 0.0 +fred van buren 0.0 +fred van buren 0.0 +fred van buren 0.0 +fred underhill 0.0 +fred steinbeck 0.0 +fred steinbeck 0.0 +fred steinbeck 0.0 +fred robinson 1.0 +fred quirinius 0.0 +fred quirinius 0.0 +fred polk 0.0 +fred polk 0.0 +fred polk 0.0 +fred polk 0.0 +fred nixon 0.0 +fred nixon 0.0 +fred nixon 0.0 +fred nixon 0.0 +fred miller 0.0 +fred laertes 0.0 +fred king 0.0 +fred king 0.0 +fred johnson 1.0 +fred ichabod 0.0 +fred ichabod 0.0 +fred hernandez 0.0 +fred falkner 1.0 +fred falkner 0.0 +fred falkner 0.0 +fred ellison 0.0 +fred ellison 0.0 +fred ellison 0.0 +fred davidson 0.0 +fred davidson 0.0 +fred davidson 0.0 +ethan zipper 0.0 +ethan zipper 0.0 +ethan xylophone 0.0 +ethan white 
0.0 +ethan white 0.0 +ethan van buren 0.0 +ethan underhill 0.5 +ethan robinson 0.0 +ethan robinson 0.0 +ethan quirinius 0.0 +ethan quirinius 0.0 +ethan quirinius 0.0 +ethan polk 1.0 +ethan polk 0.0 +ethan polk 0.0 +ethan polk 0.0 +ethan ovid 0.0 +ethan nixon 0.0 +ethan miller 0.0 +ethan laertes 0.0 +ethan laertes 0.0 +ethan laertes 0.0 +ethan laertes 0.0 +ethan laertes 0.0 +ethan laertes 0.0 +ethan laertes 0.0 +ethan king 0.0 +ethan johnson 0.0 +ethan hernandez 0.0 +ethan garcia 0.0 +ethan falkner 0.0 +ethan falkner 0.0 +ethan ellison 0.0 +ethan ellison 0.0 +ethan carson 0.0 +ethan brown 1.0 +ethan brown 0.0 +ethan brown 0.0 +ethan brown 0.0 +ethan brown 0.0 +ethan brown 0.0 +ethan allen 0.0 +david young 0.0 +david young 0.0 +david xylophone 1.0 +david xylophone 0.0 +david xylophone 0.0 +david white 0.0 +david van buren 0.0 +david van buren 0.0 +david underhill 0.0 +david underhill 0.0 +david underhill 0.0 +david thompson 1.0 +david robinson 0.0 +david robinson 0.0 +david quirinius 0.0 +david quirinius 0.0 +david quirinius 0.0 +david ovid 0.0 +david ovid 0.0 +david nixon 0.0 +david laertes 0.0 +david ichabod 1.0 +david ichabod 0.0 +david hernandez 1.0 +david ellison 0.0 +david ellison 0.0 +david ellison 0.0 +david davidson 0.0 +david davidson 0.0 +david davidson 0.0 +david davidson 0.0 +david brown 0.0 +david brown 0.0 +david allen 0.0 +david allen 0.0 +calvin zipper 0.0 +calvin zipper 0.0 +calvin young 0.0 +calvin young 0.0 +calvin xylophone 0.0 +calvin xylophone 0.0 +calvin xylophone 0.0 +calvin white 0.0 +calvin white 0.0 +calvin van buren 1.0 +calvin van buren 0.0 +calvin underhill 0.0 +calvin thompson 0.0 +calvin thompson 0.0 +calvin steinbeck 0.0 +calvin steinbeck 0.0 +calvin steinbeck 0.0 +calvin robinson 0.0 +calvin quirinius 0.0 +calvin quirinius 0.0 +calvin polk 0.0 +calvin ovid 0.0 +calvin ovid 0.0 +calvin ovid 0.0 +calvin ovid 0.0 +calvin nixon 0.0 +calvin nixon 0.0 +calvin nixon 0.0 +calvin laertes 0.0 +calvin laertes 0.0 +calvin johnson 0.0 +calvin hernandez 0.0 +calvin garcia 0.0 +calvin falkner 0.0 +calvin falkner 0.0 +calvin falkner 0.0 +calvin falkner 0.0 +calvin falkner 0.0 +calvin falkner 0.0 +calvin ellison 0.0 +calvin davidson 0.0 +calvin davidson 0.0 +calvin carson 0.0 +calvin brown 0.0 +calvin brown 0.0 +calvin brown 0.0 +calvin allen 0.0 +bob zipper 0.0 +bob zipper 0.0 +bob zipper 0.0 +bob young 0.0 +bob xylophone 0.0 +bob xylophone 0.0 +bob white 0.0 +bob white 0.0 +bob van buren 0.0 +bob steinbeck 0.0 +bob quirinius 0.0 +bob polk 0.0 +bob ovid 0.0 +bob ovid 0.0 +bob ovid 0.0 +bob ovid 0.0 +bob miller 0.0 +bob laertes 0.0 +bob laertes 0.0 +bob king 1.0 +bob king 0.0 +bob king 0.0 +bob ichabod 0.0 +bob hernandez 1.0 +bob garcia 0.0 +bob garcia 0.0 +bob garcia 0.0 +bob garcia 0.0 +bob garcia 0.0 +bob falkner 0.0 +bob ellison 1.0 +bob ellison 0.0 +bob ellison 0.0 +bob ellison 0.0 +bob davidson 0.0 +bob davidson 0.0 +bob davidson 0.0 +bob carson 0.0 +bob brown 0.0 +bob brown 0.0 +bob brown 0.0 +alice zipper 0.0 +alice zipper 0.0 +alice zipper 0.0 +alice xylophone 0.0 +alice xylophone 0.0 +alice xylophone 0.0 +alice van buren 0.0 +alice underhill 0.0 +alice steinbeck 0.0 +alice steinbeck 0.0 +alice steinbeck 0.0 +alice robinson 0.0 +alice robinson 0.0 +alice quirinius 0.0 +alice quirinius 0.0 +alice polk 1.0 +alice ovid 0.0 +alice nixon 0.0 +alice nixon 0.0 +alice nixon 0.0 +alice miller 0.0 +alice laertes 0.0 +alice laertes 0.0 +alice king 0.0 +alice king 0.0 +alice king 0.0 +alice johnson 0.0 +alice hernandez 0.0 +alice hernandez 0.0 +alice garcia 0.0 +alice falkner 
0.0 +alice davidson 0.0 +alice carson 0.0 +alice brown 0.0 +alice allen 0.0 +alice allen 0.0 +alice allen 0.0 + 0.0 + 0.0 + 0.0 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 new file mode 100644 index 0000000000000..9091a9156134c --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 2-0-81bb7f49a55385878637c8aac4d08e5 @@ -0,0 +1,1294 @@ +2013-03-01 09:11:58.70307 52.64 1 +2013-03-01 09:11:58.70307 52.64 1 +2013-03-01 09:11:58.70307 52.64 1 +2013-03-01 09:11:58.70307 52.64 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703071 71.07 1 +2013-03-01 09:11:58.703072 2.96 1 +2013-03-01 09:11:58.703072 2.96 1 +2013-03-01 09:11:58.703072 2.96 1 +2013-03-01 09:11:58.703072 2.96 1 +2013-03-01 09:11:58.703072 2.96 1 +2013-03-01 09:11:58.703073 10.07 1 +2013-03-01 09:11:58.703073 10.07 1 +2013-03-01 09:11:58.703073 10.07 1 +2013-03-01 09:11:58.703073 10.07 1 +2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703074 37.8 1 +2013-03-01 09:11:58.703075 5.64 1 +2013-03-01 09:11:58.703075 5.64 1 +2013-03-01 09:11:58.703075 5.64 1 +2013-03-01 09:11:58.703075 5.64 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703077 10.16 1 +2013-03-01 09:11:58.703077 10.16 1 +2013-03-01 09:11:58.703077 10.16 1 +2013-03-01 09:11:58.703077 10.16 1 +2013-03-01 09:11:58.703077 10.16 1 +2013-03-01 09:11:58.703077 10.16 1 +2013-03-01 09:11:58.703078 61.52 1 +2013-03-01 09:11:58.703078 61.52 1 +2013-03-01 09:11:58.703078 61.52 1 +2013-03-01 09:11:58.703078 61.52 1 +2013-03-01 09:11:58.703078 61.52 1 +2013-03-01 09:11:58.703078 61.52 1 +2013-03-01 09:11:58.703079 27.32 1 +2013-03-01 09:11:58.703079 27.32 1 +2013-03-01 09:11:58.703079 27.32 1 +2013-03-01 09:11:58.703079 27.32 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.70308 1.76 1 +2013-03-01 09:11:58.703081 67.9 1 +2013-03-01 09:11:58.703081 67.9 1 +2013-03-01 09:11:58.703081 67.9 1 +2013-03-01 09:11:58.703081 67.9 1 +2013-03-01 09:11:58.703081 67.9 1 +2013-03-01 09:11:58.703082 37.25 1 +2013-03-01 09:11:58.703082 37.25 1 +2013-03-01 09:11:58.703082 37.25 1 +2013-03-01 09:11:58.703082 37.25 1 +2013-03-01 09:11:58.703082 37.25 1 +2013-03-01 09:11:58.703082 37.25 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703083 20.52 1 +2013-03-01 09:11:58.703084 1.76 1 +2013-03-01 09:11:58.703084 1.76 1 +2013-03-01 09:11:58.703084 1.76 1 +2013-03-01 09:11:58.703084 1.76 1 +2013-03-01 
09:11:58.703084 1.76 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703085 1.01 1 +2013-03-01 09:11:58.703086 9.96 1 +2013-03-01 09:11:58.703086 9.96 1 +2013-03-01 09:11:58.703086 9.96 1 +2013-03-01 09:11:58.703086 9.96 1 +2013-03-01 09:11:58.703086 9.96 1 +2013-03-01 09:11:58.703087 10.63 1 +2013-03-01 09:11:58.703087 10.63 1 +2013-03-01 09:11:58.703087 10.63 1 +2013-03-01 09:11:58.703087 10.63 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703089 8.76 1 +2013-03-01 09:11:58.703089 8.76 1 +2013-03-01 09:11:58.703089 8.76 1 +2013-03-01 09:11:58.703089 8.76 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.70309 50.99 1 +2013-03-01 09:11:58.703091 15.85 1 +2013-03-01 09:11:58.703091 15.85 1 +2013-03-01 09:11:58.703091 15.85 1 +2013-03-01 09:11:58.703091 15.85 1 +2013-03-01 09:11:58.703092 36.84 1 +2013-03-01 09:11:58.703092 36.84 1 +2013-03-01 09:11:58.703092 36.84 1 +2013-03-01 09:11:58.703092 36.84 1 +2013-03-01 09:11:58.703092 36.84 1 +2013-03-01 09:11:58.703092 36.84 1 +2013-03-01 09:11:58.703093 14.85 1 +2013-03-01 09:11:58.703093 14.85 1 +2013-03-01 09:11:58.703093 14.85 1 +2013-03-01 09:11:58.703094 57.11 1 +2013-03-01 09:11:58.703094 57.11 1 +2013-03-01 09:11:58.703094 57.11 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703095 9.77 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703096 11.64 1 +2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703098 1.35 1 +2013-03-01 09:11:58.703098 1.35 1 +2013-03-01 09:11:58.703098 1.35 1 +2013-03-01 09:11:58.703098 1.35 1 +2013-03-01 09:11:58.703098 1.35 1 +2013-03-01 09:11:58.703099 11.69 1 +2013-03-01 09:11:58.703099 11.69 1 +2013-03-01 09:11:58.703099 11.69 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703101 8.72 1 +2013-03-01 09:11:58.703102 63.65 1 +2013-03-01 09:11:58.703102 63.65 1 +2013-03-01 09:11:58.703102 63.65 1 +2013-03-01 09:11:58.703102 63.65 1 +2013-03-01 09:11:58.703102 63.65 1 +2013-03-01 09:11:58.703102 63.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703103 8.65 1 +2013-03-01 09:11:58.703104 2.04 1 +2013-03-01 09:11:58.703104 2.04 1 +2013-03-01 09:11:58.703104 2.04 1 +2013-03-01 09:11:58.703104 2.04 
1 +2013-03-01 09:11:58.703104 2.04 1 +2013-03-01 09:11:58.703104 2.04 1 +2013-03-01 09:11:58.703105 28.47 1 +2013-03-01 09:11:58.703105 28.47 1 +2013-03-01 09:11:58.703106 11.81 1 +2013-03-01 09:11:58.703106 11.81 1 +2013-03-01 09:11:58.703106 11.81 1 +2013-03-01 09:11:58.703106 11.81 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703107 16.26 1 +2013-03-01 09:11:58.703108 28.47 1 +2013-03-01 09:11:58.703108 28.47 1 +2013-03-01 09:11:58.703108 28.47 1 +2013-03-01 09:11:58.703108 28.47 1 +2013-03-01 09:11:58.703109 38.98 1 +2013-03-01 09:11:58.703109 38.98 1 +2013-03-01 09:11:58.703109 38.98 1 +2013-03-01 09:11:58.703109 38.98 1 +2013-03-01 09:11:58.70311 8.16 1 +2013-03-01 09:11:58.70311 8.16 1 +2013-03-01 09:11:58.70311 8.16 1 +2013-03-01 09:11:58.70311 8.16 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703111 18.8 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703113 21.8 1 +2013-03-01 09:11:58.703114 73.94 1 +2013-03-01 09:11:58.703114 73.94 1 +2013-03-01 09:11:58.703114 73.94 1 +2013-03-01 09:11:58.703114 73.94 1 +2013-03-01 09:11:58.703114 73.94 1 +2013-03-01 09:11:58.703114 73.94 1 +2013-03-01 09:11:58.703115 27.52 1 +2013-03-01 09:11:58.703115 27.52 1 +2013-03-01 09:11:58.703115 27.52 1 +2013-03-01 09:11:58.703115 27.52 1 +2013-03-01 09:11:58.703115 27.52 1 +2013-03-01 09:11:58.703116 33.45 1 +2013-03-01 09:11:58.703116 33.45 1 +2013-03-01 09:11:58.703116 33.45 1 +2013-03-01 09:11:58.703116 33.45 1 +2013-03-01 09:11:58.703117 21.81 1 +2013-03-01 09:11:58.703117 21.81 1 +2013-03-01 09:11:58.703117 21.81 1 +2013-03-01 09:11:58.703117 21.81 1 +2013-03-01 09:11:58.703117 21.81 1 +2013-03-01 09:11:58.703117 21.81 1 +2013-03-01 09:11:58.703118 8.69 1 +2013-03-01 09:11:58.703118 8.69 1 +2013-03-01 09:11:58.703119 58.02 1 +2013-03-01 09:11:58.703119 58.02 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.703121 96.9 1 +2013-03-01 09:11:58.703121 96.9 1 +2013-03-01 09:11:58.703121 96.9 1 +2013-03-01 09:11:58.703121 96.9 1 +2013-03-01 09:11:58.703122 53.56 1 +2013-03-01 09:11:58.703122 53.56 1 +2013-03-01 09:11:58.703122 53.56 1 +2013-03-01 09:11:58.703122 53.56 1 +2013-03-01 09:11:58.703122 53.56 1 +2013-03-01 09:11:58.703123 94.35 1 +2013-03-01 09:11:58.703123 94.35 1 +2013-03-01 09:11:58.703123 94.35 1 +2013-03-01 09:11:58.703123 94.35 1 +2013-03-01 09:11:58.703124 8.93 1 +2013-03-01 09:11:58.703124 8.93 1 +2013-03-01 09:11:58.703124 8.93 1 +2013-03-01 09:11:58.703125 14.94 1 +2013-03-01 09:11:58.703125 14.94 1 +2013-03-01 09:11:58.703125 14.94 1 +2013-03-01 09:11:58.703126 5.49 1 +2013-03-01 09:11:58.703126 5.49 1 +2013-03-01 09:11:58.703126 5.49 1 +2013-03-01 09:11:58.703126 5.49 1 
+2013-03-01 09:11:58.703127 3.98 1 +2013-03-01 09:11:58.703127 3.98 1 +2013-03-01 09:11:58.703127 3.98 1 +2013-03-01 09:11:58.703127 3.98 1 +2013-03-01 09:11:58.703128 11.45 1 +2013-03-01 09:11:58.703128 11.45 1 +2013-03-01 09:11:58.703128 11.45 1 +2013-03-01 09:11:58.703128 11.45 1 +2013-03-01 09:11:58.70313 5.83 1 +2013-03-01 09:11:58.70313 5.83 1 +2013-03-01 09:11:58.70313 5.83 1 +2013-03-01 09:11:58.70313 5.83 1 +2013-03-01 09:11:58.70313 5.83 1 +2013-03-01 09:11:58.70313 5.83 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703131 1.75 1 +2013-03-01 09:11:58.703132 1.86 1 +2013-03-01 09:11:58.703132 1.86 1 +2013-03-01 09:11:58.703133 27.34 1 +2013-03-01 09:11:58.703133 27.34 1 +2013-03-01 09:11:58.703133 27.34 1 +2013-03-01 09:11:58.703133 27.34 1 +2013-03-01 09:11:58.703134 98.9 1 +2013-03-01 09:11:58.703134 98.9 1 +2013-03-01 09:11:58.703134 98.9 1 +2013-03-01 09:11:58.703134 98.9 1 +2013-03-01 09:11:58.703134 98.9 1 +2013-03-01 09:11:58.703135 29.14 1 +2013-03-01 09:11:58.703135 29.14 1 +2013-03-01 09:11:58.703135 29.14 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703136 11.87 1 +2013-03-01 09:11:58.703137 18.11 1 +2013-03-01 09:11:58.703137 18.11 1 +2013-03-01 09:11:58.703137 18.11 1 +2013-03-01 09:11:58.703137 18.11 1 +2013-03-01 09:11:58.703137 18.11 1 +2013-03-01 09:11:58.703138 55.68 1 +2013-03-01 09:11:58.703138 55.68 1 +2013-03-01 09:11:58.703138 55.68 1 +2013-03-01 09:11:58.703138 55.68 1 +2013-03-01 09:11:58.703139 12.67 1 +2013-03-01 09:11:58.703139 12.67 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.70314 2.83 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703141 76.06 1 +2013-03-01 09:11:58.703142 24.25 1 +2013-03-01 09:11:58.703142 24.25 1 +2013-03-01 09:11:58.703142 24.25 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703143 26.86 1 +2013-03-01 09:11:58.703144 3.43 1 +2013-03-01 09:11:58.703144 3.43 1 +2013-03-01 09:11:58.703144 3.43 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703145 8.46 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703146 89.12 1 +2013-03-01 09:11:58.703147 54.94 1 
+2013-03-01 09:11:58.703147 54.94 1 +2013-03-01 09:11:58.703147 54.94 1 +2013-03-01 09:11:58.703147 54.94 1 +2013-03-01 09:11:58.703147 54.94 1 +2013-03-01 09:11:58.703148 26.97 1 +2013-03-01 09:11:58.703148 26.97 1 +2013-03-01 09:11:58.703148 26.97 1 +2013-03-01 09:11:58.703148 26.97 1 +2013-03-01 09:11:58.703148 26.97 1 +2013-03-01 09:11:58.703148 26.97 1 +2013-03-01 09:11:58.703149 58.05 1 +2013-03-01 09:11:58.703149 58.05 1 +2013-03-01 09:11:58.703149 58.05 1 +2013-03-01 09:11:58.703149 58.05 1 +2013-03-01 09:11:58.703149 58.05 1 +2013-03-01 09:11:58.703149 58.05 1 +2013-03-01 09:11:58.70315 33.01 1 +2013-03-01 09:11:58.70315 33.01 1 +2013-03-01 09:11:58.70315 33.01 1 +2013-03-01 09:11:58.70315 33.01 1 +2013-03-01 09:11:58.703151 95.69 1 +2013-03-01 09:11:58.703151 95.69 1 +2013-03-01 09:11:58.703151 95.69 1 +2013-03-01 09:11:58.703151 95.69 1 +2013-03-01 09:11:58.703151 95.69 1 +2013-03-01 09:11:58.703152 6.85 1 +2013-03-01 09:11:58.703152 6.85 1 +2013-03-01 09:11:58.703152 6.85 1 +2013-03-01 09:11:58.703152 6.85 1 +2013-03-01 09:11:58.703152 6.85 1 +2013-03-01 09:11:58.703153 4.11 1 +2013-03-01 09:11:58.703153 4.11 1 +2013-03-01 09:11:58.703153 4.11 1 +2013-03-01 09:11:58.703153 4.11 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703155 6.93 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703156 21.79 1 +2013-03-01 09:11:58.703157 1.29 1 +2013-03-01 09:11:58.703157 1.29 1 +2013-03-01 09:11:58.703157 1.29 1 +2013-03-01 09:11:58.703157 1.29 1 +2013-03-01 09:11:58.703157 1.29 1 +2013-03-01 09:11:58.703157 1.29 1 +2013-03-01 09:11:58.703158 71.89 1 +2013-03-01 09:11:58.703158 71.89 1 +2013-03-01 09:11:58.703158 71.89 1 +2013-03-01 09:11:58.703159 64.06 1 +2013-03-01 09:11:58.703159 64.06 1 +2013-03-01 09:11:58.703159 64.06 1 +2013-03-01 09:11:58.703159 64.06 1 +2013-03-01 09:11:58.703159 64.06 1 +2013-03-01 09:11:58.703159 64.06 1 +2013-03-01 09:11:58.70316 11.93 1 +2013-03-01 09:11:58.70316 11.93 1 +2013-03-01 09:11:58.70316 11.93 1 +2013-03-01 09:11:58.70316 11.93 1 +2013-03-01 09:11:58.703161 15.82 1 +2013-03-01 09:11:58.703161 15.82 1 +2013-03-01 09:11:58.703161 15.82 1 +2013-03-01 09:11:58.703161 15.82 1 +2013-03-01 09:11:58.703161 15.82 1 +2013-03-01 09:11:58.703162 3.51 1 +2013-03-01 09:11:58.703162 3.51 1 +2013-03-01 09:11:58.703162 3.51 1 +2013-03-01 09:11:58.703162 3.51 1 +2013-03-01 09:11:58.703162 3.51 1 +2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703163 15.7 1 +2013-03-01 09:11:58.703164 30.27 1 +2013-03-01 09:11:58.703164 30.27 1 +2013-03-01 09:11:58.703164 30.27 1 +2013-03-01 09:11:58.703164 30.27 1 +2013-03-01 09:11:58.703164 30.27 1 +2013-03-01 09:11:58.703164 30.27 1 +2013-03-01 09:11:58.703165 8.38 1 +2013-03-01 09:11:58.703165 8.38 1 +2013-03-01 09:11:58.703165 8.38 1 +2013-03-01 09:11:58.703166 16.6 1 
+2013-03-01 09:11:58.703166 16.6 1 +2013-03-01 09:11:58.703166 16.6 1 +2013-03-01 09:11:58.703167 17.66 1 +2013-03-01 09:11:58.703167 17.66 1 +2013-03-01 09:11:58.703167 17.66 1 +2013-03-01 09:11:58.703167 17.66 1 +2013-03-01 09:11:58.703167 17.66 1 +2013-03-01 09:11:58.703167 17.66 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703168 32.03 1 +2013-03-01 09:11:58.703169 39.96 1 +2013-03-01 09:11:58.703169 39.96 1 +2013-03-01 09:11:58.703169 39.96 1 +2013-03-01 09:11:58.703169 39.96 1 +2013-03-01 09:11:58.703169 39.96 1 +2013-03-01 09:11:58.70317 11.44 1 +2013-03-01 09:11:58.70317 11.44 1 +2013-03-01 09:11:58.70317 11.44 1 +2013-03-01 09:11:58.70317 11.44 1 +2013-03-01 09:11:58.70317 11.44 1 +2013-03-01 09:11:58.703171 24.94 1 +2013-03-01 09:11:58.703171 24.94 1 +2013-03-01 09:11:58.703171 24.94 1 +2013-03-01 09:11:58.703171 24.94 1 +2013-03-01 09:11:58.703171 24.94 1 +2013-03-01 09:11:58.703171 24.94 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703172 3.44 1 +2013-03-01 09:11:58.703173 8.77 1 +2013-03-01 09:11:58.703173 8.77 1 +2013-03-01 09:11:58.703173 8.77 1 +2013-03-01 09:11:58.703173 8.77 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703175 33.37 1 +2013-03-01 09:11:58.703175 33.37 1 +2013-03-01 09:11:58.703175 33.37 1 +2013-03-01 09:11:58.703175 33.37 1 +2013-03-01 09:11:58.703175 33.37 1 +2013-03-01 09:11:58.703175 33.37 1 +2013-03-01 09:11:58.703176 28.2 1 +2013-03-01 09:11:58.703176 28.2 1 +2013-03-01 09:11:58.703176 28.2 1 +2013-03-01 09:11:58.703176 28.2 1 +2013-03-01 09:11:58.703176 28.2 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703177 11.43 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703178 9.12 1 +2013-03-01 09:11:58.703179 10.82 1 +2013-03-01 09:11:58.703179 10.82 1 +2013-03-01 09:11:58.703179 10.82 1 +2013-03-01 09:11:58.703179 10.82 1 +2013-03-01 09:11:58.70318 10.28 1 +2013-03-01 09:11:58.70318 10.28 1 +2013-03-01 09:11:58.70318 10.28 1 +2013-03-01 09:11:58.70318 10.28 1 +2013-03-01 09:11:58.70318 10.28 1 +2013-03-01 09:11:58.70318 10.28 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703182 1.23 1 +2013-03-01 09:11:58.703182 1.23 1 +2013-03-01 09:11:58.703182 1.23 1 +2013-03-01 09:11:58.703182 1.23 1 +2013-03-01 09:11:58.703182 1.23 1 +2013-03-01 09:11:58.703183 36.74 1 +2013-03-01 09:11:58.703183 36.74 1 +2013-03-01 09:11:58.703183 36.74 1 +2013-03-01 09:11:58.703183 36.74 1 
+2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703184 8.95 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703186 13.81 1 +2013-03-01 09:11:58.703186 13.81 1 +2013-03-01 09:11:58.703186 13.81 1 +2013-03-01 09:11:58.703186 13.81 1 +2013-03-01 09:11:58.703187 64.89 1 +2013-03-01 09:11:58.703187 64.89 1 +2013-03-01 09:11:58.703187 64.89 1 +2013-03-01 09:11:58.703187 64.89 1 +2013-03-01 09:11:58.703187 64.89 1 +2013-03-01 09:11:58.703187 64.89 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.703189 36.96 1 +2013-03-01 09:11:58.70319 90.09 1 +2013-03-01 09:11:58.70319 90.09 1 +2013-03-01 09:11:58.70319 90.09 1 +2013-03-01 09:11:58.703192 2.63 1 +2013-03-01 09:11:58.703192 2.63 1 +2013-03-01 09:11:58.703192 2.63 1 +2013-03-01 09:11:58.703192 2.63 1 +2013-03-01 09:11:58.703193 28.42 1 +2013-03-01 09:11:58.703193 28.42 1 +2013-03-01 09:11:58.703193 28.42 1 +2013-03-01 09:11:58.703194 15.07 1 +2013-03-01 09:11:58.703194 15.07 1 +2013-03-01 09:11:58.703194 15.07 1 +2013-03-01 09:11:58.703194 15.07 1 +2013-03-01 09:11:58.703194 15.07 1 +2013-03-01 09:11:58.703194 15.07 1 +2013-03-01 09:11:58.703195 3.81 1 +2013-03-01 09:11:58.703195 3.81 1 +2013-03-01 09:11:58.703195 3.81 1 +2013-03-01 09:11:58.703195 3.81 1 +2013-03-01 09:11:58.703195 3.81 1 +2013-03-01 09:11:58.703195 3.81 1 +2013-03-01 09:11:58.703196 0.08 1 +2013-03-01 09:11:58.703196 0.08 1 +2013-03-01 09:11:58.703196 0.08 1 +2013-03-01 09:11:58.703197 16.01 1 +2013-03-01 09:11:58.703197 16.01 1 +2013-03-01 09:11:58.703197 16.01 1 +2013-03-01 09:11:58.703197 16.01 1 +2013-03-01 09:11:58.703197 16.01 1 +2013-03-01 09:11:58.703197 16.01 1 +2013-03-01 09:11:58.703198 30.6 1 +2013-03-01 09:11:58.703198 30.6 1 +2013-03-01 09:11:58.703198 30.6 1 +2013-03-01 09:11:58.703198 30.6 1 +2013-03-01 09:11:58.703199 45.69 1 +2013-03-01 09:11:58.703199 45.69 1 +2013-03-01 09:11:58.703199 45.69 1 +2013-03-01 09:11:58.703199 45.69 1 +2013-03-01 09:11:58.7032 12.72 1 +2013-03-01 09:11:58.7032 12.72 1 +2013-03-01 09:11:58.703201 35.15 1 +2013-03-01 09:11:58.703201 35.15 1 +2013-03-01 09:11:58.703202 31.41 1 +2013-03-01 09:11:58.703202 31.41 1 +2013-03-01 09:11:58.703202 31.41 1 +2013-03-01 09:11:58.703202 31.41 1 +2013-03-01 09:11:58.703202 31.41 1 +2013-03-01 09:11:58.703203 11.63 1 +2013-03-01 09:11:58.703203 11.63 1 +2013-03-01 09:11:58.703203 11.63 1 +2013-03-01 09:11:58.703203 11.63 1 +2013-03-01 09:11:58.703203 11.63 1 +2013-03-01 09:11:58.703205 35.8 1 +2013-03-01 09:11:58.703205 35.8 1 +2013-03-01 09:11:58.703205 35.8 1 +2013-03-01 09:11:58.703205 35.8 1 +2013-03-01 09:11:58.703205 35.8 1 +2013-03-01 09:11:58.703206 6.61 1 +2013-03-01 09:11:58.703206 6.61 1 +2013-03-01 09:11:58.703206 6.61 1 +2013-03-01 09:11:58.703206 6.61 1 +2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703207 21.14 1 
+2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703207 21.14 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703208 1.23 1 +2013-03-01 09:11:58.703209 25.92 1 +2013-03-01 09:11:58.703209 25.92 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.703211 5.24 1 +2013-03-01 09:11:58.703211 5.24 1 +2013-03-01 09:11:58.703211 5.24 1 +2013-03-01 09:11:58.703211 5.24 1 +2013-03-01 09:11:58.703211 5.24 1 +2013-03-01 09:11:58.703211 5.24 1 +2013-03-01 09:11:58.703212 10.52 1 +2013-03-01 09:11:58.703212 10.52 1 +2013-03-01 09:11:58.703212 10.52 1 +2013-03-01 09:11:58.703212 10.52 1 +2013-03-01 09:11:58.703212 10.52 1 +2013-03-01 09:11:58.703212 10.52 1 +2013-03-01 09:11:58.703213 38.71 1 +2013-03-01 09:11:58.703213 38.71 1 +2013-03-01 09:11:58.703213 38.71 1 +2013-03-01 09:11:58.703213 38.71 1 +2013-03-01 09:11:58.703214 31.35 1 +2013-03-01 09:11:58.703214 31.35 1 +2013-03-01 09:11:58.703214 31.35 1 +2013-03-01 09:11:58.703215 18.78 1 +2013-03-01 09:11:58.703215 18.78 1 +2013-03-01 09:11:58.703215 18.78 1 +2013-03-01 09:11:58.703215 18.78 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703216 11.23 1 +2013-03-01 09:11:58.703217 23.57 1 +2013-03-01 09:11:58.703217 23.57 1 +2013-03-01 09:11:58.703217 23.57 1 +2013-03-01 09:11:58.703218 9.67 1 +2013-03-01 09:11:58.703218 9.67 1 +2013-03-01 09:11:58.703218 9.67 1 +2013-03-01 09:11:58.703218 9.67 1 +2013-03-01 09:11:58.703218 9.67 1 +2013-03-01 09:11:58.703219 1.42 1 +2013-03-01 09:11:58.703219 1.42 1 +2013-03-01 09:11:58.703219 1.42 1 +2013-03-01 09:11:58.703219 1.42 1 +2013-03-01 09:11:58.703219 1.42 1 +2013-03-01 09:11:58.703219 1.42 1 +2013-03-01 09:11:58.70322 7.37 1 +2013-03-01 09:11:58.70322 7.37 1 +2013-03-01 09:11:58.70322 7.37 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703222 38.23 1 +2013-03-01 09:11:58.703222 38.23 1 +2013-03-01 09:11:58.703222 38.23 1 +2013-03-01 09:11:58.703222 38.23 1 +2013-03-01 09:11:58.703222 38.23 1 +2013-03-01 09:11:58.703222 38.23 1 +2013-03-01 09:11:58.703223 3.43 1 +2013-03-01 09:11:58.703223 3.43 1 +2013-03-01 09:11:58.703223 3.43 1 +2013-03-01 09:11:58.703223 3.43 1 +2013-03-01 09:11:58.703223 3.43 1 +2013-03-01 09:11:58.703224 17.92 1 +2013-03-01 09:11:58.703224 17.92 1 +2013-03-01 09:11:58.703224 17.92 1 +2013-03-01 09:11:58.703224 17.92 1 +2013-03-01 09:11:58.703224 17.92 1 +2013-03-01 09:11:58.703224 17.92 1 +2013-03-01 09:11:58.703225 35.51 1 +2013-03-01 09:11:58.703225 35.51 1 +2013-03-01 09:11:58.703225 35.51 1 +2013-03-01 09:11:58.703225 35.51 1 +2013-03-01 09:11:58.703226 34.31 1 +2013-03-01 09:11:58.703226 34.31 1 +2013-03-01 09:11:58.703226 34.31 
1 +2013-03-01 09:11:58.703226 34.31 1 +2013-03-01 09:11:58.703226 34.31 1 +2013-03-01 09:11:58.703226 34.31 1 +2013-03-01 09:11:58.703226 34.31 1 +2013-03-01 09:11:58.703227 17.65 1 +2013-03-01 09:11:58.703227 17.65 1 +2013-03-01 09:11:58.703227 17.65 1 +2013-03-01 09:11:58.703227 17.65 1 +2013-03-01 09:11:58.703227 17.65 1 +2013-03-01 09:11:58.703228 4.19 1 +2013-03-01 09:11:58.703228 4.19 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.703229 88.52 1 +2013-03-01 09:11:58.70323 28.02 1 +2013-03-01 09:11:58.70323 28.02 1 +2013-03-01 09:11:58.70323 28.02 1 +2013-03-01 09:11:58.70323 28.02 1 +2013-03-01 09:11:58.70323 28.02 1 +2013-03-01 09:11:58.703231 11.99 1 +2013-03-01 09:11:58.703231 11.99 1 +2013-03-01 09:11:58.703231 11.99 1 +2013-03-01 09:11:58.703231 11.99 1 +2013-03-01 09:11:58.703231 11.99 1 +2013-03-01 09:11:58.703231 11.99 1 +2013-03-01 09:11:58.703232 61.96 1 +2013-03-01 09:11:58.703232 61.96 1 +2013-03-01 09:11:58.703232 61.96 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703234 44.1 1 +2013-03-01 09:11:58.703234 44.1 1 +2013-03-01 09:11:58.703234 44.1 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703235 6.35 1 +2013-03-01 09:11:58.703236 37.8 1 +2013-03-01 09:11:58.703236 37.8 1 +2013-03-01 09:11:58.703236 37.8 1 +2013-03-01 09:11:58.703236 37.8 1 +2013-03-01 09:11:58.703236 37.8 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703237 0.24 1 +2013-03-01 09:11:58.703238 6 1 +2013-03-01 09:11:58.703238 6 1 +2013-03-01 09:11:58.703238 6 1 +2013-03-01 09:11:58.703238 6 1 +2013-03-01 09:11:58.703239 24.8 1 +2013-03-01 09:11:58.703239 24.8 1 +2013-03-01 09:11:58.703239 24.8 1 +2013-03-01 09:11:58.703239 24.8 1 +2013-03-01 09:11:58.703239 24.8 1 +2013-03-01 09:11:58.70324 5.1 1 +2013-03-01 09:11:58.70324 5.1 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703241 19.33 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703243 6.15 1 +2013-03-01 09:11:58.703243 6.15 1 +2013-03-01 09:11:58.703243 6.15 1 +2013-03-01 09:11:58.703243 6.15 1 +2013-03-01 09:11:58.703244 4.47 1 +2013-03-01 09:11:58.703244 4.47 1 +2013-03-01 09:11:58.703244 4.47 1 +2013-03-01 09:11:58.703245 0.72 1 +2013-03-01 09:11:58.703245 0.72 1 +2013-03-01 09:11:58.703245 0.72 1 +2013-03-01 09:11:58.703245 0.72 1 +2013-03-01 09:11:58.703246 45.94 1 +2013-03-01 09:11:58.703246 45.94 1 +2013-03-01 09:11:58.703247 1.29 1 +2013-03-01 
09:11:58.703247 1.29 1 +2013-03-01 09:11:58.703247 1.29 1 +2013-03-01 09:11:58.703247 1.29 1 +2013-03-01 09:11:58.703247 1.29 1 +2013-03-01 09:11:58.703247 1.29 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703248 14.36 1 +2013-03-01 09:11:58.703249 19.42 1 +2013-03-01 09:11:58.703249 19.42 1 +2013-03-01 09:11:58.70325 25.89 1 +2013-03-01 09:11:58.70325 25.89 1 +2013-03-01 09:11:58.70325 25.89 1 +2013-03-01 09:11:58.70325 25.89 1 +2013-03-01 09:11:58.70325 25.89 1 +2013-03-01 09:11:58.70325 25.89 1 +2013-03-01 09:11:58.703251 68.98 1 +2013-03-01 09:11:58.703251 68.98 1 +2013-03-01 09:11:58.703251 68.98 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703252 49.85 1 +2013-03-01 09:11:58.703253 55.75 1 +2013-03-01 09:11:58.703253 55.75 1 +2013-03-01 09:11:58.703253 55.75 1 +2013-03-01 09:11:58.703253 55.75 1 +2013-03-01 09:11:58.703253 55.75 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703255 5.42 1 +2013-03-01 09:11:58.703255 5.42 1 +2013-03-01 09:11:58.703255 5.42 1 +2013-03-01 09:11:58.703255 5.42 1 +2013-03-01 09:11:58.703255 5.42 1 +2013-03-01 09:11:58.703255 5.42 1 +2013-03-01 09:11:58.703256 23.78 1 +2013-03-01 09:11:58.703256 23.78 1 +2013-03-01 09:11:58.703256 23.78 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703257 14.96 1 +2013-03-01 09:11:58.703258 19.65 1 +2013-03-01 09:11:58.703258 19.65 1 +2013-03-01 09:11:58.703258 19.65 1 +2013-03-01 09:11:58.703258 19.65 1 +2013-03-01 09:11:58.703258 19.65 1 +2013-03-01 09:11:58.703258 19.65 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.703259 11.37 1 +2013-03-01 09:11:58.70326 3.81 1 +2013-03-01 09:11:58.70326 3.81 1 +2013-03-01 09:11:58.70326 3.81 1 +2013-03-01 09:11:58.70326 3.81 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703261 8.66 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703262 1.81 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 
09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703265 6.88 1 +2013-03-01 09:11:58.703266 47.71 1 +2013-03-01 09:11:58.703266 47.71 1 +2013-03-01 09:11:58.703266 47.71 1 +2013-03-01 09:11:58.703266 47.71 1 +2013-03-01 09:11:58.703267 12.22 1 +2013-03-01 09:11:58.703267 12.22 1 +2013-03-01 09:11:58.703267 12.22 1 +2013-03-01 09:11:58.703267 12.22 1 +2013-03-01 09:11:58.703267 12.22 1 +2013-03-01 09:11:58.703267 12.22 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703268 94.73 1 +2013-03-01 09:11:58.703269 43.84 1 +2013-03-01 09:11:58.703269 43.84 1 +2013-03-01 09:11:58.703269 43.84 1 +2013-03-01 09:11:58.70327 5.01 1 +2013-03-01 09:11:58.70327 5.01 1 +2013-03-01 09:11:58.70327 5.01 1 +2013-03-01 09:11:58.70327 5.01 1 +2013-03-01 09:11:58.703271 61.16 1 +2013-03-01 09:11:58.703271 61.16 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703273 10.94 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703274 10.79 1 +2013-03-01 09:11:58.703275 20.57 1 +2013-03-01 09:11:58.703275 20.57 1 +2013-03-01 09:11:58.703275 20.57 1 +2013-03-01 09:11:58.703276 63.54 1 +2013-03-01 09:11:58.703276 63.54 1 +2013-03-01 09:11:58.703276 63.54 1 +2013-03-01 09:11:58.703276 63.54 1 +2013-03-01 09:11:58.703277 3.37 1 +2013-03-01 09:11:58.703277 3.37 1 +2013-03-01 09:11:58.703277 3.37 1 +2013-03-01 09:11:58.703277 3.37 1 +2013-03-01 09:11:58.703278 9.74 1 +2013-03-01 09:11:58.703278 9.74 1 +2013-03-01 09:11:58.703278 9.74 1 +2013-03-01 09:11:58.703278 9.74 1 +2013-03-01 09:11:58.703278 9.74 1 +2013-03-01 09:11:58.703278 9.74 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.703279 20.85 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.70328 40.68 1 +2013-03-01 09:11:58.703281 19.95 1 +2013-03-01 09:11:58.703281 19.95 1 +2013-03-01 09:11:58.703281 19.95 1 +2013-03-01 09:11:58.703281 19.95 1 +2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703282 7.5 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703284 99.88 1 +2013-03-01 
09:11:58.703284 99.88 1 +2013-03-01 09:11:58.703284 99.88 1 +2013-03-01 09:11:58.703284 99.88 1 +2013-03-01 09:11:58.703284 99.88 1 +2013-03-01 09:11:58.703284 99.88 1 +2013-03-01 09:11:58.703285 58.66 1 +2013-03-01 09:11:58.703285 58.66 1 +2013-03-01 09:11:58.703285 58.66 1 +2013-03-01 09:11:58.703285 58.66 1 +2013-03-01 09:11:58.703285 58.66 1 +2013-03-01 09:11:58.703286 9.53 1 +2013-03-01 09:11:58.703286 9.53 1 +2013-03-01 09:11:58.703286 9.53 1 +2013-03-01 09:11:58.703286 9.53 1 +2013-03-01 09:11:58.703287 0.89 1 +2013-03-01 09:11:58.703287 0.89 1 +2013-03-01 09:11:58.703287 0.89 1 +2013-03-01 09:11:58.703288 60.57 1 +2013-03-01 09:11:58.703288 60.57 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.70329 16.89 1 +2013-03-01 09:11:58.70329 16.89 1 +2013-03-01 09:11:58.70329 16.89 1 +2013-03-01 09:11:58.70329 16.89 1 +2013-03-01 09:11:58.70329 16.89 1 +2013-03-01 09:11:58.70329 16.89 1 +2013-03-01 09:11:58.703291 1.15 1 +2013-03-01 09:11:58.703291 1.15 1 +2013-03-01 09:11:58.703291 1.15 1 +2013-03-01 09:11:58.703291 1.15 1 +2013-03-01 09:11:58.703292 4.24 1 +2013-03-01 09:11:58.703292 4.24 1 +2013-03-01 09:11:58.703292 4.24 1 +2013-03-01 09:11:58.703292 4.24 1 +2013-03-01 09:11:58.703293 42.86 1 +2013-03-01 09:11:58.703293 42.86 1 +2013-03-01 09:11:58.703293 42.86 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703295 8.58 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703296 30.43 1 +2013-03-01 09:11:58.703297 25.67 1 +2013-03-01 09:11:58.703297 25.67 1 +2013-03-01 09:11:58.703297 25.67 1 +2013-03-01 09:11:58.703297 25.67 1 +2013-03-01 09:11:58.703297 25.67 1 +2013-03-01 09:11:58.703297 25.67 1 +2013-03-01 09:11:58.703298 8.8 1 +2013-03-01 09:11:58.703298 8.8 1 +2013-03-01 09:11:58.703298 8.8 1 +2013-03-01 09:11:58.703298 8.8 1 +2013-03-01 09:11:58.703299 9 1 +2013-03-01 09:11:58.703299 9 1 +2013-03-01 09:11:58.7033 7.51 1 +2013-03-01 09:11:58.7033 7.51 1 +2013-03-01 09:11:58.7033 7.51 1 +2013-03-01 09:11:58.7033 7.51 1 +2013-03-01 09:11:58.7033 7.51 1 +2013-03-01 09:11:58.703301 39.32 1 +2013-03-01 09:11:58.703301 39.32 1 +2013-03-01 09:11:58.703301 39.32 1 +2013-03-01 09:11:58.703301 39.32 1 +2013-03-01 09:11:58.703301 39.32 1 +2013-03-01 09:11:58.703301 39.32 1 +2013-03-01 09:11:58.703302 23.55 1 +2013-03-01 09:11:58.703302 23.55 1 +2013-03-01 09:11:58.703302 23.55 1 +2013-03-01 09:11:58.703302 23.55 1 +2013-03-01 09:11:58.703302 23.55 1 +2013-03-01 09:11:58.703303 88.64 1 +2013-03-01 09:11:58.703303 88.64 1 +2013-03-01 09:11:58.703303 88.64 1 +2013-03-01 09:11:58.703303 88.64 1 +2013-03-01 09:11:58.703303 88.64 1 +2013-03-01 09:11:58.703304 9.04 1 +2013-03-01 09:11:58.703304 9.04 1 +2013-03-01 09:11:58.703304 9.04 1 +2013-03-01 09:11:58.703304 9.04 1 
+2013-03-01 09:11:58.703305 18.68 1 +2013-03-01 09:11:58.703305 18.68 1 +2013-03-01 09:11:58.703305 18.68 1 +2013-03-01 09:11:58.703305 18.68 1 +2013-03-01 09:11:58.703306 3.95 1 +2013-03-01 09:11:58.703306 3.95 1 +2013-03-01 09:11:58.703306 3.95 1 +2013-03-01 09:11:58.703306 3.95 1 +2013-03-01 09:11:58.703307 31.28 1 +2013-03-01 09:11:58.703307 31.28 1 +2013-03-01 09:11:58.703307 31.28 1 +2013-03-01 09:11:58.703308 16.95 1 +2013-03-01 09:11:58.703308 16.95 1 +2013-03-01 09:11:58.703308 16.95 1 +2013-03-01 09:11:58.703308 16.95 1 +2013-03-01 09:11:58.703309 11.16 1 +2013-03-01 09:11:58.703309 11.16 1 +2013-03-01 09:11:58.703309 11.16 1 +2013-03-01 09:11:58.703309 11.16 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.70331 9.24 1 +2013-03-01 09:11:58.703311 7.38 1 +2013-03-01 09:11:58.703311 7.38 1 +2013-03-01 09:11:58.703311 7.38 1 +2013-03-01 09:11:58.703311 7.38 1 +2013-03-01 09:11:58.703311 7.38 1 +2013-03-01 09:11:58.703311 7.38 1 +2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703312 18.2 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703313 9.35 1 +2013-03-01 09:11:58.703314 39.12 1 +2013-03-01 09:11:58.703314 39.12 1 +2013-03-01 09:11:58.703314 39.12 1 +2013-03-01 09:11:58.703314 39.12 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703315 4.93 1 +2013-03-01 09:11:58.703316 16.86 1 +2013-03-01 09:11:58.703316 16.86 1 +2013-03-01 09:11:58.703316 16.86 1 +2013-03-01 09:11:58.703316 16.86 1 +2013-03-01 09:11:58.703316 16.86 1 +2013-03-01 09:11:58.703317 1.45 1 +2013-03-01 09:11:58.703317 1.45 1 +2013-03-01 09:11:58.703317 1.45 1 +2013-03-01 09:11:58.703317 1.45 1 +2013-03-01 09:11:58.703318 21.81 1 +2013-03-01 09:11:58.703318 21.81 1 +2013-03-01 09:11:58.703318 21.81 1 +2013-03-01 09:11:58.703318 21.81 1 +2013-03-01 09:11:58.703319 83.21 1 +2013-03-01 09:11:58.703319 83.21 1 +2013-03-01 09:11:58.703319 83.21 1 +2013-03-01 09:11:58.703319 83.21 1 +2013-03-01 09:11:58.703319 83.21 1 +2013-03-01 09:11:58.70332 77.09 1 +2013-03-01 09:11:58.70332 77.09 1 +2013-03-01 09:11:58.70332 77.09 1 +2013-03-01 09:11:58.70332 77.09 1 +2013-03-01 09:11:58.70332 77.09 1 +2013-03-01 09:11:58.70332 77.09 1 +2013-03-01 09:11:58.703321 3.91 1 +2013-03-01 09:11:58.703321 3.91 1 +2013-03-01 09:11:58.703321 3.91 1 +2013-03-01 09:11:58.703321 3.91 1 +2013-03-01 09:11:58.703322 2.48 1 +2013-03-01 09:11:58.703322 2.48 1 +2013-03-01 09:11:58.703322 2.48 1 +2013-03-01 09:11:58.703322 2.48 1 +2013-03-01 09:11:58.703322 2.48 1 +2013-03-01 09:11:58.703322 2.48 1 +2013-03-01 09:11:58.703323 36.22 1 +2013-03-01 09:11:58.703323 36.22 1 +2013-03-01 09:11:58.703323 36.22 1 +2013-03-01 09:11:58.703324 14.08 1 +2013-03-01 09:11:58.703324 14.08 1 +2013-03-01 09:11:58.703324 14.08 1 +2013-03-01 09:11:58.703324 14.08 1 +2013-03-01 
09:11:58.703324 14.08 1 +2013-03-01 09:11:58.703324 14.08 1 +2013-03-01 09:11:58.703325 9.24 1 +2013-03-01 09:11:58.703325 9.24 1 +2013-03-01 09:11:58.703325 9.24 1 +2013-03-01 09:11:58.703325 9.24 1 +2013-03-01 09:11:58.703325 9.24 1 +2013-03-01 09:11:58.703325 9.24 1 diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 3-0-58a982694ba2b1e34de82b1de54936a0 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 3-0-58a982694ba2b1e34de82b1de54936a0 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 new file mode 100644 index 0000000000000..d02ca48857b5f --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_rank.q (deterministic) 4-0-12cc78f3953c3e6b5411ddc729541bf0 @@ -0,0 +1,474 @@ +2013-03-01 09:11:58.703074 58.47 1 +2013-03-01 09:11:58.703074 58.47 1 +2013-03-01 09:11:58.703074 58.47 1 +2013-03-01 09:11:58.703074 58.47 1 +2013-03-01 09:11:58.703074 58.47 1 +2013-03-01 09:11:58.703074 58.47 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703076 18.51 1 +2013-03-01 09:11:58.703077 66.68 1 +2013-03-01 09:11:58.703077 66.68 1 +2013-03-01 09:11:58.703077 66.68 1 +2013-03-01 09:11:58.703077 66.68 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703087 25.19 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703088 1.97 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703089 41.57 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703091 68.85 1 +2013-03-01 09:11:58.703092 54.02 1 +2013-03-01 09:11:58.703092 54.02 1 +2013-03-01 09:11:58.703092 54.02 1 +2013-03-01 09:11:58.703096 87.84 1 +2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703097 0.9 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703098 21.29 1 +2013-03-01 09:11:58.703104 75.85 1 +2013-03-01 09:11:58.703104 75.85 1 +2013-03-01 09:11:58.703104 75.85 1 +2013-03-01 09:11:58.703104 75.85 1 +2013-03-01 09:11:58.703104 75.85 1 +2013-03-01 09:11:58.70311 65.88 1 +2013-03-01 09:11:58.70311 65.88 1 +2013-03-01 09:11:58.70311 65.88 1 +2013-03-01 09:11:58.70311 65.88 1 +2013-03-01 09:11:58.703111 85.94 1 +2013-03-01 09:11:58.703111 85.94 1 
+2013-03-01 09:11:58.703111 85.94 1 +2013-03-01 09:11:58.703111 85.94 1 +2013-03-01 09:11:58.703111 85.94 1 +2013-03-01 09:11:58.703111 85.94 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703112 13.29 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703113 58.65 1 +2013-03-01 09:11:58.703118 8.69 1 +2013-03-01 09:11:58.703118 8.69 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.70312 52.6 1 +2013-03-01 09:11:58.703125 78.52 1 +2013-03-01 09:11:58.703125 78.52 1 +2013-03-01 09:11:58.703125 78.52 1 +2013-03-01 09:11:58.703125 78.52 1 +2013-03-01 09:11:58.703125 78.52 1 +2013-03-01 09:11:58.703125 78.52 1 +2013-03-01 09:11:58.703131 63.81 1 +2013-03-01 09:11:58.703131 63.81 1 +2013-03-01 09:11:58.703131 63.81 1 +2013-03-01 09:11:58.703131 63.81 1 +2013-03-01 09:11:58.703131 63.81 1 +2013-03-01 09:11:58.703131 63.81 1 +2013-03-01 09:11:58.703132 1.86 1 +2013-03-01 09:11:58.703132 1.86 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703135 88.38 1 +2013-03-01 09:11:58.703136 27.89 1 +2013-03-01 09:11:58.703136 27.89 1 +2013-03-01 09:11:58.703136 27.89 1 +2013-03-01 09:11:58.703136 27.89 1 +2013-03-01 09:11:58.703136 27.89 1 +2013-03-01 09:11:58.703138 86.7 1 +2013-03-01 09:11:58.703138 86.7 1 +2013-03-01 09:11:58.703138 86.7 1 +2013-03-01 09:11:58.703138 86.7 1 +2013-03-01 09:11:58.703138 86.7 1 +2013-03-01 09:11:58.703139 43.53 1 +2013-03-01 09:11:58.703139 43.53 1 +2013-03-01 09:11:58.703139 43.53 1 +2013-03-01 09:11:58.703139 43.53 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703144 21.59 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703154 16.93 1 +2013-03-01 09:11:58.703156 62.42 1 +2013-03-01 09:11:58.703156 62.42 1 +2013-03-01 09:11:58.703156 62.42 1 +2013-03-01 09:11:58.703156 62.42 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703157 8.99 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703174 36.33 1 +2013-03-01 09:11:58.703178 93.29 1 +2013-03-01 09:11:58.703178 93.29 1 +2013-03-01 09:11:58.703178 93.29 1 +2013-03-01 09:11:58.703178 93.29 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703179 60.94 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 
09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703181 26.6 1 +2013-03-01 09:11:58.703184 73.93 1 +2013-03-01 09:11:58.703184 73.93 1 +2013-03-01 09:11:58.703184 73.93 1 +2013-03-01 09:11:58.703184 73.93 1 +2013-03-01 09:11:58.703184 73.93 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703185 8.91 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703186 91.46 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703188 32.85 1 +2013-03-01 09:11:58.703189 37.74 1 +2013-03-01 09:11:58.703189 37.74 1 +2013-03-01 09:11:58.703189 37.74 1 +2013-03-01 09:11:58.703189 37.74 1 +2013-03-01 09:11:58.703189 37.74 1 +2013-03-01 09:11:58.703189 37.74 1 +2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703195 82.5 1 +2013-03-01 09:11:58.703198 97.18 1 +2013-03-01 09:11:58.703198 97.18 1 +2013-03-01 09:11:58.703198 97.18 1 +2013-03-01 09:11:58.703198 97.18 1 +2013-03-01 09:11:58.703206 80.94 1 +2013-03-01 09:11:58.703206 80.94 1 +2013-03-01 09:11:58.703206 80.94 1 +2013-03-01 09:11:58.703206 80.94 1 +2013-03-01 09:11:58.703206 80.94 1 +2013-03-01 09:11:58.703206 80.94 1 +2013-03-01 09:11:58.703207 55.06 1 +2013-03-01 09:11:58.703207 55.06 1 +2013-03-01 09:11:58.703207 55.06 1 +2013-03-01 09:11:58.703207 55.06 1 +2013-03-01 09:11:58.703207 55.06 1 +2013-03-01 09:11:58.703209 25.92 1 +2013-03-01 09:11:58.703209 25.92 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.70321 37.12 1 +2013-03-01 09:11:58.703213 48.8 1 +2013-03-01 09:11:58.703213 48.8 1 +2013-03-01 09:11:58.703213 48.8 1 +2013-03-01 09:11:58.703213 48.8 1 +2013-03-01 09:11:58.703219 32.73 1 +2013-03-01 09:11:58.703219 32.73 1 +2013-03-01 09:11:58.703219 32.73 1 +2013-03-01 09:11:58.703219 32.73 1 +2013-03-01 09:11:58.703219 32.73 1 +2013-03-01 09:11:58.703219 32.73 1 +2013-03-01 09:11:58.70322 7.37 1 +2013-03-01 09:11:58.70322 7.37 1 +2013-03-01 09:11:58.70322 7.37 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703221 26.64 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703223 57.1 1 +2013-03-01 09:11:58.703224 42.93 1 +2013-03-01 09:11:58.703224 42.93 1 +2013-03-01 09:11:58.703224 42.93 1 +2013-03-01 09:11:58.703224 42.93 1 +2013-03-01 09:11:58.703226 68.3 1 +2013-03-01 09:11:58.703226 68.3 1 +2013-03-01 09:11:58.703226 68.3 1 +2013-03-01 09:11:58.703226 68.3 1 +2013-03-01 09:11:58.703226 68.3 1 +2013-03-01 
09:11:58.703226 68.3 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703231 18.7 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703233 40.81 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703242 31.23 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703244 25.67 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703245 32.3 1 +2013-03-01 09:11:58.703246 72.87 1 +2013-03-01 09:11:58.703246 72.87 1 +2013-03-01 09:11:58.703248 81.28 1 +2013-03-01 09:11:58.703248 81.28 1 +2013-03-01 09:11:58.703248 81.28 1 +2013-03-01 09:11:58.703249 93.3 1 +2013-03-01 09:11:58.703249 93.3 1 +2013-03-01 09:11:58.703249 93.3 1 +2013-03-01 09:11:58.703249 93.3 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.70325 93.79 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703254 0.32 1 +2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703256 43.8 1 +2013-03-01 09:11:58.703258 21.21 1 +2013-03-01 09:11:58.703258 21.21 1 +2013-03-01 09:11:58.703258 21.21 1 +2013-03-01 09:11:58.703259 52.28 1 +2013-03-01 09:11:58.703259 52.28 1 +2013-03-01 09:11:58.703259 52.28 1 +2013-03-01 09:11:58.703259 52.28 1 +2013-03-01 09:11:58.703259 52.28 1 +2013-03-01 09:11:58.703259 52.28 1 +2013-03-01 09:11:58.703262 78.56 1 +2013-03-01 09:11:58.703262 78.56 1 +2013-03-01 09:11:58.703262 78.56 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703263 14.4 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703264 52.49 1 +2013-03-01 09:11:58.703265 11.46 1 +2013-03-01 09:11:58.703265 11.46 1 +2013-03-01 09:11:58.703265 11.46 1 +2013-03-01 09:11:58.703265 11.46 1 +2013-03-01 09:11:58.703266 83.67 1 +2013-03-01 09:11:58.703266 83.67 1 +2013-03-01 09:11:58.703266 83.67 1 +2013-03-01 09:11:58.703266 83.67 1 +2013-03-01 09:11:58.703266 83.67 1 +2013-03-01 09:11:58.703269 61.06 1 +2013-03-01 09:11:58.703269 61.06 1 +2013-03-01 09:11:58.703269 61.06 1 +2013-03-01 09:11:58.703269 61.06 1 +2013-03-01 
09:11:58.703269 61.06 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703272 7.49 1 +2013-03-01 09:11:58.703273 30.49 1 +2013-03-01 09:11:58.703273 30.49 1 +2013-03-01 09:11:58.703273 30.49 1 +2013-03-01 09:11:58.703275 90.69 1 +2013-03-01 09:11:58.703275 90.69 1 +2013-03-01 09:11:58.703275 90.69 1 +2013-03-01 09:11:58.703275 90.69 1 +2013-03-01 09:11:58.703275 90.69 1 +2013-03-01 09:11:58.703275 90.69 1 +2013-03-01 09:11:58.703276 88.46 1 +2013-03-01 09:11:58.703276 88.46 1 +2013-03-01 09:11:58.703276 88.46 1 +2013-03-01 09:11:58.703276 88.46 1 +2013-03-01 09:11:58.703278 69.42 1 +2013-03-01 09:11:58.703278 69.42 1 +2013-03-01 09:11:58.703278 69.42 1 +2013-03-01 09:11:58.70328 45.81 1 +2013-03-01 09:11:58.70328 45.81 1 +2013-03-01 09:11:58.70328 45.81 1 +2013-03-01 09:11:58.70328 45.81 1 +2013-03-01 09:11:58.70328 45.81 1 +2013-03-01 09:11:58.70328 45.81 1 +2013-03-01 09:11:58.703281 62.11 1 +2013-03-01 09:11:58.703281 62.11 1 +2013-03-01 09:11:58.703281 62.11 1 +2013-03-01 09:11:58.703281 62.11 1 +2013-03-01 09:11:58.703281 62.11 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703283 17.62 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703289 0.56 1 +2013-03-01 09:11:58.703293 42.86 1 +2013-03-01 09:11:58.703293 42.86 1 +2013-03-01 09:11:58.703293 42.86 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703294 29.74 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703296 43.66 1 +2013-03-01 09:11:58.703299 23.19 1 +2013-03-01 09:11:58.703299 23.19 1 +2013-03-01 09:11:58.703299 23.19 1 +2013-03-01 09:11:58.703299 23.19 1 +2013-03-01 09:11:58.703299 23.19 1 +2013-03-01 09:11:58.703309 89.4 1 +2013-03-01 09:11:58.703309 89.4 1 +2013-03-01 09:11:58.703309 89.4 1 +2013-03-01 09:11:58.703309 89.4 1 +2013-03-01 09:11:58.70331 69.26 1 +2013-03-01 09:11:58.70331 69.26 1 +2013-03-01 09:11:58.70331 69.26 1 +2013-03-01 09:11:58.703313 20.69 1 +2013-03-01 09:11:58.703313 20.69 1 +2013-03-01 09:11:58.703313 20.69 1 +2013-03-01 09:11:58.703313 20.69 1 +2013-03-01 09:11:58.703315 53.04 1 +2013-03-01 09:11:58.703315 53.04 1 +2013-03-01 09:11:58.703315 53.04 1 +2013-03-01 09:11:58.703315 53.04 1 +2013-03-01 09:11:58.703318 85.62 1 +2013-03-01 09:11:58.703318 85.62 1 +2013-03-01 09:11:58.703318 85.62 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703323 65.61 1 +2013-03-01 09:11:58.703324 98.36 1 +2013-03-01 09:11:58.703324 98.36 1 +2013-03-01 09:11:58.703324 98.36 1 +2013-03-01 09:11:58.703324 98.36 1 +2013-03-01 
09:11:58.703325 65.81 1 +2013-03-01 09:11:58.703325 65.81 1 +2013-03-01 09:11:58.703325 65.81 1 +2013-03-01 09:11:58.703325 65.81 1 diff --git a/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-0-6642a21d87e0401ba1a668ea8b244f0c b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-0-6642a21d87e0401ba1a668ea8b244f0c new file mode 100644 index 0000000000000..119dd71df1426 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-0-6642a21d87e0401ba1a668ea8b244f0c @@ -0,0 +1,1049 @@ + 65560 + 65560 + 65560 +alice allen 65662 +alice allen 65662 +alice allen 65662 +alice brown 65696 +alice carson 65559 +alice davidson 65547 +alice falkner 65669 +alice garcia 65613 +alice hernandez 65737 +alice hernandez 65737 +alice johnson 65739 +alice king 65660 +alice king 65660 +alice king 65660 +alice laertes 65669 +alice laertes 65669 +alice miller 65590 +alice nixon 65586 +alice nixon 65586 +alice nixon 65586 +alice ovid 65737 +alice polk 65548 +alice quirinius 65636 +alice quirinius 65636 +alice robinson 65606 +alice robinson 65606 +alice steinbeck 65578 +alice steinbeck 65578 +alice steinbeck 65578 +alice underhill 65750 +alice van buren 65562 +alice xylophone 65578 +alice xylophone 65578 +alice xylophone 65578 +alice zipper 65553 +alice zipper 65553 +alice zipper 65553 +bob brown 65584 +bob brown 65584 +bob brown 65584 +bob carson 65713 +bob davidson 65664 +bob davidson 65664 +bob davidson 65664 +bob ellison 65591 +bob ellison 65591 +bob ellison 65591 +bob ellison 65591 +bob falkner 65789 +bob garcia 65585 +bob garcia 65585 +bob garcia 65585 +bob garcia 65585 +bob garcia 65585 +bob hernandez 65557 +bob ichabod 65549 +bob king 65715 +bob king 65715 +bob king 65715 +bob laertes 65602 +bob laertes 65602 +bob miller 65608 +bob ovid 65564 +bob ovid 65564 +bob ovid 65564 +bob ovid 65564 +bob polk 65594 +bob quirinius 65700 +bob steinbeck 65637 +bob van buren 65778 +bob white 65543 +bob white 65543 +bob xylophone 65574 +bob xylophone 65574 +bob young 65556 +bob zipper 65559 +bob zipper 65559 +bob zipper 65559 +calvin allen 65669 +calvin brown 65537 +calvin brown 65537 +calvin brown 65537 +calvin carson 65637 +calvin davidson 65541 +calvin davidson 65541 +calvin ellison 65667 +calvin falkner 65573 +calvin falkner 65573 +calvin falkner 65573 +calvin falkner 65573 +calvin falkner 65573 +calvin falkner 65573 +calvin garcia 65664 +calvin hernandez 65578 +calvin johnson 65731 +calvin laertes 65570 +calvin laertes 65570 +calvin nixon 65654 +calvin nixon 65654 +calvin nixon 65654 +calvin ovid 65554 +calvin ovid 65554 +calvin ovid 65554 +calvin ovid 65554 +calvin polk 65731 +calvin quirinius 65741 +calvin quirinius 65741 +calvin robinson 65581 +calvin steinbeck 65680 +calvin steinbeck 65680 +calvin steinbeck 65680 +calvin thompson 65560 +calvin thompson 65560 +calvin underhill 65732 +calvin van buren 65552 +calvin van buren 65552 +calvin white 65553 +calvin white 65553 +calvin xylophone 65575 +calvin xylophone 65575 +calvin xylophone 65575 +calvin young 65574 +calvin young 65574 +calvin zipper 65669 +calvin zipper 65669 +david allen 65588 +david allen 65588 +david brown 65637 +david brown 65637 +david davidson 65559 +david davidson 65559 +david davidson 65559 +david davidson 65559 +david ellison 65634 +david ellison 65634 +david ellison 65634 +david hernandez 65763 +david ichabod 65699 +david ichabod 65699 +david laertes 65762 +david nixon 65536 +david ovid 65623 +david ovid 65623 +david quirinius 65697 +david quirinius 65697 +david quirinius 65697 
+david robinson 65762 +david robinson 65762 +david thompson 65550 +david underhill 65602 +david underhill 65602 +david underhill 65602 +david van buren 65625 +david van buren 65625 +david white 65678 +david xylophone 65537 +david xylophone 65537 +david xylophone 65537 +david young 65551 +david young 65551 +ethan allen 65747 +ethan brown 65539 +ethan brown 65539 +ethan brown 65539 +ethan brown 65539 +ethan brown 65539 +ethan brown 65539 +ethan carson 65742 +ethan ellison 65714 +ethan ellison 65714 +ethan falkner 65577 +ethan falkner 65577 +ethan garcia 65736 +ethan hernandez 65618 +ethan johnson 65536 +ethan king 65614 +ethan laertes 65562 +ethan laertes 65562 +ethan laertes 65562 +ethan laertes 65562 +ethan laertes 65562 +ethan laertes 65562 +ethan laertes 65562 +ethan miller 65712 +ethan nixon 65766 +ethan ovid 65697 +ethan polk 65589 +ethan polk 65589 +ethan polk 65589 +ethan polk 65589 +ethan quirinius 65542 +ethan quirinius 65542 +ethan quirinius 65542 +ethan robinson 65547 +ethan robinson 65547 +ethan underhill 65570 +ethan van buren 65572 +ethan white 65677 +ethan white 65677 +ethan xylophone 65595 +ethan zipper 65593 +ethan zipper 65593 +fred davidson 65552 +fred davidson 65552 +fred davidson 65552 +fred ellison 65548 +fred ellison 65548 +fred ellison 65548 +fred falkner 65637 +fred falkner 65637 +fred falkner 65637 +fred hernandez 65541 +fred ichabod 65572 +fred ichabod 65572 +fred johnson 65758 +fred king 65694 +fred king 65694 +fred laertes 65769 +fred miller 65536 +fred nixon 65560 +fred nixon 65560 +fred nixon 65560 +fred nixon 65560 +fred polk 65603 +fred polk 65603 +fred polk 65603 +fred polk 65603 +fred quirinius 65697 +fred quirinius 65697 +fred robinson 65623 +fred steinbeck 65544 +fred steinbeck 65544 +fred steinbeck 65544 +fred underhill 65629 +fred van buren 65537 +fred van buren 65537 +fred van buren 65537 +fred van buren 65537 +fred white 65589 +fred young 65594 +fred young 65594 +fred zipper 65553 +gabriella allen 65646 +gabriella allen 65646 +gabriella brown 65704 +gabriella brown 65704 +gabriella carson 65586 +gabriella davidson 65565 +gabriella ellison 65706 +gabriella ellison 65706 +gabriella falkner 65623 +gabriella falkner 65623 +gabriella falkner 65623 +gabriella garcia 65571 +gabriella hernandez 65587 +gabriella hernandez 65587 +gabriella ichabod 65559 +gabriella ichabod 65559 +gabriella ichabod 65559 +gabriella ichabod 65559 +gabriella ichabod 65559 +gabriella king 65651 +gabriella king 65651 +gabriella laertes 65781 +gabriella miller 65646 +gabriella ovid 65556 +gabriella ovid 65556 +gabriella polk 65701 +gabriella polk 65701 +gabriella steinbeck 65582 +gabriella steinbeck 65582 +gabriella thompson 65682 +gabriella thompson 65682 +gabriella thompson 65682 +gabriella van buren 65581 +gabriella van buren 65581 +gabriella white 65638 +gabriella young 65699 +gabriella young 65699 +gabriella zipper 65540 +gabriella zipper 65540 +holly allen 65596 +holly brown 65599 +holly brown 65599 +holly falkner 65720 +holly hernandez 65602 +holly hernandez 65602 +holly hernandez 65602 +holly hernandez 65602 +holly ichabod 65711 +holly ichabod 65711 +holly ichabod 65711 +holly johnson 65655 +holly johnson 65655 +holly johnson 65655 +holly king 65549 +holly king 65549 +holly laertes 65664 +holly miller 65653 +holly nixon 65539 +holly nixon 65539 +holly polk 65743 +holly polk 65743 +holly robinson 65564 +holly thompson 65538 +holly thompson 65538 +holly thompson 65538 +holly underhill 65634 +holly underhill 65634 +holly underhill 65634 +holly underhill 65634 +holly van buren 
65727 +holly white 65536 +holly white 65536 +holly xylophone 65544 +holly young 65606 +holly young 65606 +holly zipper 65607 +holly zipper 65607 +irene allen 65556 +irene brown 65633 +irene brown 65633 +irene brown 65633 +irene carson 65590 +irene ellison 65659 +irene ellison 65659 +irene falkner 65620 +irene falkner 65620 +irene garcia 65660 +irene garcia 65660 +irene garcia 65660 +irene ichabod 65645 +irene ichabod 65645 +irene johnson 65583 +irene laertes 65664 +irene laertes 65664 +irene laertes 65664 +irene miller 65730 +irene nixon 65631 +irene nixon 65631 +irene nixon 65631 +irene ovid 65691 +irene ovid 65691 +irene ovid 65691 +irene polk 65551 +irene polk 65551 +irene polk 65551 +irene polk 65551 +irene polk 65551 +irene quirinius 65724 +irene quirinius 65724 +irene quirinius 65724 +irene robinson 65554 +irene steinbeck 65683 +irene thompson 65688 +irene underhill 65591 +irene underhill 65591 +irene van buren 65579 +irene van buren 65579 +irene xylophone 65775 +jessica brown 65588 +jessica carson 65553 +jessica carson 65553 +jessica carson 65553 +jessica davidson 65549 +jessica davidson 65549 +jessica davidson 65549 +jessica davidson 65549 +jessica ellison 65567 +jessica ellison 65567 +jessica falkner 65584 +jessica garcia 65676 +jessica garcia 65676 +jessica ichabod 65704 +jessica johnson 65607 +jessica johnson 65607 +jessica miller 65733 +jessica nixon 65590 +jessica nixon 65590 +jessica ovid 65582 +jessica ovid 65582 +jessica polk 65637 +jessica quirinius 65562 +jessica quirinius 65562 +jessica quirinius 65562 +jessica quirinius 65562 +jessica robinson 65576 +jessica thompson 65581 +jessica thompson 65581 +jessica underhill 65656 +jessica underhill 65656 +jessica underhill 65656 +jessica van buren 65615 +jessica white 65544 +jessica white 65544 +jessica white 65544 +jessica white 65544 +jessica white 65544 +jessica xylophone 65562 +jessica young 65623 +jessica young 65623 +jessica zipper 65600 +jessica zipper 65600 +jessica zipper 65600 +katie allen 65542 +katie brown 65590 +katie davidson 65619 +katie ellison 65675 +katie ellison 65675 +katie falkner 65728 +katie garcia 65625 +katie garcia 65625 +katie hernandez 65550 +katie ichabod 65658 +katie ichabod 65658 +katie ichabod 65658 +katie king 65629 +katie king 65629 +katie king 65629 +katie miller 65541 +katie miller 65541 +katie nixon 65669 +katie ovid 65681 +katie polk 65746 +katie polk 65746 +katie robinson 65697 +katie van buren 65643 +katie van buren 65643 +katie white 65620 +katie white 65620 +katie xylophone 65585 +katie young 65644 +katie young 65644 +katie young 65644 +katie zipper 65568 +katie zipper 65568 +luke allen 65547 +luke allen 65547 +luke allen 65547 +luke allen 65547 +luke allen 65547 +luke brown 65719 +luke davidson 65656 +luke davidson 65656 +luke ellison 65582 +luke ellison 65582 +luke ellison 65582 +luke falkner 65589 +luke falkner 65589 +luke garcia 65687 +luke garcia 65687 +luke ichabod 65629 +luke ichabod 65629 +luke johnson 65545 +luke johnson 65545 +luke johnson 65545 +luke laertes 65608 +luke laertes 65608 +luke laertes 65608 +luke laertes 65608 +luke laertes 65608 +luke miller 65752 +luke ovid 65569 +luke ovid 65569 +luke polk 65645 +luke polk 65645 +luke quirinius 65655 +luke robinson 65634 +luke robinson 65634 +luke thompson 65626 +luke underhill 65553 +luke underhill 65553 +luke underhill 65553 +luke van buren 65678 +luke white 65693 +luke xylophone 65597 +luke zipper 65641 +mike allen 65706 +mike brown 65654 +mike carson 65698 +mike carson 65698 +mike carson 65698 +mike davidson 65658 +mike 
davidson 65658 +mike ellison 65598 +mike ellison 65598 +mike ellison 65598 +mike ellison 65598 +mike ellison 65598 +mike falkner 65609 +mike garcia 65571 +mike garcia 65571 +mike garcia 65571 +mike hernandez 65548 +mike hernandez 65548 +mike ichabod 65621 +mike king 65563 +mike king 65563 +mike king 65563 +mike king 65563 +mike king 65563 +mike king 65563 +mike miller 65549 +mike nixon 65619 +mike nixon 65619 +mike polk 65619 +mike polk 65619 +mike polk 65619 +mike quirinius 65717 +mike steinbeck 65550 +mike steinbeck 65550 +mike steinbeck 65550 +mike steinbeck 65550 +mike van buren 65620 +mike van buren 65620 +mike white 65648 +mike white 65648 +mike white 65648 +mike white 65648 +mike young 65545 +mike young 65545 +mike young 65545 +mike zipper 65552 +mike zipper 65552 +mike zipper 65552 +nick allen 65641 +nick allen 65641 +nick brown 65724 +nick davidson 65601 +nick ellison 65691 +nick ellison 65691 +nick falkner 65583 +nick falkner 65583 +nick garcia 65695 +nick garcia 65695 +nick garcia 65695 +nick ichabod 65572 +nick ichabod 65572 +nick ichabod 65572 +nick johnson 65585 +nick johnson 65585 +nick laertes 65624 +nick miller 65757 +nick nixon 65650 +nick ovid 65719 +nick polk 65716 +nick quirinius 65588 +nick quirinius 65588 +nick robinson 65547 +nick robinson 65547 +nick steinbeck 65689 +nick thompson 65610 +nick underhill 65619 +nick van buren 65603 +nick xylophone 65644 +nick young 65654 +nick young 65654 +nick zipper 65757 +nick zipper 65757 +oscar allen 65644 +oscar brown 65614 +oscar carson 65537 +oscar carson 65537 +oscar carson 65537 +oscar carson 65537 +oscar carson 65537 +oscar davidson 65556 +oscar ellison 65630 +oscar ellison 65630 +oscar falkner 65692 +oscar garcia 65751 +oscar hernandez 65683 +oscar hernandez 65683 +oscar ichabod 65536 +oscar ichabod 65536 +oscar ichabod 65536 +oscar ichabod 65536 +oscar johnson 65645 +oscar johnson 65645 +oscar king 65541 +oscar king 65541 +oscar king 65541 +oscar laertes 65625 +oscar laertes 65625 +oscar laertes 65625 +oscar laertes 65625 +oscar nixon 65596 +oscar ovid 65536 +oscar ovid 65536 +oscar ovid 65536 +oscar polk 65541 +oscar polk 65541 +oscar quirinius 65541 +oscar quirinius 65541 +oscar quirinius 65541 +oscar quirinius 65541 +oscar robinson 65537 +oscar robinson 65537 +oscar robinson 65537 +oscar robinson 65537 +oscar steinbeck 65709 +oscar thompson 65542 +oscar thompson 65542 +oscar thompson 65542 +oscar thompson 65542 +oscar underhill 65626 +oscar van buren 65581 +oscar van buren 65581 +oscar van buren 65581 +oscar white 65552 +oscar white 65552 +oscar white 65552 +oscar white 65552 +oscar xylophone 65773 +oscar xylophone 65773 +oscar xylophone 65773 +oscar zipper 65568 +oscar zipper 65568 +oscar zipper 65568 +priscilla brown 65670 +priscilla brown 65670 +priscilla brown 65670 +priscilla carson 65658 +priscilla carson 65658 +priscilla carson 65658 +priscilla ichabod 65627 +priscilla ichabod 65627 +priscilla johnson 65543 +priscilla johnson 65543 +priscilla johnson 65543 +priscilla johnson 65543 +priscilla johnson 65543 +priscilla king 65646 +priscilla nixon 65564 +priscilla nixon 65564 +priscilla ovid 65541 +priscilla ovid 65541 +priscilla polk 65747 +priscilla quirinius 65672 +priscilla thompson 65654 +priscilla underhill 65715 +priscilla underhill 65715 +priscilla van buren 65607 +priscilla van buren 65607 +priscilla van buren 65607 +priscilla white 65652 +priscilla xylophone 65538 +priscilla xylophone 65538 +priscilla xylophone 65538 +priscilla young 65585 +priscilla young 65585 +priscilla zipper 65622 +priscilla zipper 
65622 +quinn allen 65657 +quinn allen 65657 +quinn brown 65691 +quinn brown 65691 +quinn brown 65691 +quinn davidson 65549 +quinn davidson 65549 +quinn davidson 65549 +quinn davidson 65549 +quinn ellison 65705 +quinn ellison 65705 +quinn garcia 65568 +quinn garcia 65568 +quinn garcia 65568 +quinn garcia 65568 +quinn ichabod 65564 +quinn king 65558 +quinn king 65558 +quinn laertes 65542 +quinn laertes 65542 +quinn laertes 65542 +quinn nixon 65659 +quinn ovid 65699 +quinn quirinius 65747 +quinn robinson 65627 +quinn steinbeck 65578 +quinn steinbeck 65578 +quinn thompson 65643 +quinn thompson 65643 +quinn underhill 65549 +quinn underhill 65549 +quinn underhill 65549 +quinn van buren 65725 +quinn young 65647 +quinn zipper 65579 +quinn zipper 65579 +rachel allen 65661 +rachel allen 65661 +rachel brown 65586 +rachel brown 65586 +rachel brown 65586 +rachel brown 65586 +rachel brown 65586 +rachel carson 65677 +rachel carson 65677 +rachel davidson 65755 +rachel ellison 65761 +rachel falkner 65616 +rachel falkner 65616 +rachel falkner 65616 +rachel falkner 65616 +rachel johnson 65658 +rachel king 65604 +rachel king 65604 +rachel laertes 65562 +rachel laertes 65562 +rachel ovid 65721 +rachel ovid 65721 +rachel polk 65686 +rachel quirinius 65787 +rachel robinson 65544 +rachel robinson 65544 +rachel robinson 65544 +rachel thompson 65648 +rachel thompson 65648 +rachel thompson 65648 +rachel underhill 65667 +rachel white 65615 +rachel white 65615 +rachel young 65727 +rachel zipper 65757 +rachel zipper 65757 +sarah carson 65679 +sarah carson 65679 +sarah carson 65679 +sarah ellison 65611 +sarah falkner 65606 +sarah falkner 65606 +sarah garcia 65563 +sarah garcia 65563 +sarah garcia 65563 +sarah ichabod 65667 +sarah ichabod 65667 +sarah johnson 65659 +sarah johnson 65659 +sarah johnson 65659 +sarah johnson 65659 +sarah king 65650 +sarah king 65650 +sarah miller 65557 +sarah ovid 65550 +sarah robinson 65677 +sarah robinson 65677 +sarah steinbeck 65721 +sarah white 65622 +sarah white 65622 +sarah xylophone 65678 +sarah young 65595 +sarah zipper 65550 +tom brown 65593 +tom brown 65593 +tom carson 65539 +tom carson 65539 +tom carson 65539 +tom davidson 65780 +tom ellison 65578 +tom ellison 65578 +tom ellison 65578 +tom falkner 65574 +tom falkner 65574 +tom hernandez 65575 +tom hernandez 65575 +tom ichabod 65588 +tom johnson 65536 +tom johnson 65536 +tom king 65576 +tom laertes 65617 +tom laertes 65617 +tom miller 65594 +tom miller 65594 +tom miller 65594 +tom nixon 65672 +tom ovid 65628 +tom polk 65652 +tom polk 65652 +tom quirinius 65563 +tom quirinius 65563 +tom robinson 65626 +tom robinson 65626 +tom robinson 65626 +tom robinson 65626 +tom steinbeck 65666 +tom van buren 65621 +tom van buren 65621 +tom van buren 65621 +tom white 65548 +tom young 65544 +tom young 65544 +tom zipper 65789 +ulysses brown 65735 +ulysses carson 65602 +ulysses carson 65602 +ulysses carson 65602 +ulysses carson 65602 +ulysses davidson 65750 +ulysses ellison 65575 +ulysses garcia 65666 +ulysses hernandez 65651 +ulysses hernandez 65651 +ulysses hernandez 65651 +ulysses ichabod 65551 +ulysses ichabod 65551 +ulysses johnson 65776 +ulysses king 65649 +ulysses laertes 65691 +ulysses laertes 65691 +ulysses laertes 65691 +ulysses miller 65610 +ulysses miller 65610 +ulysses nixon 65603 +ulysses ovid 65656 +ulysses polk 65563 +ulysses polk 65563 +ulysses polk 65563 +ulysses polk 65563 +ulysses quirinius 65786 +ulysses robinson 65744 +ulysses steinbeck 65611 +ulysses steinbeck 65611 +ulysses thompson 65788 +ulysses underhill 65570 +ulysses 
underhill 65570 +ulysses underhill 65570 +ulysses underhill 65570 +ulysses underhill 65570 +ulysses underhill 65570 +ulysses underhill 65570 +ulysses van buren 65684 +ulysses white 65654 +ulysses white 65654 +ulysses xylophone 65623 +ulysses xylophone 65623 +ulysses xylophone 65623 +ulysses young 65675 +ulysses young 65675 +ulysses young 65675 +victor allen 65684 +victor allen 65684 +victor brown 65550 +victor brown 65550 +victor brown 65550 +victor brown 65550 +victor davidson 65579 +victor davidson 65579 +victor davidson 65579 +victor ellison 65641 +victor ellison 65641 +victor hernandez 65571 +victor hernandez 65571 +victor hernandez 65571 +victor hernandez 65571 +victor hernandez 65571 +victor johnson 65606 +victor johnson 65606 +victor johnson 65606 +victor king 65721 +victor king 65721 +victor laertes 65638 +victor laertes 65638 +victor miller 65570 +victor nixon 65709 +victor nixon 65709 +victor ovid 65649 +victor polk 65625 +victor quirinius 65620 +victor quirinius 65620 +victor robinson 65596 +victor robinson 65596 +victor steinbeck 65618 +victor steinbeck 65618 +victor steinbeck 65618 +victor thompson 65548 +victor van buren 65664 +victor van buren 65664 +victor white 65548 +victor white 65548 +victor xylophone 65549 +victor xylophone 65549 +victor xylophone 65549 +victor xylophone 65549 +victor xylophone 65549 +victor young 65628 +victor zipper 65743 +wendy allen 65628 +wendy allen 65628 +wendy allen 65628 +wendy brown 65580 +wendy brown 65580 +wendy ellison 65545 +wendy ellison 65545 +wendy falkner 65595 +wendy falkner 65595 +wendy falkner 65595 +wendy garcia 65659 +wendy garcia 65659 +wendy garcia 65659 +wendy garcia 65659 +wendy hernandez 65650 +wendy ichabod 65730 +wendy king 65586 +wendy king 65586 +wendy king 65586 +wendy laertes 65566 +wendy laertes 65566 +wendy laertes 65566 +wendy miller 65582 +wendy miller 65582 +wendy nixon 65611 +wendy nixon 65611 +wendy ovid 65589 +wendy ovid 65589 +wendy polk 65656 +wendy polk 65656 +wendy quirinius 65766 +wendy quirinius 65766 +wendy robinson 65622 +wendy robinson 65622 +wendy robinson 65622 +wendy steinbeck 65612 +wendy thompson 65650 +wendy thompson 65650 +wendy underhill 65662 +wendy underhill 65662 +wendy underhill 65662 +wendy van buren 65680 +wendy van buren 65680 +wendy white 65705 +wendy xylophone 65687 +wendy xylophone 65687 +wendy young 65674 +wendy young 65674 +xavier allen 65611 +xavier allen 65611 +xavier allen 65611 +xavier brown 65600 +xavier brown 65600 +xavier brown 65600 +xavier carson 65731 +xavier carson 65731 +xavier davidson 65644 +xavier davidson 65644 +xavier davidson 65644 +xavier ellison 65541 +xavier ellison 65541 +xavier garcia 65672 +xavier hernandez 65541 +xavier hernandez 65541 +xavier hernandez 65541 +xavier ichabod 65597 +xavier ichabod 65597 +xavier johnson 65654 +xavier johnson 65654 +xavier king 65590 +xavier king 65590 +xavier laertes 65743 +xavier ovid 65788 +xavier polk 65587 +xavier polk 65587 +xavier polk 65587 +xavier polk 65587 +xavier quirinius 65599 +xavier quirinius 65599 +xavier quirinius 65599 +xavier quirinius 65599 +xavier thompson 65608 +xavier underhill 65710 +xavier white 65703 +xavier white 65703 +xavier xylophone 65572 +xavier zipper 65561 +yuri allen 65565 +yuri allen 65565 +yuri brown 65538 +yuri brown 65538 +yuri carson 65670 +yuri carson 65670 +yuri ellison 65570 +yuri ellison 65570 +yuri falkner 65658 +yuri falkner 65658 +yuri garcia 65639 +yuri hernandez 65706 +yuri johnson 65587 +yuri johnson 65587 +yuri johnson 65587 +yuri king 65721 +yuri laertes 65637 +yuri laertes 
65637 +yuri nixon 65635 +yuri nixon 65635 +yuri polk 65607 +yuri polk 65607 +yuri polk 65607 +yuri quirinius 65544 +yuri quirinius 65544 +yuri quirinius 65544 +yuri steinbeck 65592 +yuri steinbeck 65592 +yuri thompson 65676 +yuri underhill 65718 +yuri underhill 65718 +yuri white 65659 +yuri xylophone 65714 +zach allen 65667 +zach brown 65559 +zach brown 65559 +zach brown 65559 +zach brown 65559 +zach brown 65559 +zach carson 65572 +zach ellison 65748 +zach falkner 65620 +zach falkner 65620 +zach garcia 65544 +zach garcia 65544 +zach garcia 65544 +zach garcia 65544 +zach ichabod 65599 +zach ichabod 65599 +zach king 65556 +zach king 65556 +zach king 65556 +zach miller 65584 +zach miller 65584 +zach miller 65584 +zach ovid 65578 +zach ovid 65578 +zach ovid 65578 +zach ovid 65578 +zach quirinius 65691 +zach robinson 65599 +zach steinbeck 65602 +zach steinbeck 65602 +zach thompson 65636 +zach thompson 65636 +zach underhill 65573 +zach white 65733 +zach xylophone 65542 +zach xylophone 65542 +zach young 65576 +zach zipper 65579 +zach zipper 65579 +zach zipper 65579 diff --git a/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-1-2bf20f39e6ffef258858f7943a974e7e b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-1-2bf20f39e6ffef258858f7943a974e7e new file mode 100644 index 0000000000000..657e81a94f4c5 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-1-2bf20f39e6ffef258858f7943a974e7e @@ -0,0 +1,1049 @@ + 9.220000267028809 + 43.72999954223633 + 89.52999877929688 +alice allen 2.7899999618530273 +alice allen 21.450000762939453 +alice allen 73.62999725341797 +alice brown 71.30999755859375 +alice carson 39.029998779296875 +alice davidson 70.3499984741211 +alice falkner 90.25 +alice garcia 48.45000076293945 +alice hernandez 88.16999816894531 +alice hernandez 90.55999755859375 +alice johnson 47.359999895095825 +alice king 19.139999389648438 +alice king 23.170000076293945 +alice king 52.22999954223633 +alice laertes 68.94999694824219 +alice laertes 69.52999877929688 +alice miller 68.95999908447266 +alice nixon 40.0 +alice nixon 48.150001525878906 +alice nixon 79.83000183105469 +alice ovid 9.039999961853027 +alice polk 62.900001525878906 +alice quirinius 37.13999938964844 +alice quirinius 62.29999923706055 +alice robinson 3.934999942779541 +alice robinson 56.099998474121094 +alice steinbeck 38.619998931884766 +alice steinbeck 63.40999794006348 +alice steinbeck 92.37000274658203 +alice underhill 93.17499923706055 +alice van buren 38.939998626708984 +alice xylophone 13.816667238871256 +alice xylophone 43.15999984741211 +alice xylophone 78.20999908447266 +alice zipper 26.43000030517578 +alice zipper 42.47999954223633 +alice zipper 89.93000030517578 +bob brown 8.069999694824219 +bob brown 70.93000030517578 +bob brown 93.08999633789062 +bob carson 50.09000015258789 +bob davidson 1.2899999618530273 +bob davidson 71.93000030517578 +bob davidson 74.72000122070312 +bob ellison 41.34000015258789 +bob ellison 56.584999084472656 +bob ellison 75.02999877929688 +bob ellison 80.30000305175781 +bob falkner 16.989999771118164 +bob garcia 4.460000038146973 +bob garcia 5.400000095367432 +bob garcia 43.46500015258789 +bob garcia 80.30000305175781 +bob garcia 87.56999969482422 +bob hernandez 55.51333363850912 +bob ichabod 82.55999755859375 +bob king 8.789999961853027 +bob king 12.539999961853027 +bob king 39.0099983215332 +bob laertes 0.7900000214576721 +bob laertes 10.670000076293945 +bob miller 61.91999816894531 +bob ovid 46.86000061035156 +bob ovid 
62.849998474121094 +bob ovid 88.77999877929688 +bob ovid 97.08999633789062 +bob polk 7.980000019073486 +bob quirinius 48.09499931335449 +bob steinbeck 9.699999809265137 +bob van buren 33.66999816894531 +bob white 45.34000015258789 +bob white 45.349998474121094 +bob xylophone 27.6299991607666 +bob xylophone 29.359999656677246 +bob young 35.16999816894531 +bob zipper 3.819999933242798 +bob zipper 32.07500076293945 +bob zipper 52.63999938964844 +calvin allen 63.119998931884766 +calvin brown 28.110000610351562 +calvin brown 85.9000015258789 +calvin brown 90.19999694824219 +calvin carson 59.42500114440918 +calvin davidson 20.40500020980835 +calvin davidson 86.54000091552734 +calvin ellison 26.489999771118164 +calvin falkner 2.9700000286102295 +calvin falkner 56.040000915527344 +calvin falkner 56.33000183105469 +calvin falkner 80.5999984741211 +calvin falkner 93.61000061035156 +calvin falkner 94.30999755859375 +calvin garcia 41.849998474121094 +calvin hernandez 33.869998931884766 +calvin johnson 66.61000061035156 +calvin laertes 23.1299991607666 +calvin laertes 62.670000076293945 +calvin nixon 9.8100004196167 +calvin nixon 41.20000076293945 +calvin nixon 69.73999786376953 +calvin ovid 69.95999908447266 +calvin ovid 71.26000213623047 +calvin ovid 79.12000274658203 +calvin ovid 84.72000122070312 +calvin polk 65.72000122070312 +calvin quirinius 29.540000915527344 +calvin quirinius 60.98499870300293 +calvin robinson 40.439998626708984 +calvin steinbeck 15.220000267028809 +calvin steinbeck 22.850000381469727 +calvin steinbeck 52.06666819254557 +calvin thompson 8.90999984741211 +calvin thompson 89.22500228881836 +calvin underhill 59.70000076293945 +calvin van buren 34.209999084472656 +calvin van buren 64.0 +calvin white 35.864999771118164 +calvin white 90.69000244140625 +calvin xylophone 21.700000762939453 +calvin xylophone 25.420000076293945 +calvin xylophone 56.810001373291016 +calvin young 39.810001373291016 +calvin young 70.27999941507976 +calvin zipper 9.1899995803833 +calvin zipper 95.37999725341797 +david allen 51.005001068115234 +david allen 51.25 +david brown 32.56499910354614 +david brown 93.63999938964844 +david davidson 1.0800000429153442 +david davidson 54.17499876022339 +david davidson 62.720001220703125 +david davidson 74.1500015258789 +david ellison 47.689998626708984 +david ellison 62.587501525878906 +david ellison 85.2300033569336 +david hernandez 75.0833346048991 +david ichabod 35.763334115346275 +david ichabod 82.55000305175781 +david laertes 76.70999908447266 +david nixon 34.72999954223633 +david ovid 43.915000915527344 +david ovid 58.89999961853027 +david quirinius 23.5649995803833 +david quirinius 29.239999771118164 +david quirinius 79.97000122070312 +david robinson 37.703334172566734 +david robinson 70.55999755859375 +david thompson 43.619998931884766 +david underhill 49.719998359680176 +david underhill 60.36499881744385 +david underhill 60.46666717529297 +david van buren 25.500000476837158 +david van buren 50.27999973297119 +david white 45.189998626708984 +david xylophone 30.465000867843628 +david xylophone 30.465000867843628 +david xylophone 54.34000015258789 +david young 10.25 +david young 19.310000777244568 +ethan allen 23.790000518163044 +ethan brown 15.630000114440918 +ethan brown 22.93666648864746 +ethan brown 27.78000044822693 +ethan brown 52.19333346684774 +ethan brown 73.18000030517578 +ethan brown 82.30000305175781 +ethan carson 57.635000228881836 +ethan ellison 0.2800000011920929 +ethan ellison 81.47000122070312 +ethan falkner 50.02000045776367 +ethan falkner 
59.43000030517578 +ethan garcia 26.44499921798706 +ethan hernandez 32.30333264668783 +ethan johnson 90.05000305175781 +ethan king 4.349999904632568 +ethan laertes 54.75 +ethan laertes 54.87999868392944 +ethan laertes 59.209999084472656 +ethan laertes 75.31500053405762 +ethan laertes 76.94499969482422 +ethan laertes 80.4749984741211 +ethan laertes 95.06999969482422 +ethan miller 25.3700008392334 +ethan nixon 50.88999938964844 +ethan ovid 58.4950008392334 +ethan polk 2.3499999046325684 +ethan polk 21.31999969482422 +ethan polk 23.440000534057617 +ethan polk 59.869998931884766 +ethan quirinius 13.650000214576721 +ethan quirinius 70.94500160217285 +ethan quirinius 88.18000030517578 +ethan robinson 67.94000244140625 +ethan robinson 80.58500289916992 +ethan underhill 55.630001068115234 +ethan van buren 37.85499954223633 +ethan white 58.70666631062826 +ethan white 63.41999816894531 +ethan xylophone 57.11000061035156 +ethan zipper 5.914999961853027 +ethan zipper 97.51000213623047 +fred davidson 26.435000896453857 +fred davidson 28.144000816345216 +fred davidson 78.30999755859375 +fred ellison 46.65999984741211 +fred ellison 65.40666516621907 +fred ellison 71.98499870300293 +fred falkner 25.144999980926514 +fred falkner 37.62000068028768 +fred falkner 75.125 +fred hernandez 55.9900016784668 +fred ichabod 37.06999969482422 +fred ichabod 75.83499908447266 +fred johnson 96.08999633789062 +fred king 10.220000267028809 +fred king 68.40500068664551 +fred laertes 42.68750047683716 +fred miller 70.02999877929688 +fred nixon 30.589999198913574 +fred nixon 32.71666653951009 +fred nixon 70.5199966430664 +fred nixon 93.02999877929688 +fred polk 23.959999084472656 +fred polk 47.31999969482422 +fred polk 63.97999954223633 +fred polk 90.12000274658203 +fred quirinius 15.300000190734863 +fred quirinius 27.40999984741211 +fred robinson 64.42000007629395 +fred steinbeck 21.239999771118164 +fred steinbeck 21.30000066757202 +fred steinbeck 65.44333394368489 +fred underhill 85.36999893188477 +fred van buren 22.37499976158142 +fred van buren 44.49000096321106 +fred van buren 45.94999980926514 +fred van buren 56.88999938964844 +fred white 42.329999923706055 +fred young 46.79999923706055 +fred young 72.69999885559082 +fred zipper 50.14000129699707 +gabriella allen 28.040000438690186 +gabriella allen 79.64500045776367 +gabriella brown 26.164999961853027 +gabriella brown 84.83000183105469 +gabriella carson 42.7599983215332 +gabriella davidson 18.946666717529297 +gabriella ellison 48.08000183105469 +gabriella ellison 71.54000091552734 +gabriella falkner 31.609999974568684 +gabriella falkner 51.720001220703125 +gabriella falkner 87.61000061035156 +gabriella garcia 43.0099983215332 +gabriella hernandez 62.22666517893473 +gabriella hernandez 92.9800033569336 +gabriella ichabod 10.729999542236328 +gabriella ichabod 26.639999389648438 +gabriella ichabod 66.36000061035156 +gabriella ichabod 71.12999725341797 +gabriella ichabod 83.76666514078777 +gabriella king 20.670000076293945 +gabriella king 62.89999961853027 +gabriella laertes 62.62499809265137 +gabriella miller 26.043334086736042 +gabriella ovid 44.78000068664551 +gabriella ovid 92.4000015258789 +gabriella polk 35.68000030517578 +gabriella polk 90.22500228881836 +gabriella steinbeck 46.45000076293945 +gabriella steinbeck 66.86999893188477 +gabriella thompson 72.34500122070312 +gabriella thompson 75.73666636149089 +gabriella thompson 94.25 +gabriella van buren 38.935001373291016 +gabriella van buren 48.349998474121094 +gabriella white 55.18000030517578 +gabriella young 
32.16333262125651 +gabriella young 59.709999084472656 +gabriella zipper 36.2599983215332 +gabriella zipper 91.62999725341797 +holly allen 63.435001373291016 +holly brown 68.51666641235352 +holly brown 86.08666737874348 +holly falkner 83.82666524251302 +holly hernandez 21.190000534057617 +holly hernandez 24.790000915527344 +holly hernandez 28.649999300638836 +holly hernandez 50.22999954223633 +holly ichabod 48.86333433787028 +holly ichabod 73.77000045776367 +holly ichabod 82.91499710083008 +holly johnson 23.447500228881836 +holly johnson 64.36000061035156 +holly johnson 65.62000274658203 +holly king 35.34499979019165 +holly king 42.310001373291016 +holly laertes 70.59666697184245 +holly miller 35.86000061035156 +holly nixon 43.82499885559082 +holly nixon 78.80749893188477 +holly polk 30.365000247955322 +holly polk 75.96499919891357 +holly robinson 82.70499801635742 +holly thompson 0.07999999821186066 +holly thompson 65.52499842643738 +holly thompson 86.69000244140625 +holly underhill 42.54999923706055 +holly underhill 53.02000045776367 +holly underhill 56.13333384195963 +holly underhill 65.84000015258789 +holly van buren 48.893333435058594 +holly white 26.5633331934611 +holly white 40.5 +holly xylophone 49.55666716893514 +holly young 41.698571750095915 +holly young 75.20999908447266 +holly zipper 79.72999827067058 +holly zipper 81.08666610717773 +irene allen 29.75999927520752 +irene brown 28.596666653951008 +irene brown 47.189998626708984 +irene brown 49.46666622161865 +irene carson 86.64999898274739 +irene ellison 38.255001068115234 +irene ellison 45.71333376566569 +irene falkner 22.079999923706055 +irene falkner 83.44666544596355 +irene garcia 38.93499994277954 +irene garcia 42.84666601816813 +irene garcia 58.43000030517578 +irene ichabod 60.7079984664917 +irene ichabod 64.58000183105469 +irene johnson 26.165000438690186 +irene laertes 21.02999997138977 +irene laertes 40.04499816894531 +irene laertes 47.04333241780599 +irene miller 65.44000244140625 +irene nixon 46.03999996185303 +irene nixon 46.96666653951009 +irene nixon 67.09499931335449 +irene ovid 35.130001068115234 +irene ovid 42.535000801086426 +irene ovid 79.75 +irene polk 0.9800000190734863 +irene polk 35.17500114440918 +irene polk 48.94666576385498 +irene polk 49.6 +irene polk 51.885000228881836 +irene quirinius 33.78000005086263 +irene quirinius 42.610000133514404 +irene quirinius 53.4800001780192 +irene robinson 92.19499969482422 +irene steinbeck 73.28000068664551 +irene thompson 46.27375066280365 +irene underhill 24.75 +irene underhill 57.349998474121094 +irene van buren 50.8799991607666 +irene van buren 74.5625 +irene xylophone 83.53499984741211 +jessica brown 26.185000479221344 +jessica carson 56.22999954223633 +jessica carson 62.20000076293945 +jessica carson 62.2400016784668 +jessica davidson 50.01666768391927 +jessica davidson 63.59499931335449 +jessica davidson 69.26666514078777 +jessica davidson 94.53333282470703 +jessica ellison 11.180000305175781 +jessica ellison 64.2060001373291 +jessica falkner 61.57333437601725 +jessica garcia 38.55250036716461 +jessica garcia 57.00999975204468 +jessica ichabod 32.63250035047531 +jessica johnson 9.5600004196167 +jessica johnson 51.959999084472656 +jessica miller 77.83999633789062 +jessica nixon 40.72249960899353 +jessica nixon 90.06999969482422 +jessica ovid 37.45250064134598 +jessica ovid 59.68000030517578 +jessica polk 49.68000030517578 +jessica quirinius 25.65750002861023 +jessica quirinius 37.64200019836426 +jessica quirinius 54.25500011444092 +jessica quirinius 
58.019999186197914 +jessica robinson 42.66333484649658 +jessica thompson 30.40666675567627 +jessica thompson 43.87500023841858 +jessica underhill 43.33333269755045 +jessica underhill 45.639999866485596 +jessica underhill 57.584999084472656 +jessica van buren 67.00000047683716 +jessica white 6.170000106096268 +jessica white 63.32500076293945 +jessica white 65.1450023651123 +jessica white 73.93000030517578 +jessica white 96.62000274658203 +jessica xylophone 69.87500190734863 +jessica young 11.1899995803833 +jessica young 43.369998931884766 +jessica zipper 42.43833335240682 +jessica zipper 46.7450008392334 +jessica zipper 56.97999954223633 +katie allen 55.47666549682617 +katie brown 31.699999809265137 +katie davidson 93.22000122070312 +katie ellison 48.31999933719635 +katie ellison 64.08499892552693 +katie falkner 51.665000915527344 +katie garcia 57.71000099182129 +katie garcia 61.21000051498413 +katie hernandez 41.150000381469724 +katie ichabod 44.243333180745445 +katie ichabod 51.800000508626304 +katie ichabod 69.18799896240235 +katie king 39.83000183105469 +katie king 46.80333296457926 +katie king 51.85000038146973 +katie miller 31.399999618530273 +katie miller 74.77999877929688 +katie nixon 23.190000534057617 +katie ovid 67.94500160217285 +katie polk 26.62750005722046 +katie polk 33.9350004196167 +katie robinson 13.890000343322754 +katie van buren 44.434998512268066 +katie van buren 65.41999816894531 +katie white 37.96500015258789 +katie white 59.223333517710365 +katie xylophone 39.30000019073486 +katie young 36.660000801086426 +katie young 67.78333282470703 +katie young 72.76666577657063 +katie zipper 23.766667087872822 +katie zipper 58.75 +luke allen 50.959999084472656 +luke allen 53.36666742960612 +luke allen 54.63249969482422 +luke allen 57.670000076293945 +luke allen 70.39500045776367 +luke brown 49.595000982284546 +luke davidson 7.050000190734863 +luke davidson 18.87000036239624 +luke ellison 16.25 +luke ellison 32.9519996881485 +luke ellison 71.93500137329102 +luke falkner 21.71999979019165 +luke falkner 31.81250023841858 +luke garcia 18.65499973297119 +luke garcia 41.2300001780192 +luke ichabod 41.25750005245209 +luke ichabod 73.55000114440918 +luke johnson 31.670000076293945 +luke johnson 32.84499979019165 +luke johnson 39.54500102996826 +luke laertes 11.819999694824219 +luke laertes 21.184999227523804 +luke laertes 21.993332862854004 +luke laertes 26.696666717529297 +luke laertes 45.9900016784668 +luke miller 52.350000858306885 +luke ovid 23.804999828338623 +luke ovid 64.30000305175781 +luke polk 41.02499961853027 +luke polk 58.4566650390625 +luke quirinius 40.41999816894531 +luke robinson 48.559998750686646 +luke robinson 56.76499938964844 +luke thompson 78.04333368937175 +luke underhill 34.0166662534078 +luke underhill 47.28999996185303 +luke underhill 59.32000160217285 +luke van buren 59.91999944051107 +luke white 74.19599990844726 +luke xylophone 64.77999925613403 +luke zipper 30.434999465942383 +mike allen 30.539999961853027 +mike brown 69.86833318074544 +mike carson 30.25333309173584 +mike carson 61.33799934387207 +mike carson 89.375 +mike davidson 32.55333391825358 +mike davidson 66.74333318074544 +mike ellison 35.905999755859376 +mike ellison 39.82499885559082 +mike ellison 58.56399993896484 +mike ellison 64.52999877929688 +mike ellison 66.93749856948853 +mike falkner 48.53750002384186 +mike garcia 51.02999973297119 +mike garcia 67.93000030517578 +mike garcia 70.8499984741211 +mike hernandez 37.900001525878906 +mike hernandez 59.45000076293945 +mike ichabod 
64.7699966430664 +mike king 36.17800045013428 +mike king 41.69500136375427 +mike king 49.57000017166138 +mike king 59.654998779296875 +mike king 71.57000122070312 +mike king 78.50999927520752 +mike miller 29.570000171661377 +mike nixon 45.029999828338624 +mike nixon 48.429999669392906 +mike polk 30.864000129699708 +mike polk 46.95499873161316 +mike polk 79.55500030517578 +mike quirinius 85.0699971516927 +mike steinbeck 24.267500042915344 +mike steinbeck 43.52500021457672 +mike steinbeck 61.426666577657066 +mike steinbeck 68.46000022888184 +mike van buren 27.639999389648438 +mike van buren 56.16333134969076 +mike white 34.8924994468689 +mike white 43.5566660563151 +mike white 53.689998626708984 +mike white 77.54499864578247 +mike young 34.3319993019104 +mike young 52.8100004196167 +mike young 55.64333359400431 +mike zipper 56.86666742960612 +mike zipper 63.3149995803833 +mike zipper 83.91999816894531 +nick allen 57.086001586914065 +nick allen 60.15400094985962 +nick brown 42.939998626708984 +nick davidson 63.07499885559082 +nick ellison 45.34000015258789 +nick ellison 65.88500022888184 +nick falkner 41.87999868392944 +nick falkner 64.05666732788086 +nick garcia 34.34499979019165 +nick garcia 51.08666737874349 +nick garcia 62.88600044250488 +nick ichabod 20.253333409627277 +nick ichabod 53.635000228881836 +nick ichabod 77.36000061035156 +nick johnson 20.114999175071716 +nick johnson 81.91666666666667 +nick laertes 91.56666819254558 +nick miller 71.5500005086263 +nick nixon 77.04249954223633 +nick ovid 74.62666702270508 +nick polk 39.27500009536743 +nick quirinius 60.79499816894531 +nick quirinius 67.44999694824219 +nick robinson 31.672499418258667 +nick robinson 57.66999816894531 +nick steinbeck 59.15999984741211 +nick thompson 18.88666645685832 +nick underhill 43.009998893737794 +nick van buren 34.720000902811684 +nick xylophone 75.3499984741211 +nick young 0.27000001072883606 +nick young 47.813334147135414 +nick zipper 46.22333272298177 +nick zipper 52.54333209991455 +oscar allen 37.396666844685875 +oscar brown 13.100000381469727 +oscar carson 31.91333230336507 +oscar carson 41.77333331108093 +oscar carson 57.3149995803833 +oscar carson 73.59500122070312 +oscar carson 95.44000244140625 +oscar davidson 75.18000030517578 +oscar ellison 34.04499959945679 +oscar ellison 34.04499959945679 +oscar falkner 61.72000050544739 +oscar garcia 67.4800033569336 +oscar hernandez 41.63333400090536 +oscar hernandez 47.93999986648559 +oscar ichabod 45.839999516805015 +oscar ichabod 68.62000274658203 +oscar ichabod 72.18249797821045 +oscar ichabod 76.69000244140625 +oscar johnson 23.880000114440918 +oscar johnson 65.04000091552734 +oscar king 36.69500017166138 +oscar king 49.7049994468689 +oscar king 67.98399925231934 +oscar laertes 43.616665522257485 +oscar laertes 44.755000591278076 +oscar laertes 45.26666831970215 +oscar laertes 53.710001945495605 +oscar nixon 36.56999937693278 +oscar ovid 45.89999961853027 +oscar ovid 46.93999934196472 +oscar ovid 55.277999591827395 +oscar polk 42.31999969482422 +oscar polk 63.900001525878906 +oscar quirinius 63.81500053405762 +oscar quirinius 66.28428527287075 +oscar quirinius 70.24000295003255 +oscar quirinius 81.26249980926514 +oscar robinson 11.34000015258789 +oscar robinson 47.845001220703125 +oscar robinson 59.74333349863688 +oscar robinson 63.346666971842446 +oscar steinbeck 42.49999976158142 +oscar thompson 38.23500061035156 +oscar thompson 51.469999154408775 +oscar thompson 60.029999542236325 +oscar thompson 63.079999923706055 +oscar underhill 66.97666676839192 
+oscar van buren 24.085000872612 +oscar van buren 61.880001068115234 +oscar van buren 72.9533322652181 +oscar white 44.72333272298177 +oscar white 46.60999870300293 +oscar white 54.7599983215332 +oscar white 60.85500144958496 +oscar xylophone 34.946666399637856 +oscar xylophone 39.8299994468689 +oscar xylophone 57.119998931884766 +oscar zipper 28.499999046325684 +oscar zipper 47.46750068664551 +oscar zipper 59.1933339436849 +priscilla brown 47.40400066375732 +priscilla brown 77.1479995727539 +priscilla brown 80.5199966430664 +priscilla carson 7.960000038146973 +priscilla carson 28.480000153183937 +priscilla carson 45.92750144004822 +priscilla ichabod 38.95666758219401 +priscilla ichabod 62.32999928792318 +priscilla johnson 44.04499912261963 +priscilla johnson 50.53750038146973 +priscilla johnson 55.98333485921224 +priscilla johnson 59.64499855041504 +priscilla johnson 89.1500015258789 +priscilla king 50.44666735331217 +priscilla nixon 44.32222270965576 +priscilla nixon 45.267999792099 +priscilla ovid 44.78333361943563 +priscilla ovid 52.72999954223633 +priscilla polk 34.89399948120117 +priscilla quirinius 35.609999895095825 +priscilla thompson 35.16249918937683 +priscilla underhill 68.22000122070312 +priscilla underhill 73.97200012207031 +priscilla van buren 50.47000026702881 +priscilla van buren 51.39500045776367 +priscilla van buren 53.541999435424806 +priscilla white 50.47599992752075 +priscilla xylophone 0.15000000596046448 +priscilla xylophone 41.106666485468544 +priscilla xylophone 63.9574990272522 +priscilla young 0.2900000065565109 +priscilla young 19.866666316986084 +priscilla zipper 32.084999322891235 +priscilla zipper 43.90333366394043 +quinn allen 47.90333382288615 +quinn allen 83.33000183105469 +quinn brown 24.280000686645508 +quinn brown 53.98666508992513 +quinn brown 66.82500171661377 +quinn davidson 40.666666666666664 +quinn davidson 54.095001220703125 +quinn davidson 79.78333282470703 +quinn davidson 92.13000106811523 +quinn ellison 52.714999198913574 +quinn ellison 63.352500438690186 +quinn garcia 20.19000056385994 +quinn garcia 54.60000038146973 +quinn garcia 59.010000824928284 +quinn garcia 68.98999881744385 +quinn ichabod 48.60499930381775 +quinn king 61.27333450317383 +quinn king 81.46000289916992 +quinn laertes 32.08000040054321 +quinn laertes 44.45666694641113 +quinn laertes 49.85499858856201 +quinn nixon 72.2471422467913 +quinn ovid 34.423333168029785 +quinn quirinius 53.165000915527344 +quinn robinson 32.624999046325684 +quinn steinbeck 24.802499771118164 +quinn steinbeck 55.477500915527344 +quinn thompson 50.500000381469725 +quinn thompson 55.68600006103516 +quinn underhill 39.66600060462952 +quinn underhill 41.47666676839193 +quinn underhill 56.580000162124634 +quinn van buren 49.40333207448324 +quinn young 55.59000142415365 +quinn zipper 11.359999974568685 +quinn zipper 48.45000123977661 +rachel allen 45.940001249313354 +rachel allen 85.97999954223633 +rachel brown 33.01999984184901 +rachel brown 34.08250021934509 +rachel brown 37.999999046325684 +rachel brown 41.75000019868215 +rachel brown 53.679999669392906 +rachel carson 43.32400016784668 +rachel carson 66.2233320871989 +rachel davidson 14.220000267028809 +rachel ellison 17.549999833106995 +rachel falkner 56.883334159851074 +rachel falkner 57.5199998219808 +rachel falkner 58.80666637420654 +rachel falkner 70.69428443908691 +rachel johnson 36.22499990463257 +rachel king 50.970001220703125 +rachel king 83.53750133514404 +rachel laertes 42.29857151848929 +rachel laertes 71.65999984741211 +rachel ovid 
42.25333329041799 +rachel ovid 47.01749947667122 +rachel polk 64.90333239237468 +rachel quirinius 53.2624990940094 +rachel robinson 40.712857246398926 +rachel robinson 53.092498898506165 +rachel robinson 64.94999694824219 +rachel thompson 24.555000439286232 +rachel thompson 31.460000038146973 +rachel thompson 46.804000282287596 +rachel underhill 47.22333272298177 +rachel white 39.87999979654948 +rachel white 41.83428575311388 +rachel young 75.7966677347819 +rachel zipper 45.794999519983925 +rachel zipper 56.909999179840085 +sarah carson 24.576666196187336 +sarah carson 36.33750060200691 +sarah carson 43.65749907493591 +sarah ellison 37.054999351501465 +sarah falkner 48.58285754067557 +sarah falkner 62.36500072479248 +sarah garcia 33.38000011444092 +sarah garcia 35.513333002726235 +sarah garcia 64.31333287556966 +sarah ichabod 36.10599975585937 +sarah ichabod 45.830000162124634 +sarah johnson 26.464999675750732 +sarah johnson 40.9300012588501 +sarah johnson 43.44000196456909 +sarah johnson 64.24333318074544 +sarah king 49.06999909877777 +sarah king 63.01333363850912 +sarah miller 41.709999084472656 +sarah ovid 63.682499408721924 +sarah robinson 39.196666399637856 +sarah robinson 66.88999938964844 +sarah steinbeck 66.89000034332275 +sarah white 41.42599945068359 +sarah white 52.95249938964844 +sarah xylophone 68.31999969482422 +sarah young 35.92750024795532 +sarah zipper 53.697500705718994 +tom brown 38.37000020345052 +tom brown 44.68000049591065 +tom carson 27.994999766349792 +tom carson 54.25250005722046 +tom carson 62.790000915527344 +tom davidson 38.679999113082886 +tom ellison 33.68600053787232 +tom ellison 46.00666618347168 +tom ellison 67.79666646321614 +tom falkner 55.61800079345703 +tom falkner 58.82500012715658 +tom hernandez 50.52250051498413 +tom hernandez 50.52250051498413 +tom ichabod 24.98399963378906 +tom johnson 34.83750009536743 +tom johnson 73.72399978637695 +tom king 69.98000106811523 +tom laertes 41.97285750934056 +tom laertes 70.40333429972331 +tom miller 43.885000586509705 +tom miller 57.10500144958496 +tom miller 76.20499992370605 +tom nixon 62.43000030517578 +tom ovid 38.096666971842446 +tom polk 51.26750087738037 +tom polk 68.22666676839192 +tom quirinius 37.720001220703125 +tom quirinius 53.20399913787842 +tom robinson 43.44333299001058 +tom robinson 54.637142998831614 +tom robinson 59.34250068664551 +tom robinson 99.1500015258789 +tom steinbeck 51.883334477742515 +tom van buren 28.380000829696655 +tom van buren 35.64999930063883 +tom van buren 54.59000015258789 +tom white 51.970001220703125 +tom young 44.7319995880127 +tom young 53.894999980926514 +tom zipper 55.44000116984049 +ulysses brown 48.72666708628336 +ulysses carson 38.742000579833984 +ulysses carson 45.513333002726235 +ulysses carson 48.75249934196472 +ulysses carson 74.64600067138672 +ulysses davidson 63.20857129778181 +ulysses ellison 68.52666759490967 +ulysses garcia 58.77250051498413 +ulysses hernandez 32.371999168395995 +ulysses hernandez 50.57000102996826 +ulysses hernandez 61.39999961853027 +ulysses ichabod 19.1299991607666 +ulysses ichabod 83.06666692097981 +ulysses johnson 51.485000451405845 +ulysses king 46.98333422342936 +ulysses laertes 29.046666741371155 +ulysses laertes 32.88599967956543 +ulysses laertes 60.12399845123291 +ulysses miller 44.552857535226 +ulysses miller 71.39249873161316 +ulysses nixon 51.300000286102296 +ulysses ovid 29.360000610351562 +ulysses polk 40.74399948120117 +ulysses polk 48.9800017674764 +ulysses polk 57.86249828338623 +ulysses polk 81.21333312988281 +ulysses 
quirinius 68.41500091552734 +ulysses robinson 69.53999853134155 +ulysses steinbeck 44.61833381652832 +ulysses steinbeck 48.362499713897705 +ulysses thompson 45.063334465026855 +ulysses underhill 30.829999764760334 +ulysses underhill 41.43857192993164 +ulysses underhill 44.08333269755045 +ulysses underhill 55.470001220703125 +ulysses underhill 58.9471435546875 +ulysses underhill 68.1900007724762 +ulysses underhill 78.83333333333333 +ulysses van buren 72.38428633553642 +ulysses white 36.17250043153763 +ulysses white 39.084000778198245 +ulysses xylophone 27.519999504089355 +ulysses xylophone 47.65999937057495 +ulysses xylophone 50.29999923706055 +ulysses young 23.308333079020183 +ulysses young 34.6339994430542 +ulysses young 88.06999969482422 +victor allen 49.43800010681152 +victor allen 56.7299998147147 +victor brown 40.80600037574768 +victor brown 63.5024995803833 +victor brown 71.03500080108643 +victor brown 81.71999931335449 +victor davidson 44.70333290100098 +victor davidson 59.070000076293944 +victor davidson 67.27199935913086 +victor ellison 31.28999964396159 +victor ellison 42.54999923706055 +victor hernandez 44.41333452860514 +victor hernandez 47.20249938964844 +victor hernandez 47.73333215713501 +victor hernandez 51.04999923706055 +victor hernandez 59.2399995803833 +victor johnson 54.868000626564026 +victor johnson 55.22999954223633 +victor johnson 57.41000175476074 +victor king 38.27999997138977 +victor king 49.993333180745445 +victor laertes 40.63500006993612 +victor laertes 80.5999984741211 +victor miller 71.00000190734863 +victor nixon 38.393332640329994 +victor nixon 52.920000076293945 +victor ovid 53.260000228881836 +victor polk 3.0 +victor quirinius 57.81666644414266 +victor quirinius 59.39999923706055 +victor robinson 24.614999771118164 +victor robinson 74.5049991607666 +victor steinbeck 28.862000381946565 +victor steinbeck 36.61000006539481 +victor steinbeck 43.09000015258789 +victor thompson 42.67599925994873 +victor van buren 44.669999877611794 +victor van buren 45.121999740600586 +victor white 53.67999887466431 +victor white 54.45000012715658 +victor xylophone 12.160000324249268 +victor xylophone 26.0 +victor xylophone 31.769999821980793 +victor xylophone 52.31499926249186 +victor xylophone 69.2899996439616 +victor young 64.25833320617676 +victor zipper 65.24999904632568 +wendy allen 34.04999955495199 +wendy allen 36.88199939727783 +wendy allen 44.96000012755394 +wendy brown 45.97833283742269 +wendy brown 52.73857225690569 +wendy ellison 42.91333246231079 +wendy ellison 53.56000073750814 +wendy falkner 47.602500438690186 +wendy falkner 64.9099988937378 +wendy falkner 77.5999984741211 +wendy garcia 37.38571425846645 +wendy garcia 48.76666768391927 +wendy garcia 53.225000858306885 +wendy garcia 63.93999926249186 +wendy hernandez 36.195000648498535 +wendy ichabod 10.56499981880188 +wendy king 37.57500076293945 +wendy king 53.44333457946777 +wendy king 56.319997787475586 +wendy laertes 38.39249920845032 +wendy laertes 60.19999885559082 +wendy laertes 65.30624961853027 +wendy miller 44.273332595825195 +wendy miller 53.5675014257431 +wendy nixon 54.995998764038085 +wendy nixon 64.28250026702881 +wendy ovid 43.80499863624573 +wendy ovid 61.64600105285645 +wendy polk 26.784999758005142 +wendy polk 35.21599998474121 +wendy quirinius 28.75666618347168 +wendy quirinius 60.70000092188517 +wendy robinson 42.5799994468689 +wendy robinson 42.90799944400787 +wendy robinson 43.426000237464905 +wendy steinbeck 43.42333388328552 +wendy thompson 40.352857317243306 +wendy thompson 
75.93666712443034 +wendy underhill 24.459999561309814 +wendy underhill 33.440000693003334 +wendy underhill 45.51625019311905 +wendy van buren 43.30333296457926 +wendy van buren 65.58666737874348 +wendy white 39.015000104904175 +wendy xylophone 42.42500034968058 +wendy xylophone 53.981666247049965 +wendy young 27.929999828338623 +wendy young 59.609999656677246 +xavier allen 49.24500061571598 +xavier allen 67.39000034332275 +xavier allen 70.29800033569336 +xavier brown 19.772500306367874 +xavier brown 58.87000111171177 +xavier brown 74.6200008392334 +xavier carson 47.3199987411499 +xavier carson 61.52250027656555 +xavier davidson 48.41999936103821 +xavier davidson 52.70666758219401 +xavier davidson 52.78166747093201 +xavier ellison 41.84999942779541 +xavier ellison 62.80200090408325 +xavier garcia 35.0600004568696 +xavier hernandez 47.45200023651123 +xavier hernandez 49.676666259765625 +xavier hernandez 53.446667432785034 +xavier ichabod 56.70625042915344 +xavier ichabod 60.54799928665161 +xavier johnson 41.16333262125651 +xavier johnson 53.85333331425985 +xavier king 42.72800064086914 +xavier king 66.05333455403645 +xavier laertes 38.47999954223633 +xavier ovid 48.89250057935715 +xavier polk 37.05500018596649 +xavier polk 46.82666703065237 +xavier polk 55.385000705718994 +xavier polk 55.65000057220459 +xavier quirinius 59.62499964237213 +xavier quirinius 60.055998992919925 +xavier quirinius 62.52000045776367 +xavier quirinius 65.2933349609375 +xavier thompson 40.244998931884766 +xavier underhill 26.27800006866455 +xavier white 47.8671429497855 +xavier white 63.38428551810129 +xavier xylophone 49.072500228881836 +xavier zipper 8.204999923706055 +yuri allen 53.61250066757202 +yuri allen 64.86833254496257 +yuri brown 46.57500044504801 +yuri brown 66.75250005722046 +yuri carson 39.40750050544739 +yuri carson 49.01600036621094 +yuri ellison 27.49000017642975 +yuri ellison 70.5933329264323 +yuri falkner 47.23285675048828 +yuri falkner 62.807999801635745 +yuri garcia 43.967499017715454 +yuri hernandez 31.94000039100647 +yuri johnson 21.40666739145915 +yuri johnson 34.02333414554596 +yuri johnson 65.7750015258789 +yuri king 49.47333272298177 +yuri laertes 42.070000648498535 +yuri laertes 60.7549991607666 +yuri nixon 49.87142838750567 +yuri nixon 59.945000330607094 +yuri polk 37.56249952316284 +yuri polk 47.583333333333336 +yuri polk 72.60888735453288 +yuri quirinius 18.62000060081482 +yuri quirinius 51.217501401901245 +yuri quirinius 67.24000072479248 +yuri steinbeck 55.757999420166016 +yuri steinbeck 75.87999725341797 +yuri thompson 36.93499946594238 +yuri underhill 51.533334732055664 +yuri underhill 62.31888887617323 +yuri white 44.34999983651297 +yuri xylophone 25.117499828338623 +zach allen 25.92333350578944 +zach brown 38.3799991607666 +zach brown 47.404998779296875 +zach brown 54.30600090026856 +zach brown 58.970001220703125 +zach brown 65.22499942779541 +zach carson 60.783999633789065 +zach ellison 36.211428437914165 +zach falkner 41.225714683532715 +zach falkner 65.99499940872192 +zach garcia 42.8885714326586 +zach garcia 46.8870005607605 +zach garcia 47.5049991607666 +zach garcia 66.09399967193603 +zach ichabod 40.10166613260905 +zach ichabod 53.16749954223633 +zach king 39.137500405311584 +zach king 48.2825003862381 +zach king 61.18999965985616 +zach miller 44.82800054550171 +zach miller 48.52428477151053 +zach miller 53.593332608540855 +zach ovid 35.19399921447039 +zach ovid 38.35833342870077 +zach ovid 43.87200012207031 +zach ovid 83.01999918619792 +zach quirinius 42.638333320617676 
+zach robinson 82.04999923706055 +zach steinbeck 55.86599960327148 +zach steinbeck 67.81428473336356 +zach thompson 29.303333282470703 +zach thompson 46.48999913533529 +zach underhill 48.681429045540945 +zach white 66.60250091552734 +zach xylophone 41.875 +zach xylophone 57.2416664759318 +zach young 73.5999984741211 +zach zipper 58.1480016708374 +zach zipper 60.1825008392334 +zach zipper 62.794999837875366 diff --git a/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-2-16239d2b069789ba99fbac50c4f0724f b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-2-16239d2b069789ba99fbac50c4f0724f new file mode 100644 index 0000000000000..6cfa5ad413fa4 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-2-16239d2b069789ba99fbac50c4f0724f @@ -0,0 +1,1049 @@ + 65560.0 + 65718.0 + 65740.0 +alice allen 65662.0 +alice allen 65720.0 +alice allen 65758.0 +alice brown 65696.0 +alice carson 65559.0 +alice davidson 65547.0 +alice falkner 65669.0 +alice garcia 65613.0 +alice hernandez 65737.0 +alice hernandez 65784.0 +alice johnson 65739.0 +alice king 65660.0 +alice king 65734.0 +alice king 65738.0 +alice laertes 65669.0 +alice laertes 65671.0 +alice miller 65590.0 +alice nixon 65586.0 +alice nixon 65595.0 +alice nixon 65604.0 +alice ovid 65737.0 +alice polk 65548.0 +alice quirinius 65636.0 +alice quirinius 65728.0 +alice robinson 65606.0 +alice robinson 65789.0 +alice steinbeck 65578.0 +alice steinbeck 65673.0 +alice steinbeck 65786.0 +alice underhill 65750.0 +alice van buren 65562.0 +alice xylophone 65578.0 +alice xylophone 65585.0 +alice xylophone 65599.0 +alice zipper 65553.0 +alice zipper 65662.0 +alice zipper 65766.0 +bob brown 65584.0 +bob brown 65777.0 +bob brown 65783.0 +bob carson 65713.0 +bob davidson 65664.0 +bob davidson 65693.0 +bob davidson 65768.0 +bob ellison 65591.0 +bob ellison 65624.0 +bob ellison 65721.0 +bob ellison 65760.0 +bob falkner 65789.0 +bob garcia 65585.0 +bob garcia 65598.0 +bob garcia 65673.0 +bob garcia 65754.0 +bob garcia 65782.0 +bob hernandez 65557.0 +bob ichabod 65549.0 +bob king 65715.0 +bob king 65757.0 +bob king 65783.0 +bob laertes 65602.0 +bob laertes 65663.0 +bob miller 65608.0 +bob ovid 65564.0 +bob ovid 65619.0 +bob ovid 65686.0 +bob ovid 65726.0 +bob polk 65594.0 +bob quirinius 65700.0 +bob steinbeck 65637.0 +bob van buren 65778.0 +bob white 65543.0 +bob white 65605.0 +bob xylophone 65574.0 +bob xylophone 65666.0 +bob young 65556.0 +bob zipper 65559.0 +bob zipper 65633.0 +bob zipper 65739.0 +calvin allen 65669.0 +calvin brown 65537.0 +calvin brown 65580.0 +calvin brown 65677.0 +calvin carson 65637.0 +calvin davidson 65541.0 +calvin davidson 65564.0 +calvin ellison 65667.0 +calvin falkner 65573.0 +calvin falkner 65596.0 +calvin falkner 65738.0 +calvin falkner 65762.0 +calvin falkner 65778.0 +calvin falkner 65784.0 +calvin garcia 65664.0 +calvin hernandez 65578.0 +calvin johnson 65731.0 +calvin laertes 65570.0 +calvin laertes 65684.0 +calvin nixon 65654.0 +calvin nixon 65724.0 +calvin nixon 65749.0 +calvin ovid 65554.0 +calvin ovid 65643.0 +calvin ovid 65663.0 +calvin ovid 65715.0 +calvin polk 65731.0 +calvin quirinius 65741.0 +calvin quirinius 65769.0 +calvin robinson 65581.0 +calvin steinbeck 65680.0 +calvin steinbeck 65762.0 +calvin steinbeck 65779.0 +calvin thompson 65560.0 +calvin thompson 65640.0 +calvin underhill 65732.0 +calvin van buren 65552.0 +calvin van buren 65771.0 +calvin white 65553.0 +calvin white 65561.0 +calvin xylophone 65575.0 +calvin xylophone 65614.5 +calvin xylophone 
65713.0 +calvin young 65574.0 +calvin young 65746.0 +calvin zipper 65669.0 +calvin zipper 65739.0 +david allen 65588.0 +david allen 65617.0 +david brown 65637.0 +david brown 65760.0 +david davidson 65559.0 +david davidson 65756.0 +david davidson 65778.0 +david davidson 65779.0 +david ellison 65634.0 +david ellison 65724.0 +david ellison 65724.0 +david hernandez 65763.0 +david ichabod 65699.0 +david ichabod 65715.0 +david laertes 65762.0 +david nixon 65536.0 +david ovid 65623.0 +david ovid 65628.0 +david quirinius 65697.0 +david quirinius 65759.0 +david quirinius 65779.0 +david robinson 65762.0 +david robinson 65775.0 +david thompson 65550.0 +david underhill 65602.0 +david underhill 65662.0 +david underhill 65751.0 +david van buren 65625.0 +david van buren 65634.0 +david white 65678.0 +david xylophone 65537.0 +david xylophone 65670.0 +david xylophone 65764.0 +david young 65551.0 +david young 65694.0 +ethan allen 65747.0 +ethan brown 65539.0 +ethan brown 65617.0 +ethan brown 65685.0 +ethan brown 65685.0 +ethan brown 65722.0 +ethan brown 65733.0 +ethan carson 65742.0 +ethan ellison 65714.0 +ethan ellison 65732.0 +ethan falkner 65577.0 +ethan falkner 65614.0 +ethan garcia 65736.0 +ethan hernandez 65630.5 +ethan johnson 65536.0 +ethan king 65614.0 +ethan laertes 65562.0 +ethan laertes 65597.0 +ethan laertes 65628.0 +ethan laertes 65643.0 +ethan laertes 65680.0 +ethan laertes 65745.0 +ethan laertes 65760.0 +ethan miller 65712.0 +ethan nixon 65766.0 +ethan ovid 65697.0 +ethan polk 65589.0 +ethan polk 65615.0 +ethan polk 65622.0 +ethan polk 65622.0 +ethan quirinius 65542.0 +ethan quirinius 65591.0 +ethan quirinius 65706.0 +ethan robinson 65547.0 +ethan robinson 65659.0 +ethan underhill 65570.0 +ethan van buren 65572.0 +ethan white 65677.0 +ethan white 65788.0 +ethan xylophone 65595.0 +ethan zipper 65593.0 +ethan zipper 65680.0 +fred davidson 65552.0 +fred davidson 65595.0 +fred davidson 65721.0 +fred ellison 65548.0 +fred ellison 65691.0 +fred ellison 65771.0 +fred falkner 65637.0 +fred falkner 65648.0 +fred falkner 65783.0 +fred hernandez 65541.0 +fred ichabod 65572.0 +fred ichabod 65789.0 +fred johnson 65758.0 +fred king 65694.0 +fred king 65745.0 +fred laertes 65769.0 +fred miller 65536.0 +fred nixon 65560.0 +fred nixon 65612.0 +fred nixon 65703.0 +fred nixon 65705.0 +fred polk 65603.0 +fred polk 65656.0 +fred polk 65701.0 +fred polk 65706.0 +fred quirinius 65697.0 +fred quirinius 65782.0 +fred robinson 65623.0 +fred steinbeck 65544.0 +fred steinbeck 65651.0 +fred steinbeck 65755.0 +fred underhill 65629.0 +fred van buren 65537.0 +fred van buren 65561.0 +fred van buren 65745.0 +fred van buren 65789.0 +fred white 65589.0 +fred young 65594.0 +fred young 65773.0 +fred zipper 65553.0 +gabriella allen 65646.0 +gabriella allen 65677.0 +gabriella brown 65704.0 +gabriella brown 65753.0 +gabriella carson 65586.0 +gabriella davidson 65565.0 +gabriella ellison 65706.0 +gabriella ellison 65716.0 +gabriella falkner 65623.0 +gabriella falkner 65711.0 +gabriella falkner 65767.0 +gabriella garcia 65571.0 +gabriella hernandez 65587.0 +gabriella hernandez 65717.0 +gabriella ichabod 65559.0 +gabriella ichabod 65633.0 +gabriella ichabod 65702.0 +gabriella ichabod 65712.0 +gabriella ichabod 65717.0 +gabriella king 65651.0 +gabriella king 65657.0 +gabriella laertes 65781.0 +gabriella miller 65646.0 +gabriella ovid 65556.0 +gabriella ovid 65583.0 +gabriella polk 65701.0 +gabriella polk 65790.0 +gabriella steinbeck 65582.0 +gabriella steinbeck 65653.0 +gabriella thompson 65682.0 +gabriella thompson 65755.0 +gabriella 
thompson 65766.0 +gabriella van buren 65581.0 +gabriella van buren 65644.0 +gabriella white 65638.0 +gabriella young 65699.0 +gabriella young 65774.0 +gabriella zipper 65540.0 +gabriella zipper 65754.0 +holly allen 65596.0 +holly brown 65599.0 +holly brown 65619.0 +holly falkner 65720.0 +holly hernandez 65602.0 +holly hernandez 65686.0 +holly hernandez 65750.0 +holly hernandez 65788.0 +holly ichabod 65711.0 +holly ichabod 65749.0 +holly ichabod 65752.0 +holly johnson 65655.0 +holly johnson 65662.0 +holly johnson 65755.0 +holly king 65549.0 +holly king 65648.0 +holly laertes 65664.0 +holly miller 65653.0 +holly nixon 65539.0 +holly nixon 65658.0 +holly polk 65743.0 +holly polk 65751.0 +holly robinson 65564.0 +holly thompson 65538.0 +holly thompson 65578.0 +holly thompson 65713.0 +holly underhill 65634.0 +holly underhill 65654.0 +holly underhill 65721.0 +holly underhill 65759.0 +holly van buren 65727.0 +holly white 65536.0 +holly white 65602.0 +holly xylophone 65544.0 +holly young 65606.0 +holly young 65765.0 +holly zipper 65607.0 +holly zipper 65755.0 +irene allen 65556.0 +irene brown 65633.0 +irene brown 65664.0 +irene brown 65765.0 +irene carson 65590.0 +irene ellison 65659.0 +irene ellison 65696.0 +irene falkner 65620.0 +irene falkner 65661.0 +irene garcia 65660.0 +irene garcia 65711.0 +irene garcia 65787.0 +irene ichabod 65645.0 +irene ichabod 65722.0 +irene johnson 65583.0 +irene laertes 65664.0 +irene laertes 65710.0 +irene laertes 65722.0 +irene miller 65730.0 +irene nixon 65631.0 +irene nixon 65643.0 +irene nixon 65653.0 +irene ovid 65691.0 +irene ovid 65734.0 +irene ovid 65753.0 +irene polk 65551.0 +irene polk 65575.0 +irene polk 65579.0 +irene polk 65595.0 +irene polk 65610.0 +irene quirinius 65724.0 +irene quirinius 65769.0 +irene quirinius 65773.0 +irene robinson 65554.0 +irene steinbeck 65683.0 +irene thompson 65688.0 +irene underhill 65591.0 +irene underhill 65707.5 +irene van buren 65579.0 +irene van buren 65589.0 +irene xylophone 65775.0 +jessica brown 65588.0 +jessica carson 65553.0 +jessica carson 65672.0 +jessica carson 65747.0 +jessica davidson 65549.0 +jessica davidson 65606.0 +jessica davidson 65675.0 +jessica davidson 65727.0 +jessica ellison 65567.0 +jessica ellison 65663.0 +jessica falkner 65584.0 +jessica garcia 65676.0 +jessica garcia 65789.0 +jessica ichabod 65704.0 +jessica johnson 65607.0 +jessica johnson 65720.0 +jessica miller 65733.0 +jessica nixon 65590.0 +jessica nixon 65774.0 +jessica ovid 65582.0 +jessica ovid 65751.0 +jessica polk 65637.0 +jessica quirinius 65562.0 +jessica quirinius 65608.0 +jessica quirinius 65712.0 +jessica quirinius 65716.0 +jessica robinson 65576.0 +jessica thompson 65581.0 +jessica thompson 65675.0 +jessica underhill 65656.0 +jessica underhill 65702.0 +jessica underhill 65783.0 +jessica van buren 65615.0 +jessica white 65544.0 +jessica white 65570.0 +jessica white 65594.0 +jessica white 65673.0 +jessica white 65779.0 +jessica xylophone 65562.0 +jessica young 65623.0 +jessica young 65711.0 +jessica zipper 65600.0 +jessica zipper 65657.0 +jessica zipper 65778.0 +katie allen 65542.0 +katie brown 65590.0 +katie davidson 65619.0 +katie ellison 65675.0 +katie ellison 65699.0 +katie falkner 65728.0 +katie garcia 65625.0 +katie garcia 65747.0 +katie hernandez 65550.0 +katie ichabod 65658.0 +katie ichabod 65726.0 +katie ichabod 65757.0 +katie king 65629.0 +katie king 65647.0 +katie king 65776.0 +katie miller 65541.0 +katie miller 65661.0 +katie nixon 65669.0 +katie ovid 65681.0 +katie polk 65746.0 +katie polk 65784.0 +katie robinson 
65697.0 +katie van buren 65643.0 +katie van buren 65730.0 +katie white 65620.0 +katie white 65719.0 +katie xylophone 65585.0 +katie young 65644.0 +katie young 65746.0 +katie young 65764.0 +katie zipper 65568.0 +katie zipper 65733.0 +luke allen 65547.0 +luke allen 65552.0 +luke allen 65576.0 +luke allen 65681.0 +luke allen 65776.0 +luke brown 65719.0 +luke davidson 65656.0 +luke davidson 65791.0 +luke ellison 65582.0 +luke ellison 65664.0 +luke ellison 65779.0 +luke falkner 65589.0 +luke falkner 65618.0 +luke garcia 65687.0 +luke garcia 65778.0 +luke ichabod 65629.0 +luke ichabod 65654.0 +luke johnson 65545.0 +luke johnson 65716.0 +luke johnson 65718.0 +luke laertes 65608.0 +luke laertes 65657.0 +luke laertes 65685.0 +luke laertes 65730.0 +luke laertes 65756.0 +luke miller 65752.0 +luke ovid 65569.0 +luke ovid 65693.0 +luke polk 65645.0 +luke polk 65658.0 +luke quirinius 65655.0 +luke robinson 65634.0 +luke robinson 65772.0 +luke thompson 65626.0 +luke underhill 65553.0 +luke underhill 65571.0 +luke underhill 65651.0 +luke van buren 65678.0 +luke white 65693.0 +luke xylophone 65597.0 +luke zipper 65641.0 +mike allen 65706.0 +mike brown 65654.0 +mike carson 65698.0 +mike carson 65700.0 +mike carson 65751.0 +mike davidson 65658.0 +mike davidson 65759.0 +mike ellison 65598.0 +mike ellison 65606.0 +mike ellison 65718.0 +mike ellison 65738.0 +mike ellison 65760.0 +mike falkner 65609.0 +mike garcia 65571.0 +mike garcia 65600.0 +mike garcia 65770.0 +mike hernandez 65548.0 +mike hernandez 65672.0 +mike ichabod 65621.0 +mike king 65563.0 +mike king 65586.0 +mike king 65591.0 +mike king 65642.0 +mike king 65769.0 +mike king 65776.0 +mike miller 65549.0 +mike nixon 65619.0 +mike nixon 65704.0 +mike polk 65619.0 +mike polk 65658.0 +mike polk 65704.0 +mike quirinius 65717.0 +mike steinbeck 65550.0 +mike steinbeck 65564.0 +mike steinbeck 65573.0 +mike steinbeck 65749.0 +mike van buren 65620.0 +mike van buren 65770.0 +mike white 65648.0 +mike white 65685.0 +mike white 65769.0 +mike white 65778.0 +mike young 65545.0 +mike young 65581.0 +mike young 65736.0 +mike zipper 65552.0 +mike zipper 65695.0 +mike zipper 65779.0 +nick allen 65641.0 +nick allen 65786.0 +nick brown 65724.0 +nick davidson 65601.0 +nick ellison 65691.0 +nick ellison 65745.0 +nick falkner 65583.0 +nick falkner 65676.0 +nick garcia 65712.0 +nick garcia 65720.0 +nick garcia 65723.0 +nick ichabod 65572.0 +nick ichabod 65681.0 +nick ichabod 65737.0 +nick johnson 65585.0 +nick johnson 65784.0 +nick laertes 65624.0 +nick miller 65757.0 +nick nixon 65650.0 +nick ovid 65719.0 +nick polk 65716.0 +nick quirinius 65588.0 +nick quirinius 65723.0 +nick robinson 65547.0 +nick robinson 65675.0 +nick steinbeck 65689.0 +nick thompson 65610.0 +nick underhill 65619.0 +nick van buren 65603.0 +nick xylophone 65644.0 +nick young 65654.0 +nick young 65660.0 +nick zipper 65757.0 +nick zipper 65765.0 +oscar allen 65644.0 +oscar brown 65614.0 +oscar carson 65537.0 +oscar carson 65548.0 +oscar carson 65549.0 +oscar carson 65624.0 +oscar carson 65697.0 +oscar davidson 65556.0 +oscar ellison 65630.0 +oscar ellison 65630.0 +oscar falkner 65692.0 +oscar garcia 65751.0 +oscar hernandez 65683.0 +oscar hernandez 65707.0 +oscar ichabod 65536.0 +oscar ichabod 65562.0 +oscar ichabod 65637.0 +oscar ichabod 65763.0 +oscar johnson 65645.0 +oscar johnson 65778.0 +oscar king 65541.0 +oscar king 65550.0 +oscar king 65787.0 +oscar laertes 65625.0 +oscar laertes 65690.0 +oscar laertes 65756.0 +oscar laertes 65790.0 +oscar nixon 65596.0 +oscar ovid 65536.0 +oscar ovid 65615.0 +oscar 
ovid 65665.5 +oscar polk 65541.0 +oscar polk 65643.0 +oscar quirinius 65541.0 +oscar quirinius 65560.0 +oscar quirinius 65689.0 +oscar quirinius 65720.0 +oscar robinson 65537.0 +oscar robinson 65658.0 +oscar robinson 65687.0 +oscar robinson 65782.0 +oscar steinbeck 65709.0 +oscar thompson 65542.0 +oscar thompson 65681.0 +oscar thompson 65727.0 +oscar thompson 65738.0 +oscar underhill 65626.0 +oscar van buren 65581.0 +oscar van buren 65635.0 +oscar van buren 65705.0 +oscar white 65552.0 +oscar white 65564.0 +oscar white 65671.0 +oscar white 65735.0 +oscar xylophone 65773.0 +oscar xylophone 65773.0 +oscar xylophone 65775.0 +oscar zipper 65568.0 +oscar zipper 65740.0 +oscar zipper 65777.0 +priscilla brown 65670.0 +priscilla brown 65690.0 +priscilla brown 65749.0 +priscilla carson 65658.0 +priscilla carson 65687.0 +priscilla carson 65755.0 +priscilla ichabod 65627.0 +priscilla ichabod 65759.0 +priscilla johnson 65543.0 +priscilla johnson 65668.0 +priscilla johnson 65674.5 +priscilla johnson 65681.0 +priscilla johnson 65755.0 +priscilla king 65646.0 +priscilla nixon 65564.0 +priscilla nixon 65600.0 +priscilla ovid 65541.0 +priscilla ovid 65790.0 +priscilla polk 65747.0 +priscilla quirinius 65672.0 +priscilla thompson 65654.0 +priscilla underhill 65715.0 +priscilla underhill 65729.0 +priscilla van buren 65607.0 +priscilla van buren 65685.0 +priscilla van buren 65749.0 +priscilla white 65652.0 +priscilla xylophone 65538.0 +priscilla xylophone 65763.0 +priscilla xylophone 65774.0 +priscilla young 65585.0 +priscilla young 65658.0 +priscilla zipper 65622.0 +priscilla zipper 65726.0 +quinn allen 65657.0 +quinn allen 65708.0 +quinn brown 65691.0 +quinn brown 65700.0 +quinn brown 65733.0 +quinn davidson 65549.0 +quinn davidson 65714.0 +quinn davidson 65776.0 +quinn davidson 65779.0 +quinn ellison 65705.0 +quinn ellison 65778.0 +quinn garcia 65568.0 +quinn garcia 65604.0 +quinn garcia 65610.0 +quinn garcia 65773.0 +quinn ichabod 65609.0 +quinn king 65558.0 +quinn king 65649.0 +quinn laertes 65542.0 +quinn laertes 65560.0 +quinn laertes 65627.0 +quinn nixon 65659.0 +quinn ovid 65699.0 +quinn quirinius 65747.0 +quinn robinson 65627.0 +quinn steinbeck 65578.0 +quinn steinbeck 65763.0 +quinn thompson 65643.0 +quinn thompson 65774.0 +quinn underhill 65549.0 +quinn underhill 65694.0 +quinn underhill 65767.0 +quinn van buren 65725.0 +quinn young 65647.0 +quinn zipper 65579.0 +quinn zipper 65693.0 +rachel allen 65661.0 +rachel allen 65709.0 +rachel brown 65586.0 +rachel brown 65587.0 +rachel brown 65587.0 +rachel brown 65610.0 +rachel brown 65693.0 +rachel carson 65677.0 +rachel carson 65682.0 +rachel davidson 65755.0 +rachel ellison 65761.0 +rachel falkner 65616.0 +rachel falkner 65681.0 +rachel falkner 65693.0 +rachel falkner 65764.0 +rachel johnson 65658.0 +rachel king 65604.0 +rachel king 65643.0 +rachel laertes 65562.0 +rachel laertes 65624.0 +rachel ovid 65721.0 +rachel ovid 65736.0 +rachel polk 65686.0 +rachel quirinius 65787.0 +rachel robinson 65544.0 +rachel robinson 65717.0 +rachel robinson 65724.0 +rachel thompson 65648.0 +rachel thompson 65662.0 +rachel thompson 65733.0 +rachel underhill 65667.0 +rachel white 65615.0 +rachel white 65717.0 +rachel young 65727.0 +rachel zipper 65757.0 +rachel zipper 65785.0 +sarah carson 65616.0 +sarah carson 65693.0 +sarah carson 65694.0 +sarah ellison 65611.0 +sarah falkner 65606.0 +sarah falkner 65680.0 +sarah garcia 65563.0 +sarah garcia 65638.0 +sarah garcia 65661.0 +sarah ichabod 65667.0 +sarah ichabod 65671.0 +sarah johnson 65659.0 +sarah johnson 65716.0 +sarah 
johnson 65731.0 +sarah johnson 65751.0 +sarah king 65650.0 +sarah king 65699.0 +sarah miller 65557.0 +sarah ovid 65550.0 +sarah robinson 65677.0 +sarah robinson 65763.0 +sarah steinbeck 65721.0 +sarah white 65622.0 +sarah white 65747.0 +sarah xylophone 65678.0 +sarah young 65595.0 +sarah zipper 65550.0 +tom brown 65593.0 +tom brown 65675.0 +tom carson 65539.0 +tom carson 65624.0 +tom carson 65780.0 +tom davidson 65780.0 +tom ellison 65578.0 +tom ellison 65670.0 +tom ellison 65756.0 +tom falkner 65574.0 +tom falkner 65625.0 +tom hernandez 65575.0 +tom hernandez 65632.0 +tom ichabod 65588.0 +tom johnson 65536.0 +tom johnson 65789.0 +tom king 65576.0 +tom laertes 65617.0 +tom laertes 65701.0 +tom miller 65594.0 +tom miller 65603.0 +tom miller 65704.0 +tom nixon 65672.0 +tom ovid 65628.0 +tom polk 65652.0 +tom polk 65742.0 +tom quirinius 65563.0 +tom quirinius 65783.0 +tom robinson 65626.0 +tom robinson 65632.0 +tom robinson 65691.0 +tom robinson 65758.0 +tom steinbeck 65666.0 +tom van buren 65621.0 +tom van buren 65652.0 +tom van buren 65669.0 +tom white 65548.0 +tom young 65544.0 +tom young 65546.0 +tom zipper 65789.0 +ulysses brown 65735.0 +ulysses carson 65602.0 +ulysses carson 65643.0 +ulysses carson 65703.0 +ulysses carson 65716.0 +ulysses davidson 65750.0 +ulysses ellison 65575.0 +ulysses garcia 65666.0 +ulysses hernandez 65651.0 +ulysses hernandez 65702.0 +ulysses hernandez 65786.0 +ulysses ichabod 65551.0 +ulysses ichabod 65566.0 +ulysses johnson 65776.0 +ulysses king 65649.0 +ulysses laertes 65691.0 +ulysses laertes 65711.0 +ulysses laertes 65781.0 +ulysses miller 65610.0 +ulysses miller 65637.0 +ulysses nixon 65603.0 +ulysses ovid 65656.0 +ulysses polk 65563.0 +ulysses polk 65580.0 +ulysses polk 65612.0 +ulysses polk 65777.0 +ulysses quirinius 65786.0 +ulysses robinson 65744.0 +ulysses steinbeck 65611.0 +ulysses steinbeck 65680.0 +ulysses thompson 65788.0 +ulysses underhill 65570.0 +ulysses underhill 65616.0 +ulysses underhill 65620.0 +ulysses underhill 65623.0 +ulysses underhill 65641.0 +ulysses underhill 65713.0 +ulysses underhill 65785.0 +ulysses van buren 65684.0 +ulysses white 65654.0 +ulysses white 65675.0 +ulysses xylophone 65623.0 +ulysses xylophone 65636.0 +ulysses xylophone 65781.0 +ulysses young 65675.0 +ulysses young 65736.0 +ulysses young 65748.0 +victor allen 65684.0 +victor allen 65707.0 +victor brown 65550.0 +victor brown 65555.0 +victor brown 65622.0 +victor brown 65673.0 +victor davidson 65579.0 +victor davidson 65628.0 +victor davidson 65783.0 +victor ellison 65641.0 +victor ellison 65782.0 +victor hernandez 65571.0 +victor hernandez 65659.0 +victor hernandez 65708.0 +victor hernandez 65735.0 +victor hernandez 65775.0 +victor johnson 65606.0 +victor johnson 65607.0 +victor johnson 65607.0 +victor king 65721.0 +victor king 65743.0 +victor laertes 65638.0 +victor laertes 65644.0 +victor miller 65570.0 +victor nixon 65709.0 +victor nixon 65791.0 +victor ovid 65649.0 +victor polk 65625.0 +victor quirinius 65620.0 +victor quirinius 65651.0 +victor robinson 65596.0 +victor robinson 65673.0 +victor steinbeck 65618.0 +victor steinbeck 65661.0 +victor steinbeck 65686.0 +victor thompson 65548.0 +victor van buren 65664.0 +victor van buren 65774.0 +victor white 65548.0 +victor white 65601.0 +victor xylophone 65549.0 +victor xylophone 65618.0 +victor xylophone 65644.0 +victor xylophone 65677.0 +victor xylophone 65755.0 +victor young 65628.0 +victor zipper 65743.0 +wendy allen 65628.0 +wendy allen 65711.0 +wendy allen 65782.0 +wendy brown 65580.0 +wendy brown 65657.0 +wendy 
ellison 65545.0 +wendy ellison 65603.0 +wendy falkner 65595.0 +wendy falkner 65604.0 +wendy falkner 65635.0 +wendy garcia 65659.0 +wendy garcia 65746.0 +wendy garcia 65747.0 +wendy garcia 65777.0 +wendy hernandez 65650.0 +wendy ichabod 65730.0 +wendy king 65586.0 +wendy king 65664.0 +wendy king 65670.0 +wendy laertes 65566.0 +wendy laertes 65683.0 +wendy laertes 65727.0 +wendy miller 65582.0 +wendy miller 65626.0 +wendy nixon 65611.0 +wendy nixon 65746.0 +wendy ovid 65589.0 +wendy ovid 65643.0 +wendy polk 65656.0 +wendy polk 65692.0 +wendy quirinius 65766.0 +wendy quirinius 65767.0 +wendy robinson 65622.0 +wendy robinson 65715.0 +wendy robinson 65774.0 +wendy steinbeck 65612.0 +wendy thompson 65650.0 +wendy thompson 65737.0 +wendy underhill 65662.0 +wendy underhill 65758.0 +wendy underhill 65775.0 +wendy van buren 65680.0 +wendy van buren 65699.0 +wendy white 65705.0 +wendy xylophone 65687.0 +wendy xylophone 65773.0 +wendy young 65674.0 +wendy young 65685.0 +xavier allen 65611.0 +xavier allen 65618.0 +xavier allen 65771.0 +xavier brown 65600.0 +xavier brown 65704.0 +xavier brown 65723.0 +xavier carson 65731.0 +xavier carson 65758.0 +xavier davidson 65644.0 +xavier davidson 65664.0 +xavier davidson 65755.0 +xavier ellison 65541.0 +xavier ellison 65622.0 +xavier garcia 65672.0 +xavier hernandez 65541.0 +xavier hernandez 65544.0 +xavier hernandez 65766.0 +xavier ichabod 65597.0 +xavier ichabod 65663.0 +xavier johnson 65655.0 +xavier johnson 65744.0 +xavier king 65590.0 +xavier king 65601.0 +xavier laertes 65743.0 +xavier ovid 65788.0 +xavier polk 65587.0 +xavier polk 65653.0 +xavier polk 65675.0 +xavier polk 65696.0 +xavier quirinius 65599.0 +xavier quirinius 65650.0 +xavier quirinius 65656.0 +xavier quirinius 65737.0 +xavier thompson 65608.0 +xavier underhill 65710.0 +xavier white 65703.0 +xavier white 65732.0 +xavier xylophone 65572.0 +xavier zipper 65561.0 +yuri allen 65565.0 +yuri allen 65682.0 +yuri brown 65538.0 +yuri brown 65688.0 +yuri carson 65670.0 +yuri carson 65769.0 +yuri ellison 65570.0 +yuri ellison 65581.0 +yuri falkner 65658.0 +yuri falkner 65681.0 +yuri garcia 65639.0 +yuri hernandez 65706.0 +yuri johnson 65587.0 +yuri johnson 65697.0 +yuri johnson 65712.0 +yuri king 65721.0 +yuri laertes 65637.0 +yuri laertes 65773.0 +yuri nixon 65635.0 +yuri nixon 65740.0 +yuri polk 65607.0 +yuri polk 65713.0 +yuri polk 65742.0 +yuri quirinius 65544.0 +yuri quirinius 65617.0 +yuri quirinius 65695.0 +yuri steinbeck 65592.0 +yuri steinbeck 65679.0 +yuri thompson 65676.0 +yuri underhill 65718.0 +yuri underhill 65750.0 +yuri white 65659.0 +yuri xylophone 65714.0 +zach allen 65667.0 +zach brown 65559.0 +zach brown 65588.0 +zach brown 65691.0 +zach brown 65759.0 +zach brown 65762.0 +zach carson 65572.0 +zach ellison 65748.0 +zach falkner 65620.0 +zach falkner 65627.0 +zach garcia 65544.0 +zach garcia 65623.0 +zach garcia 65629.0 +zach garcia 65764.5 +zach ichabod 65599.0 +zach ichabod 65612.0 +zach king 65556.0 +zach king 65702.0 +zach king 65773.0 +zach miller 65583.0 +zach miller 65665.0 +zach miller 65719.0 +zach ovid 65578.0 +zach ovid 65669.0 +zach ovid 65703.0 +zach ovid 65784.0 +zach quirinius 65691.0 +zach robinson 65599.0 +zach steinbeck 65602.0 +zach steinbeck 65695.0 +zach thompson 65636.0 +zach thompson 65696.0 +zach underhill 65573.0 +zach white 65733.0 +zach xylophone 65542.0 +zach xylophone 65780.0 +zach young 65576.0 +zach zipper 65579.0 +zach zipper 65649.0 +zach zipper 65676.0 diff --git a/sql/hive/src/test/resources/golden/windowing_udaf.q 
(deterministic)-3-d90b27fca067b0b3c48d873b3ef32af7 b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-3-d90b27fca067b0b3c48d873b3ef32af7 new file mode 100644 index 0000000000000..072a8a891a836 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-3-d90b27fca067b0b3c48d873b3ef32af7 @@ -0,0 +1,1049 @@ +65536 +65536 +65536 +65536 +65536 +65536 +65537 +65537 +65537 +65537 +65538 +65538 +65538 +65539 +65539 +65539 +65540 +65541 +65541 +65541 +65541 +65541 +65541 +65541 +65541 +65542 +65542 +65542 +65542 +65543 +65543 +65544 +65544 +65544 +65544 +65544 +65544 +65544 +65544 +65545 +65545 +65545 +65547 +65547 +65547 +65547 +65548 +65548 +65548 +65548 +65548 +65548 +65548 +65549 +65549 +65549 +65549 +65549 +65549 +65549 +65549 +65550 +65550 +65550 +65550 +65550 +65550 +65550 +65551 +65551 +65551 +65552 +65552 +65552 +65552 +65552 +65553 +65553 +65553 +65553 +65554 +65554 +65555 +65556 +65556 +65556 +65557 +65557 +65558 +65559 +65559 +65559 +65559 +65560 +65560 +65560 +65560 +65561 +65562 +65562 +65562 +65562 +65562 +65562 +65563 +65563 +65563 +65563 +65563 +65564 +65564 +65564 +65564 +65564 +65564 +65564 +65564 +65565 +65565 +65566 +65566 +65567 +65568 +65568 +65568 +65569 +65570 +65570 +65570 +65570 +65570 +65571 +65571 +65571 +65572 +65572 +65572 +65572 +65572 +65573 +65573 +65573 +65574 +65574 +65574 +65575 +65575 +65575 +65575 +65576 +65576 +65576 +65576 +65577 +65578 +65578 +65578 +65578 +65578 +65578 +65579 +65579 +65579 +65579 +65579 +65580 +65580 +65580 +65581 +65581 +65581 +65581 +65581 +65582 +65582 +65582 +65582 +65582 +65583 +65583 +65583 +65583 +65584 +65584 +65584 +65584 +65585 +65585 +65585 +65585 +65585 +65586 +65586 +65586 +65586 +65586 +65587 +65587 +65587 +65587 +65588 +65588 +65588 +65588 +65588 +65589 +65589 +65589 +65589 +65589 +65590 +65590 +65590 +65590 +65590 +65591 +65591 +65591 +65591 +65592 +65593 +65593 +65594 +65594 +65594 +65594 +65595 +65595 +65595 +65595 +65595 +65595 +65596 +65596 +65596 +65596 +65596 +65597 +65597 +65597 +65598 +65599 +65599 +65599 +65600 +65600 +65600 +65600 +65601 +65601 +65601 +65602 +65602 +65602 +65602 +65603 +65603 +65603 +65603 +65603 +65604 +65604 +65604 +65605 +65606 +65606 +65606 +65606 +65606 +65607 +65607 +65607 +65607 +65607 +65607 +65608 +65608 +65608 +65608 +65609 +65610 +65610 +65610 +65610 +65610 +65610 +65611 +65611 +65611 +65612 +65612 +65612 +65614 +65614 +65614 +65615 +65615 +65615 +65616 +65616 +65617 +65617 +65617 +65617 +65618 +65618 +65618 +65618 +65618 +65619 +65619 +65619 +65619 +65619 +65619 +65620 +65620 +65620 +65620 +65620 +65620 +65621 +65621 +65622 +65622 +65622 +65622 +65622 +65622 +65623 +65623 +65623 +65623 +65623 +65623 +65623 +65624 +65624 +65624 +65624 +65625 +65625 +65625 +65625 +65626 +65626 +65626 +65626 +65627 +65627 +65627 +65627 +65628 +65628 +65628 +65628 +65628 +65628 +65629 +65629 +65629 +65630 +65630 +65631 +65632 +65632 +65632 +65633 +65633 +65633 +65633 +65634 +65634 +65634 +65634 +65635 +65635 +65635 +65636 +65636 +65636 +65636 +65637 +65637 +65637 +65637 +65637 +65637 +65637 +65638 +65638 +65638 +65639 +65640 +65641 +65641 +65641 +65641 +65642 +65643 +65643 +65643 +65643 +65643 +65643 +65643 +65643 +65643 +65643 +65644 +65644 +65644 +65644 +65644 +65645 +65645 +65645 +65646 +65646 +65646 +65647 +65647 +65648 +65648 +65648 +65648 +65649 +65649 +65649 +65650 +65650 +65650 +65650 +65650 +65650 +65650 +65651 +65651 +65651 +65651 +65651 +65651 +65652 +65652 +65652 +65653 +65653 +65653 +65653 +65654 +65654 +65654 +65654 +65654 +65654 +65654 +65654 
+65654 +65655 +65655 +65656 +65656 +65656 +65656 +65656 +65656 +65657 +65657 +65657 +65657 +65657 +65658 +65658 +65658 +65658 +65658 +65658 +65658 +65658 +65658 +65658 +65658 +65659 +65659 +65659 +65659 +65659 +65659 +65659 +65659 +65660 +65660 +65660 +65661 +65661 +65661 +65661 +65661 +65662 +65662 +65662 +65662 +65662 +65662 +65663 +65663 +65663 +65663 +65664 +65664 +65664 +65664 +65664 +65664 +65664 +65665 +65666 +65666 +65667 +65667 +65667 +65667 +65667 +65667 +65668 +65669 +65669 +65669 +65669 +65669 +65669 +65670 +65670 +65670 +65670 +65670 +65671 +65671 +65671 +65672 +65672 +65672 +65672 +65672 +65672 +65673 +65673 +65673 +65673 +65673 +65674 +65674 +65674 +65674 +65675 +65675 +65675 +65675 +65675 +65675 +65675 +65675 +65676 +65676 +65676 +65677 +65677 +65677 +65677 +65677 +65677 +65678 +65678 +65678 +65678 +65679 +65679 +65680 +65680 +65680 +65680 +65680 +65680 +65680 +65681 +65681 +65681 +65681 +65681 +65682 +65682 +65682 +65683 +65683 +65683 +65683 +65684 +65684 +65684 +65684 +65685 +65685 +65685 +65685 +65685 +65685 +65686 +65686 +65686 +65687 +65687 +65687 +65687 +65688 +65688 +65689 +65689 +65690 +65690 +65691 +65691 +65691 +65691 +65691 +65691 +65691 +65691 +65692 +65692 +65693 +65693 +65693 +65693 +65693 +65693 +65694 +65694 +65694 +65695 +65695 +65695 +65695 +65695 +65696 +65696 +65696 +65696 +65697 +65697 +65697 +65697 +65697 +65697 +65698 +65698 +65698 +65699 +65699 +65699 +65699 +65699 +65699 +65700 +65700 +65700 +65701 +65701 +65701 +65702 +65702 +65702 +65702 +65702 +65703 +65703 +65703 +65703 +65703 +65704 +65704 +65704 +65704 +65704 +65704 +65705 +65705 +65705 +65705 +65706 +65706 +65706 +65706 +65706 +65706 +65707 +65707 +65708 +65708 +65709 +65709 +65709 +65710 +65711 +65711 +65711 +65711 +65711 +65711 +65712 +65712 +65712 +65712 +65712 +65713 +65713 +65713 +65713 +65713 +65714 +65714 +65714 +65715 +65715 +65715 +65715 +65715 +65716 +65716 +65716 +65716 +65716 +65716 +65717 +65717 +65717 +65717 +65717 +65718 +65718 +65718 +65718 +65719 +65719 +65719 +65719 +65720 +65720 +65720 +65720 +65720 +65720 +65721 +65721 +65721 +65721 +65721 +65721 +65721 +65722 +65722 +65722 +65722 +65723 +65723 +65724 +65724 +65724 +65724 +65724 +65724 +65725 +65726 +65726 +65726 +65726 +65727 +65727 +65727 +65727 +65727 +65728 +65728 +65729 +65730 +65730 +65730 +65730 +65731 +65731 +65731 +65731 +65732 +65732 +65732 +65733 +65733 +65733 +65733 +65733 +65733 +65734 +65734 +65735 +65735 +65735 +65736 +65736 +65736 +65736 +65737 +65737 +65737 +65737 +65737 +65738 +65738 +65738 +65738 +65739 +65739 +65739 +65740 +65740 +65740 +65741 +65742 +65742 +65742 +65743 +65743 +65743 +65743 +65744 +65744 +65745 +65745 +65745 +65745 +65746 +65746 +65746 +65746 +65747 +65747 +65747 +65747 +65747 +65747 +65747 +65748 +65748 +65749 +65749 +65749 +65749 +65749 +65750 +65750 +65750 +65750 +65750 +65751 +65751 +65751 +65751 +65751 +65752 +65752 +65753 +65753 +65754 +65754 +65755 +65755 +65755 +65755 +65755 +65755 +65755 +65755 +65755 +65756 +65756 +65756 +65756 +65756 +65757 +65757 +65757 +65757 +65757 +65758 +65758 +65758 +65758 +65758 +65758 +65759 +65759 +65759 +65759 +65759 +65760 +65760 +65760 +65760 +65760 +65761 +65762 +65762 +65762 +65762 +65762 +65763 +65763 +65763 +65763 +65763 +65764 +65764 +65764 +65765 +65765 +65765 +65766 +65766 +65766 +65766 +65766 +65767 +65767 +65767 +65768 +65769 +65769 +65769 +65769 +65769 +65769 +65769 +65770 +65770 +65771 +65771 +65771 +65772 +65773 +65773 +65773 +65773 +65773 +65773 +65773 +65773 +65774 +65774 +65774 +65774 +65774 +65774 +65775 +65775 +65775 +65775 
+65775 +65775 +65776 +65776 +65776 +65776 +65776 +65776 +65776 +65777 +65777 +65777 +65777 +65777 +65777 +65778 +65778 +65778 +65778 +65778 +65778 +65778 +65778 +65778 +65779 +65779 +65779 +65779 +65779 +65779 +65779 +65780 +65780 +65780 +65781 +65781 +65781 +65782 +65782 +65782 +65782 +65782 +65783 +65783 +65783 +65783 +65783 +65783 +65783 +65784 +65784 +65784 +65784 +65784 +65785 +65785 +65786 +65786 +65786 +65786 +65786 +65787 +65787 +65787 +65787 +65787 +65788 +65788 +65788 +65788 +65789 +65789 +65789 +65789 +65789 +65789 +65789 +65789 +65789 +65789 +65790 +65790 +65790 +65791 +65791 diff --git a/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-4-f2e4d659b65a833e9281b6786d3d55c1 b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-4-f2e4d659b65a833e9281b6786d3d55c1 new file mode 100644 index 0000000000000..9cc7e7ea6c2b2 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_udaf.q (deterministic)-4-f2e4d659b65a833e9281b6786d3d55c1 @@ -0,0 +1,1049 @@ + 24.37875 + 27.900000000000002 + 43.64 +alice allen 16.919999999999998 +alice allen 20.39 +alice allen 23.59 +alice brown 6.91 +alice carson 41.74 +alice davidson 26.346000000000004 +alice falkner 32.166666666666664 +alice garcia 15.412 +alice hernandez 19.958181818181817 +alice hernandez 30.482857142857142 +alice johnson 25.51 +alice king 13.085 +alice king 25.616666666666664 +alice king 42.335 +alice laertes 20.549999999999997 +alice laertes 30.436 +alice miller 29.563333333333333 +alice nixon 19.28666666666667 +alice nixon 24.5625 +alice nixon 32.99 +alice ovid 31.35 +alice polk 17.863999999999997 +alice quirinius 19.032857142857143 +alice quirinius 23.9425 +alice robinson 23.338750000000005 +alice robinson 39.85 +alice steinbeck 22.862000000000002 +alice steinbeck 26.328000000000003 +alice steinbeck 27.08777777777778 +alice underhill 24.032222222222224 +alice van buren 19.642000000000003 +alice xylophone 24.438000000000002 +alice xylophone 28.739999999999995 +alice xylophone 30.0825 +alice zipper 26.3 +alice zipper 28.735000000000003 +alice zipper 31.05545454545455 +bob brown 12.902222222222223 +bob brown 13.945 +bob brown 33.843333333333334 +bob carson 28.627999999999997 +bob davidson 19.8525 +bob davidson 23.482 +bob davidson 24.67 +bob ellison 16.315714285714286 +bob ellison 18.4 +bob ellison 26.913999999999998 +bob ellison 27.59 +bob falkner 9.27 +bob garcia 11.63 +bob garcia 22.221249999999998 +bob garcia 23.59636363636364 +bob garcia 26.88857142857143 +bob garcia 28.715000000000003 +bob hernandez 37.23 +bob ichabod 28.33875 +bob king 8.615 +bob king 19.77 +bob king 26.7325 +bob laertes 21.33 +bob laertes 37.88 +bob miller 25.495 +bob ovid 25.675 +bob ovid 25.83 +bob ovid 28.37875 +bob ovid 32.5025 +bob polk 9.74 +bob quirinius 34.57 +bob steinbeck 9.725 +bob van buren 29.552857142857142 +bob white 17.685 +bob white 29.46285714285715 +bob xylophone 17.03 +bob xylophone 33.24 +bob young 19.824 +bob zipper 24.095 +bob zipper 33.36 +bob zipper 34.99 +calvin allen 21.3 +calvin brown 20.808 +calvin brown 24.16 +calvin brown 24.636666666666667 +calvin carson 22.815 +calvin davidson 22.116666666666664 +calvin davidson 22.364 +calvin ellison 24.92 +calvin falkner 18.343999999999998 +calvin falkner 19.56 +calvin falkner 22.946000000000005 +calvin falkner 23.327777777777776 +calvin falkner 23.974999999999998 +calvin falkner 33.382 +calvin garcia 17.285 +calvin hernandez 12.663333333333334 +calvin johnson 24.898571428571433 +calvin laertes 28.105 +calvin laertes 28.362000000000002 +calvin nixon 
26.784285714285716 +calvin nixon 27.36 +calvin nixon 32.282 +calvin ovid 22.063333333333336 +calvin ovid 22.81500000000001 +calvin ovid 25.495714285714286 +calvin ovid 30.926666666666666 +calvin polk 27.820000000000004 +calvin quirinius 16.28 +calvin quirinius 25.552500000000002 +calvin robinson 31.814999999999998 +calvin steinbeck 12.85 +calvin steinbeck 14.939999999999998 +calvin steinbeck 17.535 +calvin thompson 28.592857142857145 +calvin thompson 40.79 +calvin underhill 24.062 +calvin van buren 26.525 +calvin van buren 28.865 +calvin white 28.256249999999998 +calvin white 43.275 +calvin xylophone 24.13111111111111 +calvin xylophone 25.27 +calvin xylophone 36.455 +calvin young 19.06 +calvin young 21.455999999999996 +calvin zipper 10.674999999999999 +calvin zipper 26.012857142857143 +david allen 25.134285714285713 +david allen 41.72333333333333 +david brown 8.52 +david brown 28.968181818181815 +david davidson 17.63 +david davidson 26.563333333333336 +david davidson 30.7325 +david davidson 33.33 +david ellison 23.79909090909091 +david ellison 24.74888888888889 +david ellison 26.198571428571427 +david hernandez 27.766 +david ichabod 16.66 +david ichabod 19.538 +david laertes 24.587500000000002 +david nixon 26.01375 +david ovid 24.131428571428575 +david ovid 32.72 +david quirinius 16.5 +david quirinius 25.08 +david quirinius 29.415 +david robinson 22.2175 +david robinson 30.99 +david thompson 25.38 +david underhill 1.17 +david underhill 21.546666666666667 +david underhill 28.26 +david van buren 26.45833333333334 +david van buren 35.7825 +david white 15.833333333333334 +david xylophone 10.71 +david xylophone 26.341428571428565 +david xylophone 33.224000000000004 +david young 9.64 +david young 21.22 +ethan allen 22.68 +ethan brown 19.37 +ethan brown 21.58666666666667 +ethan brown 21.799999999999997 +ethan brown 29.099999999999998 +ethan brown 32.43666666666667 +ethan brown 39.84 +ethan carson 24.15666666666667 +ethan ellison 27.80777777777778 +ethan ellison 48.71 +ethan falkner 17.993333333333336 +ethan falkner 26.775000000000002 +ethan garcia 19.15 +ethan hernandez 25.081111111111113 +ethan johnson 32.81875 +ethan king 19.51 +ethan laertes 16.463 +ethan laertes 17.625999999999998 +ethan laertes 25.020714285714288 +ethan laertes 26.697142857142858 +ethan laertes 28.14 +ethan laertes 29.668571428571425 +ethan laertes 36.589999999999996 +ethan miller 24.326666666666664 +ethan nixon 34.78666666666667 +ethan ovid 20.642857142857142 +ethan polk 6.98 +ethan polk 12.756666666666666 +ethan polk 30.324 +ethan polk 40.46 +ethan quirinius 23.419999999999998 +ethan quirinius 24.36 +ethan quirinius 29.068 +ethan robinson 24.463750000000005 +ethan robinson 31.630000000000003 +ethan underhill 19.86 +ethan van buren 22.241999999999997 +ethan white 31.3175 +ethan white 32.87 +ethan xylophone 30.996000000000002 +ethan zipper 22.728333333333335 +ethan zipper 29.66 +fred davidson 30.116666666666667 +fred davidson 33.55200000000001 +fred davidson 39.37 +fred ellison 16.72 +fred ellison 17.462 +fred ellison 35.1 +fred falkner 14.51 +fred falkner 27.207000000000004 +fred falkner 27.887500000000003 +fred hernandez 36.045 +fred ichabod 29.017000000000003 +fred ichabod 30.405000000000005 +fred johnson 16.9925 +fred king 20.024 +fred king 32.54666666666667 +fred laertes 25.610000000000003 +fred miller 25.92 +fred nixon 14.915 +fred nixon 21.830000000000002 +fred nixon 24.4125 +fred nixon 31.360000000000003 +fred polk 18.698 +fred polk 19.743000000000002 +fred polk 20.96 +fred polk 31.11 +fred quirinius 20.085 +fred 
quirinius 33.9 +fred robinson 22.502 +fred steinbeck 21.123749999999998 +fred steinbeck 25.572 +fred steinbeck 30.81 +fred underhill 29.198888888888884 +fred van buren 21.34 +fred van buren 23.285 +fred van buren 26.520000000000003 +fred van buren 33.6 +fred white 21.41 +fred young 16.876250000000002 +fred young 20.996666666666666 +fred zipper 23.627499999999998 +gabriella allen 24.113333333333333 +gabriella allen 28.4725 +gabriella brown 29.963333333333335 +gabriella brown 30.65222222222222 +gabriella carson 16.6325 +gabriella davidson 34.52 +gabriella ellison 20.18 +gabriella ellison 29.62 +gabriella falkner 14.37 +gabriella falkner 17.738333333333333 +gabriella falkner 28.61 +gabriella garcia 39.025 +gabriella hernandez 20.818333333333335 +gabriella hernandez 24.601666666666663 +gabriella ichabod 10.4925 +gabriella ichabod 20.686666666666667 +gabriella ichabod 23.185 +gabriella ichabod 23.43 +gabriella ichabod 27.44636363636364 +gabriella king 13.645 +gabriella king 22.23 +gabriella laertes 23.735 +gabriella miller 17.165 +gabriella ovid 22.884545454545453 +gabriella ovid 25.29 +gabriella polk 20.38714285714286 +gabriella polk 25.832000000000004 +gabriella steinbeck 6.226666666666667 +gabriella steinbeck 29.683333333333337 +gabriella thompson 25.565454545454546 +gabriella thompson 29.031 +gabriella thompson 29.122500000000006 +gabriella van buren 24.353 +gabriella van buren 34.21666666666667 +gabriella white 36.5175 +gabriella young 21.28142857142857 +gabriella young 21.32 +gabriella zipper 21.798461538461545 +gabriella zipper 28.676666666666666 +holly allen 27.18 +holly brown 22.76 +holly brown 30.950000000000003 +holly falkner 29.666666666666668 +holly hernandez 19.875 +holly hernandez 23.7075 +holly hernandez 24.5 +holly hernandez 26.50333333333333 +holly ichabod 23.262857142857143 +holly ichabod 25.85090909090909 +holly ichabod 29.521666666666665 +holly johnson 18.939999999999998 +holly johnson 23.2625 +holly johnson 26.49285714285714 +holly king 20.61333333333333 +holly king 30.95888888888889 +holly laertes 17.509999999999998 +holly miller 40.8975 +holly nixon 27.775714285714287 +holly nixon 30.642500000000002 +holly polk 21.02 +holly polk 24.446666666666665 +holly robinson 26.083750000000006 +holly thompson 18.801428571428573 +holly thompson 23.91 +holly thompson 29.97125 +holly underhill 18.19 +holly underhill 22.22888888888889 +holly underhill 22.813333333333336 +holly underhill 30.613999999999997 +holly van buren 20.113333333333333 +holly white 25.284999999999997 +holly white 41.0125 +holly xylophone 26.88571428571429 +holly young 30.8425 +holly young 33.24333333333334 +holly zipper 27.784000000000002 +holly zipper 28.384285714285713 +irene allen 34.605000000000004 +irene brown 18.740000000000002 +irene brown 28.974999999999998 +irene brown 32.230000000000004 +irene carson 25.665833333333335 +irene ellison 10.225000000000001 +irene ellison 26.119999999999997 +irene falkner 9.94 +irene falkner 19.41 +irene garcia 9.790000000000001 +irene garcia 19.666666666666668 +irene garcia 21.22666666666667 +irene ichabod 20.956666666666667 +irene ichabod 24.488333333333333 +irene johnson 25.34 +irene laertes 15.85 +irene laertes 21.573333333333334 +irene laertes 22.041999999999998 +irene miller 34.994285714285716 +irene nixon 22.52 +irene nixon 32.485 +irene nixon 33.165 +irene ovid 17.73 +irene ovid 22.96 +irene ovid 30.92 +irene polk 5.35 +irene polk 25.535 +irene polk 33.76 +irene polk 35.05 +irene polk 45.14 +irene quirinius 38.36 +irene quirinius 41.864999999999995 +irene quirinius 
42.0 +irene robinson 30.86 +irene steinbeck 15.08 +irene thompson 28.419999999999998 +irene underhill 27.977999999999998 +irene underhill 28.438 +irene van buren 26.93625 +irene van buren 27.797999999999995 +irene xylophone 29.10454545454546 +jessica brown 38.325 +jessica carson 16.038 +jessica carson 29.668333333333337 +jessica carson 33.06 +jessica davidson 18.926 +jessica davidson 26.2975 +jessica davidson 27.611428571428572 +jessica davidson 29.86 +jessica ellison 26.873333333333335 +jessica ellison 27.123333333333335 +jessica falkner 21.75142857142858 +jessica garcia 16.939090909090908 +jessica garcia 26.48 +jessica ichabod 28.971666666666664 +jessica johnson 21.601428571428574 +jessica johnson 24.42 +jessica miller 26.90571428571429 +jessica nixon 19.15 +jessica nixon 27.025000000000002 +jessica ovid 30.72285714285714 +jessica ovid 30.895 +jessica polk 27.912857142857145 +jessica quirinius 17.05 +jessica quirinius 21.529999999999998 +jessica quirinius 25.16 +jessica quirinius 26.347999999999995 +jessica robinson 24.322857142857142 +jessica thompson 28.658000000000005 +jessica thompson 30.873636363636365 +jessica underhill 14.6725 +jessica underhill 25.831666666666667 +jessica underhill 31.345000000000002 +jessica van buren 19.575 +jessica white 18.35 +jessica white 19.175 +jessica white 20.812 +jessica white 26.0 +jessica white 29.307142857142857 +jessica xylophone 22.26 +jessica young 27.9525 +jessica young 37.61333333333334 +jessica zipper 7.03 +jessica zipper 15.794999999999998 +jessica zipper 19.95 +katie allen 27.283846153846152 +katie brown 24.156666666666666 +katie davidson 13.498000000000001 +katie ellison 19.2 +katie ellison 24.888571428571428 +katie falkner 28.959999999999997 +katie garcia 28.287142857142857 +katie garcia 36.196666666666665 +katie hernandez 25.14428571428572 +katie ichabod 19.363333333333333 +katie ichabod 20.458571428571428 +katie ichabod 28.924999999999997 +katie king 21.64125 +katie king 21.855 +katie king 22.895 +katie miller 16.263333333333335 +katie miller 30.274285714285718 +katie nixon 25.022499999999997 +katie ovid 24.055000000000003 +katie polk 21.296666666666667 +katie polk 32.03 +katie robinson 36.26 +katie van buren 28.332 +katie van buren 31.408000000000005 +katie white 23.48 +katie white 26.236666666666665 +katie xylophone 32.415 +katie young 18.209999999999997 +katie young 22.88125 +katie young 28.39888888888889 +katie zipper 10.285 +katie zipper 27.495 +luke allen 9.42 +luke allen 21.374615384615385 +luke allen 25.32 +luke allen 27.174999999999997 +luke allen 35.434 +luke brown 25.08 +luke davidson 28.205 +luke davidson 28.790000000000003 +luke ellison 7.8 +luke ellison 16.04 +luke ellison 23.426666666666666 +luke falkner 18.0 +luke falkner 22.19 +luke garcia 29.619999999999997 +luke garcia 32.722 +luke ichabod 21.150000000000002 +luke ichabod 32.78142857142857 +luke johnson 21.58666666666667 +luke johnson 23.03 +luke johnson 23.054 +luke laertes 20.264 +luke laertes 33.72 +luke laertes 39.8 +luke laertes 41.36 +luke laertes 42.254999999999995 +luke miller 20.054444444444446 +luke ovid 19.819999999999997 +luke ovid 30.832857142857147 +luke polk 24.348750000000003 +luke polk 26.57625 +luke quirinius 38.07 +luke robinson 30.119999999999994 +luke robinson 30.31375 +luke thompson 29.026874999999997 +luke underhill 21.735714285714288 +luke underhill 22.175 +luke underhill 26.785714285714285 +luke van buren 17.072222222222223 +luke white 29.063333333333333 +luke xylophone 28.994 +luke zipper 33.995 +mike allen 32.78 +mike brown 27.592222222222222 
+mike carson 28.8675 +mike carson 29.88 +mike carson 32.07142857142857 +mike davidson 21.240000000000002 +mike davidson 46.31 +mike ellison 20.5275 +mike ellison 21.99 +mike ellison 24.36 +mike ellison 24.511111111111113 +mike ellison 27.703333333333337 +mike falkner 40.335 +mike garcia 24.3525 +mike garcia 24.582 +mike garcia 35.12 +mike hernandez 8.783333333333333 +mike hernandez 19.40666666666667 +mike ichabod 29.120000000000005 +mike king 14.256666666666668 +mike king 17.889999999999997 +mike king 20.493333333333336 +mike king 23.86 +mike king 26.081 +mike king 30.974 +mike miller 29.275 +mike nixon 17.306 +mike nixon 25.572 +mike polk 18.96 +mike polk 23.75142857142857 +mike polk 33.42 +mike quirinius 19.37375 +mike steinbeck 14.155 +mike steinbeck 19.305833333333332 +mike steinbeck 20.721249999999998 +mike steinbeck 31.75 +mike van buren 15.520000000000001 +mike van buren 25.828333333333333 +mike white 19.13111111111111 +mike white 22.4025 +mike white 24.7725 +mike white 35.235 +mike young 1.5 +mike young 24.679 +mike young 34.02833333333333 +mike zipper 17.97 +mike zipper 26.247333333333337 +mike zipper 44.169999999999995 +nick allen 23.744999999999997 +nick allen 36.93 +nick brown 27.669999999999998 +nick davidson 31.97285714285714 +nick ellison 23.061666666666667 +nick ellison 27.676666666666666 +nick falkner 22.555714285714284 +nick falkner 27.46 +nick garcia 17.465 +nick garcia 18.854 +nick garcia 33.60333333333333 +nick ichabod 19.231428571428573 +nick ichabod 27.645000000000003 +nick ichabod 35.836666666666666 +nick johnson 5.58 +nick johnson 25.274 +nick laertes 26.57857142857143 +nick miller 22.208333333333332 +nick nixon 16.107499999999998 +nick ovid 31.350000000000005 +nick polk 35.70333333333334 +nick quirinius 20.753333333333334 +nick quirinius 30.573333333333334 +nick robinson 21.48 +nick robinson 23.185 +nick steinbeck 19.56555555555556 +nick thompson 31.474999999999998 +nick underhill 38.24 +nick van buren 20.77375 +nick xylophone 30.909999999999997 +nick young 10.725000000000001 +nick young 24.95 +nick zipper 16.185000000000002 +nick zipper 34.72 +oscar allen 24.645 +oscar brown 39.55 +oscar carson 21.893333333333334 +oscar carson 22.868 +oscar carson 27.4875 +oscar carson 28.09428571428571 +oscar carson 30.373333333333335 +oscar davidson 9.046666666666667 +oscar ellison 24.185000000000002 +oscar ellison 30.1675 +oscar falkner 19.295 +oscar garcia 22.495833333333334 +oscar hernandez 16.6825 +oscar hernandez 25.736 +oscar ichabod 17.64 +oscar ichabod 21.11 +oscar ichabod 23.508000000000003 +oscar ichabod 30.392222222222227 +oscar johnson 19.9375 +oscar johnson 21.114444444444445 +oscar king 24.590000000000003 +oscar king 26.675 +oscar king 39.6 +oscar laertes 14.975 +oscar laertes 15.525 +oscar laertes 22.6 +oscar laertes 41.6 +oscar nixon 25.4025 +oscar ovid 24.854285714285712 +oscar ovid 25.309 +oscar ovid 29.63 +oscar polk 21.235999999999997 +oscar polk 21.27 +oscar quirinius 24.200000000000003 +oscar quirinius 24.391428571428573 +oscar quirinius 27.83285714285714 +oscar quirinius 27.853333333333328 +oscar robinson 12.3625 +oscar robinson 12.545 +oscar robinson 20.234 +oscar robinson 28.071666666666673 +oscar steinbeck 31.101111111111113 +oscar thompson 19.4875 +oscar thompson 19.975714285714286 +oscar thompson 21.1425 +oscar thompson 21.166363636363638 +oscar underhill 27.644 +oscar van buren 25.843333333333334 +oscar van buren 29.073333333333334 +oscar van buren 29.682727272727274 +oscar white 19.0775 +oscar white 23.483333333333334 +oscar white 
24.705000000000002 +oscar white 28.0075 +oscar xylophone 30.020000000000003 +oscar xylophone 30.46833333333333 +oscar xylophone 33.64 +oscar zipper 21.69 +oscar zipper 23.478 +oscar zipper 31.36 +priscilla brown 14.222 +priscilla brown 27.044999999999998 +priscilla brown 31.14769230769231 +priscilla carson 14.33 +priscilla carson 18.951428571428572 +priscilla carson 27.084999999999997 +priscilla ichabod 28.160999999999994 +priscilla ichabod 49.46 +priscilla johnson 8.365 +priscilla johnson 18.176666666666666 +priscilla johnson 25.02666666666667 +priscilla johnson 26.918333333333337 +priscilla johnson 30.695999999999998 +priscilla king 19.747142857142855 +priscilla nixon 29.035555555555554 +priscilla nixon 30.27333333333333 +priscilla ovid 13.591999999999999 +priscilla ovid 35.879999999999995 +priscilla polk 23.12 +priscilla quirinius 21.826666666666668 +priscilla thompson 20.44 +priscilla underhill 28.23 +priscilla underhill 34.33200000000001 +priscilla van buren 18.122857142857143 +priscilla van buren 20.16 +priscilla van buren 26.447999999999997 +priscilla white 26.37769230769231 +priscilla xylophone 13.95 +priscilla xylophone 20.596666666666668 +priscilla xylophone 27.22 +priscilla young 29.19 +priscilla young 46.28 +priscilla zipper 11.64 +priscilla zipper 31.159999999999997 +quinn allen 26.347272727272728 +quinn allen 26.85833333333333 +quinn brown 26.822857142857146 +quinn brown 30.406000000000006 +quinn brown 41.53 +quinn davidson 17.375714285714288 +quinn davidson 20.22666666666667 +quinn davidson 25.6375 +quinn davidson 30.173333333333332 +quinn ellison 23.052 +quinn ellison 40.565 +quinn garcia 20.544 +quinn garcia 24.104999999999997 +quinn garcia 25.174 +quinn garcia 28.446000000000005 +quinn ichabod 15.12 +quinn king 12.73 +quinn king 15.12125 +quinn laertes 17.29 +quinn laertes 28.221666666666668 +quinn laertes 32.96 +quinn nixon 26.034000000000002 +quinn ovid 28.71 +quinn quirinius 8.61 +quinn robinson 16.852 +quinn steinbeck 30.093333333333334 +quinn steinbeck 49.21 +quinn thompson 7.365 +quinn thompson 33.43125 +quinn underhill 24.045 +quinn underhill 27.905454545454543 +quinn underhill 31.21 +quinn van buren 27.807692307692314 +quinn young 30.56 +quinn zipper 18.31 +quinn zipper 21.380000000000003 +rachel allen 32.501666666666665 +rachel allen 46.57 +rachel brown 23.08 +rachel brown 23.880000000000003 +rachel brown 24.43 +rachel brown 34.11 +rachel brown 35.345 +rachel carson 27.468125 +rachel carson 37.446666666666665 +rachel davidson 22.75 +rachel ellison 22.848333333333333 +rachel falkner 18.78125 +rachel falkner 28.876250000000002 +rachel falkner 29.577777777777776 +rachel falkner 31.831249999999997 +rachel johnson 31.108000000000004 +rachel king 17.4175 +rachel king 30.873749999999998 +rachel laertes 17.470000000000002 +rachel laertes 33.51 +rachel ovid 3.03 +rachel ovid 15.38 +rachel polk 18.564285714285713 +rachel quirinius 31.692500000000003 +rachel robinson 0.6 +rachel robinson 23.953333333333333 +rachel robinson 37.645 +rachel thompson 11.96 +rachel thompson 29.484 +rachel thompson 38.43 +rachel underhill 27.55333333333333 +rachel white 23.511428571428574 +rachel white 33.7 +rachel young 24.85166666666667 +rachel zipper 22.85 +rachel zipper 37.382 +sarah carson 10.38 +sarah carson 22.639 +sarah carson 44.92 +sarah ellison 16.36 +sarah falkner 29.34875 +sarah falkner 29.64125 +sarah garcia 11.296666666666667 +sarah garcia 20.723333333333333 +sarah garcia 24.115 +sarah ichabod 26.948333333333327 +sarah ichabod 33.80428571428571 +sarah johnson 18.3925 +sarah johnson 
23.087500000000002 +sarah johnson 26.57857142857143 +sarah johnson 37.01 +sarah king 9.556666666666667 +sarah king 25.6125 +sarah miller 19.14875 +sarah ovid 29.205 +sarah robinson 11.326666666666668 +sarah robinson 35.809999999999995 +sarah steinbeck 23.26 +sarah white 21.75111111111111 +sarah white 26.850000000000005 +sarah xylophone 33.40571428571429 +sarah young 30.66 +sarah zipper 29.521666666666672 +tom brown 16.38 +tom brown 23.645 +tom carson 23.630000000000003 +tom carson 31.935 +tom carson 41.83 +tom davidson 30.404285714285717 +tom ellison 27.056 +tom ellison 27.401999999999997 +tom ellison 29.812 +tom falkner 15.901999999999997 +tom falkner 25.49857142857143 +tom hernandez 11.418000000000001 +tom hernandez 30.705000000000002 +tom ichabod 14.83 +tom johnson 30.748571428571434 +tom johnson 37.086666666666666 +tom king 17.923333333333332 +tom laertes 19.201666666666668 +tom laertes 22.276666666666667 +tom miller 17.9925 +tom miller 19.791666666666668 +tom miller 19.9225 +tom nixon 25.70625 +tom ovid 29.66 +tom polk 27.0975 +tom polk 28.646666666666672 +tom quirinius 37.68333333333333 +tom quirinius 38.28 +tom robinson 18.07 +tom robinson 19.094 +tom robinson 27.34125 +tom robinson 31.135714285714283 +tom steinbeck 32.70333333333333 +tom van buren 20.723333333333333 +tom van buren 24.8525 +tom van buren 31.631666666666664 +tom white 25.646000000000004 +tom young 3.12 +tom young 19.588333333333335 +tom zipper 23.317272727272726 +ulysses brown 16.196666666666665 +ulysses carson 16.3475 +ulysses carson 22.448181818181823 +ulysses carson 28.258 +ulysses carson 32.10833333333333 +ulysses davidson 37.775 +ulysses ellison 30.517000000000003 +ulysses garcia 32.92 +ulysses hernandez 13.877500000000001 +ulysses hernandez 20.856666666666666 +ulysses hernandez 21.32625 +ulysses ichabod 3.29 +ulysses ichabod 24.629999999999995 +ulysses johnson 32.208333333333336 +ulysses king 25.29111111111111 +ulysses laertes 14.936666666666667 +ulysses laertes 25.89 +ulysses laertes 26.63 +ulysses miller 2.36 +ulysses miller 26.403333333333336 +ulysses nixon 34.4575 +ulysses ovid 23.810000000000002 +ulysses polk 22.4075 +ulysses polk 26.778000000000002 +ulysses polk 38.73166666666667 +ulysses polk 47.68 +ulysses quirinius 33.07833333333333 +ulysses robinson 17.386666666666667 +ulysses steinbeck 22.2675 +ulysses steinbeck 24.904000000000003 +ulysses thompson 22.687142857142856 +ulysses underhill 6.66 +ulysses underhill 22.539 +ulysses underhill 24.853333333333335 +ulysses underhill 27.314 +ulysses underhill 29.424999999999997 +ulysses underhill 32.905 +ulysses underhill 41.653333333333336 +ulysses van buren 21.868181818181817 +ulysses white 15.296666666666667 +ulysses white 28.343333333333334 +ulysses xylophone 24.718 +ulysses xylophone 30.205 +ulysses xylophone 35.61 +ulysses young 21.56 +ulysses young 32.28125 +ulysses young 37.275 +victor allen 23.548000000000002 +victor allen 24.759999999999998 +victor brown 22.10181818181818 +victor brown 23.73 +victor brown 25.427272727272726 +victor brown 26.218571428571433 +victor davidson 20.55 +victor davidson 22.21666666666667 +victor davidson 29.778 +victor ellison 13.0775 +victor ellison 33.666 +victor hernandez 10.896 +victor hernandez 18.922 +victor hernandez 24.908888888888892 +victor hernandez 27.426666666666666 +victor hernandez 35.6675 +victor johnson 20.02 +victor johnson 27.070000000000004 +victor johnson 29.0775 +victor king 18.066666666666666 +victor king 21.488 +victor laertes 26.77777777777778 +victor laertes 28.095000000000002 +victor miller 
5.3100000000000005 +victor nixon 21.395714285714288 +victor nixon 28.33 +victor ovid 35.225 +victor polk 21.990000000000002 +victor quirinius 24.62833333333333 +victor quirinius 29.742500000000003 +victor robinson 14.575 +victor robinson 25.92 +victor steinbeck 26.136666666666667 +victor steinbeck 26.485 +victor steinbeck 34.745999999999995 +victor thompson 18.735 +victor van buren 27.758333333333336 +victor van buren 37.38333333333333 +victor white 24.607999999999997 +victor white 30.66 +victor xylophone 2.775 +victor xylophone 8.356666666666667 +victor xylophone 24.259999999999998 +victor xylophone 25.636666666666667 +victor xylophone 31.610000000000003 +victor young 22.264444444444443 +victor zipper 39.84 +wendy allen 3.4 +wendy allen 24.695000000000004 +wendy allen 29.912 +wendy brown 28.22 +wendy brown 36.74 +wendy ellison 17.549999999999997 +wendy ellison 22.720000000000002 +wendy falkner 13.765 +wendy falkner 24.424444444444443 +wendy falkner 27.86733333333333 +wendy garcia 12.3 +wendy garcia 22.396666666666665 +wendy garcia 26.8325 +wendy garcia 28.596666666666664 +wendy hernandez 21.111428571428572 +wendy ichabod 4.44 +wendy king 23.654285714285713 +wendy king 29.325714285714287 +wendy king 34.21666666666667 +wendy laertes 31.160714285714285 +wendy laertes 31.46666666666667 +wendy laertes 39.22 +wendy miller 12.73 +wendy miller 30.343333333333334 +wendy nixon 19.92714285714286 +wendy nixon 29.675714285714285 +wendy ovid 21.193749999999998 +wendy ovid 28.49846153846154 +wendy polk 20.94 +wendy polk 22.999999999999996 +wendy quirinius 21.05 +wendy quirinius 26.8425 +wendy robinson 8.39 +wendy robinson 24.05 +wendy robinson 26.974285714285713 +wendy steinbeck 26.765 +wendy thompson 24.14 +wendy thompson 28.995384615384616 +wendy underhill 23.118333333333336 +wendy underhill 25.581666666666667 +wendy underhill 32.985 +wendy van buren 25.151666666666667 +wendy van buren 27.077142857142857 +wendy white 24.4025 +wendy xylophone 22.85181818181818 +wendy xylophone 26.96 +wendy young 4.83 +wendy young 21.325 +xavier allen 19.133333333333333 +xavier allen 26.11466666666667 +xavier allen 34.58 +xavier brown 2.63 +xavier brown 24.764285714285712 +xavier brown 30.166666666666668 +xavier carson 29.006666666666664 +xavier carson 32.106 +xavier davidson 14.094999999999999 +xavier davidson 15.906666666666666 +xavier davidson 27.353333333333335 +xavier ellison 22.174166666666668 +xavier ellison 35.01 +xavier garcia 30.357500000000005 +xavier hernandez 19.87 +xavier hernandez 20.805 +xavier hernandez 33.497499999999995 +xavier ichabod 12.34 +xavier ichabod 26.166249999999998 +xavier johnson 20.33222222222222 +xavier johnson 22.503333333333334 +xavier king 1.3 +xavier king 31.348571428571425 +xavier laertes 7.420000000000001 +xavier ovid 25.576 +xavier polk 11.094285714285714 +xavier polk 19.93 +xavier polk 23.63125 +xavier polk 30.194 +xavier quirinius 13.776666666666666 +xavier quirinius 22.27 +xavier quirinius 24.977692307692312 +xavier quirinius 34.95 +xavier thompson 16.47 +xavier underhill 1.31 +xavier white 19.331666666666667 +xavier white 34.68 +xavier xylophone 21.09625 +xavier zipper 14.89 +yuri allen 18.490000000000002 +yuri allen 22.689999999999998 +yuri brown 15.502857142857142 +yuri brown 22.934285714285714 +yuri carson 27.139999999999997 +yuri carson 35.27 +yuri ellison 10.52 +yuri ellison 25.2025 +yuri falkner 24.633076923076924 +yuri falkner 28.52 +yuri garcia 25.545 +yuri hernandez 16.35 +yuri johnson 19.9525 +yuri johnson 27.636000000000003 +yuri johnson 39.92 +yuri king 
15.450000000000001 +yuri laertes 0.41000000000000003 +yuri laertes 33.15 +yuri nixon 27.795 +yuri nixon 39.145 +yuri polk 0.8 +yuri polk 9.705 +yuri polk 25.513333333333332 +yuri quirinius 16.29 +yuri quirinius 19.254999999999995 +yuri quirinius 37.878 +yuri steinbeck 27.6275 +yuri steinbeck 48.89 +yuri thompson 23.330000000000002 +yuri underhill 20.504444444444445 +yuri underhill 21.66 +yuri white 31.205 +yuri xylophone 18.790000000000003 +zach allen 13.06 +zach brown 19.985 +zach brown 26.52333333333333 +zach brown 34.66 +zach brown 34.972857142857144 +zach brown 37.45399999999999 +zach carson 26.195999999999998 +zach ellison 17.55 +zach falkner 3.42 +zach falkner 16.18 +zach garcia 20.062 +zach garcia 25.935 +zach garcia 28.974285714285717 +zach garcia 35.449999999999996 +zach ichabod 10.59 +zach ichabod 31.691999999999997 +zach king 6.81 +zach king 20.817 +zach king 32.542500000000004 +zach miller 13.23 +zach miller 26.30666666666667 +zach miller 26.73 +zach ovid 21.122500000000002 +zach ovid 26.983999999999998 +zach ovid 33.15 +zach ovid 40.59 +zach quirinius 13.38 +zach robinson 20.451999999999998 +zach steinbeck 20.358333333333334 +zach steinbeck 29.65 +zach thompson 16.45 +zach thompson 21.430000000000003 +zach underhill 31.438333333333333 +zach white 23.111428571428572 +zach xylophone 21.221428571428568 +zach xylophone 23.156666666666666 +zach young 24.72666666666667 +zach zipper 19.878888888888884 +zach zipper 34.84571428571429 +zach zipper 35.36 diff --git a/sql/hive/src/test/resources/golden/windowing_udaf2-0-96659fde37d7a38ea15b367b47f59ce2 b/sql/hive/src/test/resources/golden/windowing_udaf2-0-96659fde37d7a38ea15b367b47f59ce2 new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sql/hive/src/test/resources/golden/windowing_udaf2-1-b4bdee4908b1cb8e240c549ae5cfe4c0 b/sql/hive/src/test/resources/golden/windowing_udaf2-1-b4bdee4908b1cb8e240c549ae5cfe4c0 new file mode 100644 index 0000000000000..17c31c0f04592 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_udaf2-1-b4bdee4908b1cb8e240c549ae5cfe4c0 @@ -0,0 +1 @@ +130091 130091 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-0-f498cccf82480be03022d2a36f87651e b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-0-f498cccf82480be03022d2a36f87651e new file mode 100644 index 0000000000000..31b1f85a5eb5a --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-0-f498cccf82480be03022d2a36f87651e @@ -0,0 +1,1049 @@ + 4294967354 + 4294967416 + 4294967457 +alice allen 4294967487 +alice allen 4294967525 +alice allen 4294967531 +alice brown 4294967355 +alice carson 4294967370 +alice davidson 4294967517 +alice falkner 4294967316 +alice garcia 4294967369 +alice hernandez 4294967299 +alice hernandez 4294967314 +alice johnson 4294967424 +alice king 4294967387 +alice king 4294967516 +alice king 4294967546 +alice laertes 4294967519 +alice laertes 8589934835 +alice miller 4294967324 +alice nixon 4294967410 +alice nixon 4294967413 +alice nixon 4294967484 +alice ovid 8589934726 +alice polk 4294967366 +alice quirinius 4294967505 +alice quirinius 4294967549 +alice robinson 4294967445 +alice robinson 4294967502 +alice steinbeck 4294967364 +alice steinbeck 4294967474 +alice steinbeck 4294967549 +alice underhill 4294967441 +alice van buren 4294967428 +alice xylophone 4294967363 +alice xylophone 4294967519 +alice xylophone 8589934832 +alice zipper 4294967380 +alice zipper 4294967520 +alice zipper 8589935026 +bob brown 4294967422 
+bob brown 4294967427 +bob brown 4294967431 +bob carson 4294967408 +bob davidson 4294967354 +bob davidson 4294967435 +bob davidson 4294967504 +bob ellison 4294967344 +bob ellison 4294967362 +bob ellison 4294967436 +bob ellison 4294967530 +bob falkner 8589934966 +bob garcia 4294967369 +bob garcia 4294967435 +bob garcia 4294967439 +bob garcia 8589934707 +bob garcia 8589934867 +bob hernandez 4294967500 +bob ichabod 4294967424 +bob king 4294967297 +bob king 4294967539 +bob king 8589934870 +bob laertes 4294967380 +bob laertes 4294967472 +bob miller 4294967349 +bob ovid 4294967395 +bob ovid 4294967400 +bob ovid 4294967401 +bob ovid 4294967512 +bob polk 4294967337 +bob quirinius 4294967346 +bob steinbeck 4294967342 +bob van buren 4294967422 +bob white 4294967362 +bob white 4294967493 +bob xylophone 4294967407 +bob xylophone 4294967465 +bob young 4294967413 +bob zipper 4294967299 +bob zipper 8589934723 +bob zipper 8589934840 +calvin allen 12884902208 +calvin brown 4294967411 +calvin brown 4294967437 +calvin brown 4294967530 +calvin carson 8589934876 +calvin davidson 4294967468 +calvin davidson 8589934837 +calvin ellison 4294967480 +calvin falkner 4294967300 +calvin falkner 4294967305 +calvin falkner 4294967345 +calvin falkner 8589934749 +calvin falkner 8589934840 +calvin falkner 8589934978 +calvin garcia 8589934927 +calvin hernandez 12884902173 +calvin johnson 4294967546 +calvin laertes 4294967431 +calvin laertes 4294967499 +calvin nixon 4294967300 +calvin nixon 4294967412 +calvin nixon 4294967488 +calvin ovid 4294967329 +calvin ovid 4294967349 +calvin ovid 8589934723 +calvin ovid 8589934835 +calvin polk 8589934962 +calvin quirinius 4294967521 +calvin quirinius 4294967532 +calvin robinson 4294967326 +calvin steinbeck 4294967474 +calvin steinbeck 4294967505 +calvin steinbeck 8589934722 +calvin thompson 4294967513 +calvin thompson 8589934700 +calvin underhill 4294967478 +calvin van buren 4294967300 +calvin van buren 4294967508 +calvin white 4294967304 +calvin white 8589934924 +calvin xylophone 4294967376 +calvin xylophone 8589934599 +calvin xylophone 8589934866 +calvin young 4294967342 +calvin young 8589934817 +calvin zipper 12884902359 +calvin zipper 17179869649 +david allen 4294967371 +david allen 4294967381 +david brown 8589934762 +david brown 12884902420 +david davidson 4294967522 +david davidson 8589934819 +david davidson 12884902188 +david davidson 12884902327 +david ellison 4294967463 +david ellison 8589934777 +david ellison 12884902263 +david hernandez 4294967324 +david ichabod 4294967487 +david ichabod 12884902220 +david laertes 12884902107 +david nixon 4294967381 +david ovid 4294967396 +david ovid 4294967443 +david quirinius 4294967457 +david quirinius 4294967530 +david quirinius 12884902194 +david robinson 4294967465 +david robinson 17179869575 +david thompson 4294967361 +david underhill 4294967384 +david underhill 8589934942 +david underhill 12884902357 +david van buren 4294967309 +david van buren 8589934901 +david white 4294967428 +david xylophone 4294967479 +david xylophone 4294967480 +david xylophone 8589934856 +david young 4294967296 +david young 4294967305 +ethan allen 4294967351 +ethan brown 4294967320 +ethan brown 4294967331 +ethan brown 4294967403 +ethan brown 4294967420 +ethan brown 8589934797 +ethan brown 8589934805 +ethan carson 4294967352 +ethan ellison 4294967514 +ethan ellison 8589934887 +ethan falkner 4294967318 +ethan falkner 4294967461 +ethan garcia 4294967310 +ethan hernandez 4294967349 +ethan johnson 8589934738 +ethan king 8589934731 +ethan laertes 4294967422 +ethan 
laertes 4294967531 +ethan laertes 8589934767 +ethan laertes 8589934806 +ethan laertes 8589934830 +ethan laertes 8589934995 +ethan laertes 12884902063 +ethan miller 4294967352 +ethan nixon 8589935019 +ethan ovid 8589934909 +ethan polk 4294967329 +ethan polk 4294967382 +ethan polk 4294967479 +ethan polk 8589935021 +ethan quirinius 4294967348 +ethan quirinius 4294967501 +ethan quirinius 8589934695 +ethan robinson 4294967353 +ethan robinson 8589935019 +ethan underhill 8589934897 +ethan van buren 4294967511 +ethan white 4294967427 +ethan white 8589934975 +ethan xylophone 8589934956 +ethan zipper 4294967462 +ethan zipper 12884902348 +fred davidson 8589934724 +fred davidson 8589934850 +fred davidson 12884902468 +fred ellison 4294967393 +fred ellison 8589934797 +fred ellison 8589934978 +fred falkner 4294967547 +fred falkner 12884902170 +fred falkner 17179869760 +fred hernandez 8589934833 +fred ichabod 8589934853 +fred ichabod 12884902455 +fred johnson 8589934904 +fred king 8589934651 +fred king 8589934951 +fred laertes 8589934883 +fred miller 12884902228 +fred nixon 4294967297 +fred nixon 4294967375 +fred nixon 4294967514 +fred nixon 12884902182 +fred polk 4294967332 +fred polk 4294967458 +fred polk 4294967507 +fred polk 8589934944 +fred quirinius 8589934894 +fred quirinius 12884902335 +fred robinson 8589934904 +fred steinbeck 4294967329 +fred steinbeck 4294967411 +fred steinbeck 4294967472 +fred underhill 4294967387 +fred van buren 8589934830 +fred van buren 12884902319 +fred van buren 12884902382 +fred van buren 17179869836 +fred white 8589934763 +fred young 4294967485 +fred young 8589934832 +fred zipper 12884902371 +gabriella allen 4294967405 +gabriella allen 12884902509 +gabriella brown 4294967403 +gabriella brown 4294967543 +gabriella carson 8589934950 +gabriella davidson 4294967507 +gabriella ellison 4294967393 +gabriella ellison 12884902284 +gabriella falkner 4294967378 +gabriella falkner 4294967523 +gabriella falkner 12884902338 +gabriella garcia 4294967419 +gabriella hernandez 4294967462 +gabriella hernandez 4294967481 +gabriella ichabod 4294967337 +gabriella ichabod 8589934740 +gabriella ichabod 8589934797 +gabriella ichabod 8589934818 +gabriella ichabod 17179869508 +gabriella king 4294967393 +gabriella king 8589934906 +gabriella laertes 4294967410 +gabriella miller 8589934768 +gabriella ovid 4294967522 +gabriella ovid 8589934895 +gabriella polk 4294967302 +gabriella polk 8589934868 +gabriella steinbeck 4294967435 +gabriella steinbeck 4294967500 +gabriella thompson 4294967412 +gabriella thompson 8589934814 +gabriella thompson 12884902318 +gabriella van buren 4294967470 +gabriella van buren 8589934783 +gabriella white 4294967335 +gabriella young 4294967431 +gabriella young 8589934980 +gabriella zipper 4294967510 +gabriella zipper 8589934792 +holly allen 12884901926 +holly brown 8589934722 +holly brown 8589934857 +holly falkner 8589934849 +holly hernandez 8589934749 +holly hernandez 8589934805 +holly hernandez 8589935056 +holly hernandez 12884902485 +holly ichabod 4294967329 +holly ichabod 8589934754 +holly ichabod 8589934981 +holly johnson 4294967535 +holly johnson 12884902194 +holly johnson 17179869874 +holly king 8589934785 +holly king 8589934939 +holly laertes 12884902333 +holly miller 8589934823 +holly nixon 4294967383 +holly nixon 8589934744 +holly polk 4294967434 +holly polk 8589934782 +holly robinson 12884902369 +holly thompson 4294967339 +holly thompson 12884902395 +holly thompson 17179869547 +holly underhill 8589934913 +holly underhill 8589934924 +holly underhill 12884902376 
+holly underhill 12884902412 +holly van buren 4294967539 +holly white 17179869548 +holly white 17179869900 +holly xylophone 8589934846 +holly young 4294967500 +holly young 8589934932 +holly zipper 4294967509 +holly zipper 17179869531 +irene allen 12884902413 +irene brown 4294967428 +irene brown 8589934934 +irene brown 12884902207 +irene carson 8589934797 +irene ellison 8589934732 +irene ellison 8589934773 +irene falkner 4294967404 +irene falkner 4294967548 +irene garcia 4294967323 +irene garcia 8589934887 +irene garcia 12884902479 +irene ichabod 4294967509 +irene ichabod 8589934860 +irene johnson 8589934990 +irene laertes 4294967481 +irene laertes 12884902196 +irene laertes 17179869632 +irene miller 4294967387 +irene nixon 4294967538 +irene nixon 12884902129 +irene nixon 12884902324 +irene ovid 8589934764 +irene ovid 8589934886 +irene ovid 8589934903 +irene polk 4294967465 +irene polk 4294967521 +irene polk 8589934672 +irene polk 8589934842 +irene polk 17179869877 +irene quirinius 8589934875 +irene quirinius 12884902269 +irene quirinius 17179869628 +irene robinson 8589934676 +irene steinbeck 4294967549 +irene thompson 4294967479 +irene underhill 8589934694 +irene underhill 12884902077 +irene van buren 8589934932 +irene van buren 12884902202 +irene xylophone 8589934901 +jessica brown 8589934867 +jessica carson 4294967508 +jessica carson 8589934740 +jessica carson 17179869819 +jessica davidson 4294967384 +jessica davidson 8589934864 +jessica davidson 12884902256 +jessica davidson 12884902321 +jessica ellison 4294967316 +jessica ellison 12884902128 +jessica falkner 8589934980 +jessica garcia 4294967540 +jessica garcia 21474837337 +jessica ichabod 8589934816 +jessica johnson 8589935006 +jessica johnson 12884902222 +jessica miller 8589934898 +jessica nixon 8589934742 +jessica nixon 12884902240 +jessica ovid 8589934830 +jessica ovid 12884902307 +jessica polk 21474837163 +jessica quirinius 8589934701 +jessica quirinius 8589934872 +jessica quirinius 12884902159 +jessica quirinius 12884902276 +jessica robinson 4294967542 +jessica thompson 8589934698 +jessica thompson 12884902232 +jessica underhill 8589934810 +jessica underhill 8589934878 +jessica underhill 17179869479 +jessica van buren 8589934726 +jessica white 12884902155 +jessica white 12884902281 +jessica white 12884902296 +jessica white 12884902314 +jessica white 17179869676 +jessica xylophone 17179869697 +jessica young 17179869859 +jessica young 17179869861 +jessica zipper 4294967372 +jessica zipper 8589934727 +jessica zipper 17179869778 +katie allen 8589934791 +katie brown 17179869660 +katie davidson 12884902181 +katie ellison 12884902184 +katie ellison 12884902355 +katie falkner 8589934911 +katie garcia 8589934683 +katie garcia 12884902046 +katie hernandez 8589934812 +katie ichabod 8589934795 +katie ichabod 8589934862 +katie ichabod 8589934869 +katie king 4294967339 +katie king 4294967421 +katie king 8589934826 +katie miller 8589934829 +katie miller 12884902267 +katie nixon 21474837149 +katie ovid 4294967519 +katie polk 8589934726 +katie polk 12884902291 +katie robinson 17179869645 +katie van buren 8589934722 +katie van buren 17179869441 +katie white 4294967306 +katie white 8589934885 +katie xylophone 12884902193 +katie young 8589934819 +katie young 8589935024 +katie young 12884902058 +katie zipper 4294967354 +katie zipper 12884902310 +luke allen 8589934864 +luke allen 8589934931 +luke allen 8589935059 +luke allen 12884902257 +luke allen 12884902322 +luke brown 8589934779 +luke davidson 4294967354 +luke davidson 12884902360 +luke ellison 
12884902183 +luke ellison 21474836998 +luke ellison 21474837060 +luke falkner 8589934772 +luke falkner 17179869561 +luke garcia 4294967304 +luke garcia 21474837157 +luke ichabod 12884902150 +luke ichabod 12884902366 +luke johnson 4294967527 +luke johnson 8589934812 +luke johnson 12884902161 +luke laertes 8589935027 +luke laertes 12884902031 +luke laertes 12884902184 +luke laertes 12884902213 +luke laertes 12884902378 +luke miller 8589934826 +luke ovid 4294967492 +luke ovid 8589934913 +luke polk 8589934837 +luke polk 12884902340 +luke quirinius 8589934855 +luke robinson 4294967307 +luke robinson 17179869711 +luke thompson 4294967521 +luke underhill 8589934829 +luke underhill 12884902299 +luke underhill 21474837138 +luke van buren 8589934852 +luke white 12884902418 +luke xylophone 8589934804 +luke zipper 4294967353 +mike allen 17179869750 +mike brown 17179869735 +mike carson 4294967477 +mike carson 8589934803 +mike carson 17179869855 +mike davidson 12884902377 +mike davidson 17179869841 +mike ellison 8589934833 +mike ellison 12884902165 +mike ellison 12884902513 +mike ellison 17179869587 +mike ellison 17179869824 +mike falkner 4294967301 +mike garcia 4294967398 +mike garcia 8589934800 +mike garcia 12884902292 +mike hernandez 8589934824 +mike hernandez 12884902281 +mike ichabod 4294967494 +mike king 4294967347 +mike king 4294967400 +mike king 12884902363 +mike king 12884902475 +mike king 17179869528 +mike king 17179869592 +mike miller 17179869705 +mike nixon 12884902293 +mike nixon 17179869708 +mike polk 17179869752 +mike polk 21474837097 +mike polk 21474837344 +mike quirinius 12884902240 +mike steinbeck 8589934653 +mike steinbeck 12884902273 +mike steinbeck 12884902301 +mike steinbeck 17179869903 +mike van buren 8589934942 +mike van buren 12884902402 +mike white 12884902485 +mike white 17179869676 +mike white 21474836928 +mike white 25769804626 +mike young 8589934704 +mike young 8589934878 +mike young 17179869685 +mike zipper 4294967501 +mike zipper 17179869582 +mike zipper 25769804400 +nick allen 8589934664 +nick allen 8589934860 +nick brown 21474836962 +nick davidson 4294967357 +nick ellison 12884902066 +nick ellison 17179869779 +nick falkner 8589935020 +nick falkner 12884902433 +nick garcia 8589934885 +nick garcia 17179869635 +nick garcia 17179869681 +nick ichabod 12884902193 +nick ichabod 12884902223 +nick ichabod 12884902252 +nick johnson 17179869591 +nick johnson 17179869702 +nick laertes 8589934919 +nick miller 12884902419 +nick nixon 8589934910 +nick ovid 12884902267 +nick polk 17179869712 +nick quirinius 4294967296 +nick quirinius 12884902183 +nick robinson 17179869506 +nick robinson 17179869731 +nick steinbeck 4294967355 +nick thompson 8589934922 +nick underhill 25769804624 +nick van buren 8589934635 +nick xylophone 12884902279 +nick young 12884902399 +nick young 21474837140 +nick zipper 12884902300 +nick zipper 17179869849 +oscar allen 17179869779 +oscar brown 12884902062 +oscar carson 12884902232 +oscar carson 17179869663 +oscar carson 17179869779 +oscar carson 21474837066 +oscar carson 21474837089 +oscar davidson 17179869895 +oscar ellison 4294967304 +oscar ellison 8589934740 +oscar falkner 4294967526 +oscar garcia 21474837156 +oscar hernandez 4294967343 +oscar hernandez 8589935049 +oscar ichabod 8589934837 +oscar ichabod 21474836952 +oscar ichabod 21474837021 +oscar ichabod 25769804491 +oscar johnson 12884902182 +oscar johnson 30064772044 +oscar king 12884902159 +oscar king 17179869738 +oscar king 17179869834 +oscar laertes 4294967550 +oscar laertes 8589934727 +oscar laertes 
12884902043 +oscar laertes 12884902478 +oscar nixon 17179869458 +oscar ovid 12884902128 +oscar ovid 12884902240 +oscar ovid 25769804460 +oscar polk 21474836829 +oscar polk 21474837063 +oscar quirinius 8589934728 +oscar quirinius 17179869698 +oscar quirinius 21474837051 +oscar quirinius 25769804521 +oscar robinson 8589934656 +oscar robinson 12884902249 +oscar robinson 21474837105 +oscar robinson 25769804694 +oscar steinbeck 4294967548 +oscar thompson 8589934776 +oscar thompson 12884902164 +oscar thompson 12884902317 +oscar thompson 17179869884 +oscar underhill 8589934895 +oscar van buren 4294967500 +oscar van buren 8589934984 +oscar van buren 21474837205 +oscar white 4294967454 +oscar white 8589934826 +oscar white 21474836931 +oscar white 21474837305 +oscar xylophone 12884902193 +oscar xylophone 12884902307 +oscar xylophone 17179869593 +oscar zipper 8589934865 +oscar zipper 8589934874 +oscar zipper 8589934911 +priscilla brown 8589934848 +priscilla brown 8589935013 +priscilla brown 17179869801 +priscilla carson 12884902145 +priscilla carson 21474836880 +priscilla carson 30064772126 +priscilla ichabod 4294967547 +priscilla ichabod 17179869756 +priscilla johnson 4294967468 +priscilla johnson 8589934667 +priscilla johnson 17179869667 +priscilla johnson 17179869787 +priscilla johnson 25769804279 +priscilla king 12884902153 +priscilla nixon 12884902188 +priscilla nixon 25769804766 +priscilla ovid 12884902234 +priscilla ovid 30064772049 +priscilla polk 17179869480 +priscilla quirinius 12884902171 +priscilla thompson 25769804637 +priscilla underhill 4294967333 +priscilla underhill 17179869740 +priscilla van buren 12884902324 +priscilla van buren 21474837167 +priscilla van buren 21474837343 +priscilla white 4294967419 +priscilla xylophone 8589934792 +priscilla xylophone 12884902245 +priscilla xylophone 12884902287 +priscilla young 21474836992 +priscilla young 34359739656 +priscilla zipper 12884902296 +priscilla zipper 12884902537 +quinn allen 4294967542 +quinn allen 17179869552 +quinn brown 12884902251 +quinn brown 17179869401 +quinn brown 17179869626 +quinn davidson 8589934992 +quinn davidson 17179869690 +quinn davidson 25769804455 +quinn davidson 30064771771 +quinn ellison 12884902376 +quinn ellison 34359739559 +quinn garcia 8589934828 +quinn garcia 12884902387 +quinn garcia 12884902460 +quinn garcia 21474837066 +quinn ichabod 30064772171 +quinn king 4294967458 +quinn king 4294967538 +quinn laertes 8589935080 +quinn laertes 17179869711 +quinn laertes 21474837142 +quinn nixon 17179869672 +quinn ovid 17179869695 +quinn quirinius 21474836827 +quinn robinson 12884902445 +quinn steinbeck 17179869739 +quinn steinbeck 21474836905 +quinn thompson 17179869645 +quinn thompson 25769804317 +quinn underhill 8589934815 +quinn underhill 12884902185 +quinn underhill 30064771762 +quinn van buren 4294967362 +quinn young 8589934731 +quinn zipper 12884902453 +quinn zipper 17179869841 +rachel allen 8589934882 +rachel allen 12884902208 +rachel brown 8589934768 +rachel brown 12884902075 +rachel brown 17179869910 +rachel brown 17179869911 +rachel brown 21474837280 +rachel carson 8589934728 +rachel carson 17179869970 +rachel davidson 30064771666 +rachel ellison 4294967423 +rachel falkner 4294967348 +rachel falkner 12884902482 +rachel falkner 21474837331 +rachel falkner 25769804739 +rachel johnson 38654707197 +rachel king 12884902157 +rachel king 30064771759 +rachel laertes 17179869678 +rachel laertes 25769804379 +rachel ovid 12884902055 +rachel ovid 17179869857 +rachel polk 12884902391 +rachel quirinius 17179869456 
+rachel robinson 17179869499 +rachel robinson 17179869703 +rachel robinson 25769804290 +rachel thompson 17179869910 +rachel thompson 21474836989 +rachel thompson 21474837392 +rachel underhill 8589934862 +rachel white 17179869585 +rachel white 21474837039 +rachel young 17179869708 +rachel zipper 4294967434 +rachel zipper 21474837228 +sarah carson 4294967319 +sarah carson 17179869688 +sarah carson 30064772084 +sarah ellison 4294967542 +sarah falkner 17179869797 +sarah falkner 21474837349 +sarah garcia 8589934733 +sarah garcia 8589934858 +sarah garcia 17179869599 +sarah ichabod 12884902196 +sarah ichabod 12884902401 +sarah johnson 12884902455 +sarah johnson 21474836981 +sarah johnson 21474837145 +sarah johnson 25769804480 +sarah king 12884902453 +sarah king 21474837191 +sarah miller 8589934958 +sarah ovid 21474837184 +sarah robinson 21474837237 +sarah robinson 21474837389 +sarah steinbeck 21474837313 +sarah white 17179869905 +sarah white 25769804341 +sarah xylophone 12884902207 +sarah young 21474837319 +sarah zipper 25769804616 +tom brown 8589934894 +tom brown 21474837024 +tom carson 4294967388 +tom carson 12884902278 +tom carson 21474836983 +tom davidson 8589934895 +tom ellison 12884902192 +tom ellison 17179869965 +tom ellison 25769804262 +tom falkner 12884902272 +tom falkner 17179869815 +tom hernandez 4294967296 +tom hernandez 12884902109 +tom ichabod 17179869628 +tom johnson 25769804829 +tom johnson 30064771891 +tom king 12884902390 +tom laertes 12884902181 +tom laertes 12884902236 +tom miller 12884901992 +tom miller 17179869647 +tom miller 21474837107 +tom nixon 17179869677 +tom ovid 12884902279 +tom polk 8589934748 +tom polk 8589934892 +tom quirinius 12884902174 +tom quirinius 21474836986 +tom robinson 8589934753 +tom robinson 12884902203 +tom robinson 12884902358 +tom robinson 21474836952 +tom steinbeck 8589934912 +tom van buren 8589934823 +tom van buren 12884902122 +tom van buren 25769804641 +tom white 21474837076 +tom young 4294967535 +tom young 21474837038 +tom zipper 30064772355 +ulysses brown 8589934991 +ulysses carson 8589934789 +ulysses carson 21474837258 +ulysses carson 25769804457 +ulysses carson 34359739082 +ulysses davidson 12884902216 +ulysses ellison 17179869551 +ulysses garcia 12884902382 +ulysses hernandez 12884902210 +ulysses hernandez 12884902276 +ulysses hernandez 17179869748 +ulysses ichabod 4294967353 +ulysses ichabod 12884902217 +ulysses johnson 21474837122 +ulysses king 8589934995 +ulysses laertes 8589934801 +ulysses laertes 21474837354 +ulysses laertes 25769804499 +ulysses miller 21474837284 +ulysses miller 30064771926 +ulysses nixon 17179869288 +ulysses ovid 17179869754 +ulysses polk 8589934855 +ulysses polk 8589934862 +ulysses polk 12884902420 +ulysses polk 17179869479 +ulysses quirinius 17179869659 +ulysses robinson 4294967531 +ulysses steinbeck 8589935027 +ulysses steinbeck 21474837100 +ulysses thompson 12884902194 +ulysses underhill 8589934760 +ulysses underhill 8589934799 +ulysses underhill 12884902240 +ulysses underhill 17179869759 +ulysses underhill 17179869760 +ulysses underhill 17179869939 +ulysses underhill 21474837264 +ulysses van buren 8589934938 +ulysses white 25769804453 +ulysses white 30064772086 +ulysses xylophone 8589935029 +ulysses xylophone 12884902249 +ulysses xylophone 25769804765 +ulysses young 4294967427 +ulysses young 17179869391 +ulysses young 30064771844 +victor allen 8589934793 +victor allen 12884902264 +victor brown 4294967455 +victor brown 17179869657 +victor brown 21474837426 +victor brown 30064771922 +victor davidson 17179869715 
+victor davidson 17179869872 +victor davidson 25769804287 +victor ellison 17179869611 +victor ellison 17179869709 +victor hernandez 8589934847 +victor hernandez 12884902463 +victor hernandez 17179869647 +victor hernandez 17179869720 +victor hernandez 25769804310 +victor johnson 17179869652 +victor johnson 21474837148 +victor johnson 25769804771 +victor king 8589934917 +victor king 25769804714 +victor laertes 12884902188 +victor laertes 21474837186 +victor miller 21474837170 +victor nixon 8589934778 +victor nixon 12884902261 +victor ovid 12884902350 +victor polk 17179869376 +victor quirinius 21474837074 +victor quirinius 21474837279 +victor robinson 21474836948 +victor robinson 21474837097 +victor steinbeck 12884902162 +victor steinbeck 17179869721 +victor steinbeck 21474836916 +victor thompson 25769804395 +victor van buren 21474837010 +victor van buren 25769804601 +victor white 8589934816 +victor white 30064771798 +victor xylophone 17179869560 +victor xylophone 25769804719 +victor xylophone 25769804760 +victor xylophone 34359739093 +victor xylophone 34359739095 +victor young 21474837052 +victor zipper 12884902345 +wendy allen 21474837127 +wendy allen 25769804525 +wendy allen 25769804732 +wendy brown 12884902342 +wendy brown 21474836889 +wendy ellison 12884902392 +wendy ellison 21474836763 +wendy falkner 8589934926 +wendy falkner 17179869470 +wendy falkner 25769804816 +wendy garcia 17179869439 +wendy garcia 17179869732 +wendy garcia 30064771654 +wendy garcia 30064771704 +wendy hernandez 17179869752 +wendy ichabod 17179869547 +wendy king 17179869612 +wendy king 21474837301 +wendy king 30064772042 +wendy laertes 8589934872 +wendy laertes 12884902469 +wendy laertes 21474837084 +wendy miller 17179869661 +wendy miller 17179869682 +wendy nixon 12884902521 +wendy nixon 21474836846 +wendy ovid 21474837025 +wendy ovid 38654706512 +wendy polk 8589934960 +wendy polk 21474837144 +wendy quirinius 12884902263 +wendy quirinius 17179869652 +wendy robinson 21474837104 +wendy robinson 25769804321 +wendy robinson 25769804728 +wendy steinbeck 12884902299 +wendy thompson 17179869494 +wendy thompson 21474837072 +wendy underhill 17179869898 +wendy underhill 21474837064 +wendy underhill 25769804845 +wendy van buren 25769804447 +wendy van buren 25769804679 +wendy white 17179869866 +wendy xylophone 17179869596 +wendy xylophone 25769804554 +wendy young 4294967313 +wendy young 25769804562 +xavier allen 12884902364 +xavier allen 17179869960 +xavier allen 21474836864 +xavier brown 8589934824 +xavier brown 17179869646 +xavier brown 25769804653 +xavier carson 17179869770 +xavier carson 21474837445 +xavier davidson 30064772118 +xavier davidson 34359739403 +xavier davidson 38654706539 +xavier ellison 34359739490 +xavier ellison 34359739559 +xavier garcia 21474837142 +xavier hernandez 21474837012 +xavier hernandez 25769804421 +xavier hernandez 38654707021 +xavier ichabod 12884902315 +xavier ichabod 17179869567 +xavier johnson 8589934922 +xavier johnson 38654707066 +xavier king 12884902272 +xavier king 21474836962 +xavier laertes 17179869795 +xavier ovid 17179869597 +xavier polk 12884902254 +xavier polk 17179869581 +xavier polk 17179869743 +xavier polk 34359739344 +xavier quirinius 12884902240 +xavier quirinius 21474836996 +xavier quirinius 25769804437 +xavier quirinius 25769804456 +xavier thompson 17179869822 +xavier underhill 8589934813 +xavier white 12884902262 +xavier white 12884902366 +xavier xylophone 17179869722 +xavier zipper 12884902377 +yuri allen 8589935035 +yuri allen 12884902279 +yuri brown 8589934912 +yuri brown 
12884902319 +yuri carson 21474837146 +yuri carson 25769804245 +yuri ellison 25769804504 +yuri ellison 25769804568 +yuri falkner 25769804699 +yuri falkner 42949674720 +yuri garcia 4294967362 +yuri hernandez 21474837117 +yuri johnson 21474837002 +yuri johnson 21474837165 +yuri johnson 25769804545 +yuri king 30064772090 +yuri laertes 30064772076 +yuri laertes 34359739328 +yuri nixon 12884902232 +yuri nixon 12884902265 +yuri polk 12884902362 +yuri polk 21474837245 +yuri polk 25769804539 +yuri quirinius 12884902198 +yuri quirinius 17179869606 +yuri quirinius 30064771819 +yuri steinbeck 4294967535 +yuri steinbeck 8589934657 +yuri thompson 12884902467 +yuri underhill 17179869566 +yuri underhill 17179869715 +yuri white 34359739045 +yuri xylophone 12884902412 +zach allen 17179869908 +zach brown 21474836879 +zach brown 21474836891 +zach brown 21474837040 +zach brown 21474837073 +zach brown 30064771852 +zach carson 21474837185 +zach ellison 8589934898 +zach falkner 17179869807 +zach falkner 25769804634 +zach garcia 17179869536 +zach garcia 21474837142 +zach garcia 30064772246 +zach garcia 34359739192 +zach ichabod 17179869613 +zach ichabod 17179869838 +zach king 17179869700 +zach king 21474837427 +zach king 34359739578 +zach miller 4294967391 +zach miller 12884902310 +zach miller 17179869709 +zach ovid 17179869731 +zach ovid 21474837032 +zach ovid 21474837127 +zach ovid 30064771625 +zach quirinius 34359739151 +zach robinson 21474836938 +zach steinbeck 17179869667 +zach steinbeck 25769804623 +zach thompson 12884902354 +zach thompson 17179869659 +zach underhill 12884902149 +zach white 25769804490 +zach xylophone 12884902198 +zach xylophone 21474837163 +zach young 17179869687 +zach zipper 17179869708 +zach zipper 17179869834 +zach zipper 21474837369 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-1-6378faf36ffd3f61e61cee6c0cb70e6 b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-1-6378faf36ffd3f61e61cee6c0cb70e6 new file mode 100644 index 0000000000000..1436509e4ec17 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-1-6378faf36ffd3f61e61cee6c0cb70e6 @@ -0,0 +1,1049 @@ + 9.220000267028809 + 43.72999954223633 + 89.52999877929688 +alice allen 2.7899999618530273 +alice allen 21.450000762939453 +alice allen 73.62999725341797 +alice brown 71.30999755859375 +alice carson 39.029998779296875 +alice davidson 70.3499984741211 +alice falkner 90.25 +alice garcia 48.45000076293945 +alice hernandez 88.16999816894531 +alice hernandez 90.55999755859375 +alice johnson 4.46999979019165 +alice king 19.139999389648438 +alice king 23.170000076293945 +alice king 52.22999954223633 +alice laertes 68.94999694824219 +alice laertes 69.52999877929688 +alice miller 68.95999908447266 +alice nixon 40.0 +alice nixon 48.150001525878906 +alice nixon 79.83000183105469 +alice ovid 9.039999961853027 +alice polk 62.900001525878906 +alice quirinius 37.13999938964844 +alice quirinius 62.29999923706055 +alice robinson 5.079999923706055 +alice robinson 56.099998474121094 +alice steinbeck 38.619998931884766 +alice steinbeck 55.5099983215332 +alice steinbeck 92.37000274658203 +alice underhill 98.18000030517578 +alice van buren 38.939998626708984 +alice xylophone 33.58000183105469 +alice xylophone 43.15999984741211 +alice xylophone 78.20999908447266 +alice zipper 26.43000030517578 +alice zipper 42.47999954223633 +alice zipper 89.93000030517578 +bob brown 8.069999694824219 +bob brown 70.93000030517578 +bob brown 93.08999633789062 +bob carson 
50.09000015258789 +bob davidson 1.2899999618530273 +bob davidson 53.93000030517578 +bob davidson 74.72000122070312 +bob ellison 41.34000015258789 +bob ellison 65.0199966430664 +bob ellison 75.02999877929688 +bob ellison 80.30000305175781 +bob falkner 16.989999771118164 +bob garcia 4.460000038146973 +bob garcia 5.400000095367432 +bob garcia 45.59000015258789 +bob garcia 80.30000305175781 +bob garcia 87.56999969482422 +bob hernandez 22.68000030517578 +bob ichabod 82.55999755859375 +bob king 8.789999961853027 +bob king 12.539999961853027 +bob king 39.0099983215332 +bob laertes 0.7900000214576721 +bob laertes 10.670000076293945 +bob miller 61.91999816894531 +bob ovid 46.86000061035156 +bob ovid 62.849998474121094 +bob ovid 88.77999877929688 +bob ovid 97.08999633789062 +bob polk 7.980000019073486 +bob quirinius 46.099998474121094 +bob steinbeck 9.699999809265137 +bob van buren 33.66999816894531 +bob white 45.34000015258789 +bob white 45.349998474121094 +bob xylophone 19.690000534057617 +bob xylophone 107.93000221252441 +bob young 35.16999816894531 +bob zipper 1.25 +bob zipper 3.819999933242798 +bob zipper 34.349998474121094 +calvin allen 63.119998931884766 +calvin brown 28.110000610351562 +calvin brown 85.9000015258789 +calvin brown 90.19999694824219 +calvin carson 80.2300033569336 +calvin davidson 31.770000457763672 +calvin davidson 85.51000213623047 +calvin ellison 26.489999771118164 +calvin falkner 2.9700000286102295 +calvin falkner 56.040000915527344 +calvin falkner 56.33000183105469 +calvin falkner 80.5999984741211 +calvin falkner 93.61000061035156 +calvin falkner 94.30999755859375 +calvin garcia 41.849998474121094 +calvin hernandez 33.869998931884766 +calvin johnson 66.61000061035156 +calvin laertes 23.1299991607666 +calvin laertes 50.310001373291016 +calvin nixon 9.8100004196167 +calvin nixon 41.20000076293945 +calvin nixon 69.73999786376953 +calvin ovid 69.95999908447266 +calvin ovid 71.26000213623047 +calvin ovid 79.12000274658203 +calvin ovid 84.72000122070312 +calvin polk 65.72000122070312 +calvin quirinius 29.540000915527344 +calvin quirinius 53.02000045776367 +calvin robinson 40.439998626708984 +calvin steinbeck 15.220000267028809 +calvin steinbeck 22.850000381469727 +calvin steinbeck 93.30000305175781 +calvin thompson 8.90999984741211 +calvin thompson 93.7300033569336 +calvin underhill 59.70000076293945 +calvin van buren 34.209999084472656 +calvin van buren 64.0 +calvin white 50.279998779296875 +calvin white 90.69000244140625 +calvin xylophone 21.700000762939453 +calvin xylophone 25.420000076293945 +calvin xylophone 56.810001373291016 +calvin young 24.489999771118164 +calvin young 39.810001373291016 +calvin zipper 9.1899995803833 +calvin zipper 95.37999725341797 +david allen 51.25 +david allen 64.87000274658203 +david brown 3.2100000381469727 +david brown 93.63999938964844 +david davidson 1.0800000429153442 +david davidson 62.720001220703125 +david davidson 74.1500015258789 +david davidson 95.80999755859375 +david ellison 47.689998626708984 +david ellison 85.2300033569336 +david ellison 94.1500015258789 +david hernandez 99.91000366210938 +david ichabod 5.28000020980835 +david ichabod 82.55000305175781 +david laertes 76.70999908447266 +david nixon 50.31999969482422 +david ovid 25.110000610351562 +david ovid 61.70000076293945 +david quirinius 20.639999389648438 +david quirinius 29.239999771118164 +david quirinius 79.97000122070312 +david robinson 25.280000686645508 +david robinson 161.1199951171875 +david thompson 80.89999771118164 +david underhill 8.319999694824219 +david underhill 
88.7699966430664 +david underhill 97.55999755859375 +david van buren 83.56999969482422 +david van buren 115.43999862670898 +david white 45.189998626708984 +david xylophone 8.069999694824219 +david xylophone 54.34000015258789 +david xylophone 72.9800033569336 +david young 10.25 +david young 35.650001525878906 +ethan allen 32.75 +ethan brown 7.110000133514404 +ethan brown 10.09000015258789 +ethan brown 15.630000114440918 +ethan brown 61.86000061035156 +ethan brown 73.18000030517578 +ethan brown 82.30000305175781 +ethan carson 76.33000183105469 +ethan ellison 0.2800000011920929 +ethan ellison 81.47000122070312 +ethan falkner 50.02000045776367 +ethan falkner 59.43000030517578 +ethan garcia 43.189998626708984 +ethan hernandez 49.779998779296875 +ethan johnson 90.05000305175781 +ethan king 4.349999904632568 +ethan laertes 15.449999809265137 +ethan laertes 54.75 +ethan laertes 59.209999084472656 +ethan laertes 70.38999938964844 +ethan laertes 80.70999908447266 +ethan laertes 95.06999969482422 +ethan laertes 96.29000091552734 +ethan miller 25.3700008392334 +ethan nixon 37.779998779296875 +ethan ovid 57.290000915527344 +ethan polk 2.3499999046325684 +ethan polk 21.31999969482422 +ethan polk 23.440000534057617 +ethan polk 122.71999740600586 +ethan quirinius 3.859999895095825 +ethan quirinius 51.84000015258789 +ethan quirinius 97.23999786376953 +ethan robinson 67.94000244140625 +ethan robinson 78.62000274658203 +ethan underhill 55.630001068115234 +ethan van buren 36.70000076293945 +ethan white 60.849998474121094 +ethan white 63.41999816894531 +ethan xylophone 57.11000061035156 +ethan zipper 2.9200000762939453 +ethan zipper 97.51000213623047 +fred davidson 18.860000610351562 +fred davidson 37.2400016784668 +fred davidson 78.30999755859375 +fred ellison 31.179998874664307 +fred ellison 48.59000015258789 +fred ellison 96.77999877929688 +fred falkner 10.289999961853027 +fred falkner 72.04000091552734 +fred falkner 85.0 +fred hernandez 55.9900016784668 +fred ichabod 47.359999656677246 +fred ichabod 81.31999969482422 +fred johnson 96.08999633789062 +fred king 48.369998931884766 +fred king 72.13999843597412 +fred laertes 57.63999938964844 +fred miller 46.970001220703125 +fred nixon 28.690000534057617 +fred nixon 38.04999923706055 +fred nixon 70.5199966430664 +fred nixon 93.02999877929688 +fred polk 23.959999084472656 +fred polk 39.18000030517578 +fred polk 47.31999969482422 +fred polk 90.12000274658203 +fred quirinius 15.300000190734863 +fred quirinius 29.399999618530273 +fred robinson 89.02999877929688 +fred steinbeck 32.22999954223633 +fred steinbeck 41.310001373291016 +fred steinbeck 91.05000305175781 +fred underhill 90.7699966430664 +fred van buren 1.0199999809265137 +fred van buren 21.940000534057617 +fred van buren 52.869998931884766 +fred van buren 83.58000183105469 +fred white 37.79999923706055 +fred young 46.79999923706055 +fred young 97.70999908447266 +fred zipper 29.020000457763672 +gabriella allen 46.27000045776367 +gabriella allen 64.22000122070312 +gabriella brown 15.260000228881836 +gabriella brown 84.83000183105469 +gabriella carson 42.7599983215332 +gabriella davidson 6.550000190734863 +gabriella ellison 48.08000183105469 +gabriella ellison 71.54000091552734 +gabriella falkner 10.170000076293945 +gabriella falkner 51.720001220703125 +gabriella falkner 87.61000061035156 +gabriella garcia 43.0099983215332 +gabriella hernandez 76.91999816894531 +gabriella hernandez 92.9800033569336 +gabriella ichabod 10.729999542236328 +gabriella ichabod 26.639999389648438 +gabriella ichabod 66.36000061035156 
+gabriella ichabod 71.12999725341797 +gabriella ichabod 90.3499984741211 +gabriella king 20.670000076293945 +gabriella king 80.45999908447266 +gabriella laertes 65.37999725341797 +gabriella miller 50.83000183105469 +gabriella ovid 77.7400016784668 +gabriella ovid 92.4000015258789 +gabriella polk 35.68000030517578 +gabriella polk 88.05000305175781 +gabriella steinbeck 46.45000076293945 +gabriella steinbeck 78.63999938964844 +gabriella thompson 73.31999969482422 +gabriella thompson 88.36000061035156 +gabriella thompson 94.25 +gabriella van buren 69.80000305175781 +gabriella van buren 70.05999755859375 +gabriella white 55.18000030517578 +gabriella young 9.25 +gabriella young 59.709999084472656 +gabriella zipper 36.2599983215332 +gabriella zipper 91.62999725341797 +holly allen 44.56999969482422 +holly brown 77.80999755859375 +holly brown 78.7300033569336 +holly falkner 80.73999786376953 +holly hernandez 20.81999969482422 +holly hernandez 21.190000534057617 +holly hernandez 24.790000915527344 +holly hernandez 30.25 +holly ichabod 83.2699966430664 +holly ichabod 84.69000244140625 +holly ichabod 90.51000213623047 +holly johnson 36.95000076293945 +holly johnson 64.36000061035156 +holly johnson 65.62000274658203 +holly king 42.310001373291016 +holly king 55.38999938964844 +holly laertes 52.5 +holly miller 50.400001525878906 +holly nixon 53.779998779296875 +holly nixon 88.0199966430664 +holly polk 3.619999885559082 +holly polk 98.30999755859375 +holly robinson 69.31999969482422 +holly thompson 0.07999999821186066 +holly thompson 86.69000244140625 +holly thompson 145.93999481201172 +holly underhill 42.54999923706055 +holly underhill 50.40999984741211 +holly underhill 79.95999908447266 +holly underhill 96.68000030517578 +holly van buren 68.80999755859375 +holly white 7.960000038146973 +holly white 32.91999816894531 +holly xylophone 93.11000061035156 +holly young 60.220001220703125 +holly young 66.16999816894531 +holly zipper 99.12999725341797 +holly zipper 99.29000091552734 +irene allen 38.849998474121094 +irene brown 4.789999961853027 +irene brown 53.939998626708984 +irene brown 87.66999816894531 +irene carson 94.54000091552734 +irene ellison 45.2400016784668 +irene ellison 50.08000183105469 +irene falkner 22.079999923706055 +irene falkner 99.91999816894531 +irene garcia 15.369999885559082 +irene garcia 58.43000030517578 +irene garcia 86.93000030517578 +irene ichabod 41.439998626708984 +irene ichabod 99.62000274658203 +irene johnson 5.880000114440918 +irene laertes 9.569999694824219 +irene laertes 42.66999816894531 +irene laertes 44.43000030517578 +irene miller 65.44000244140625 +irene nixon 15.100000381469727 +irene nixon 29.780000686645508 +irene nixon 42.560001373291016 +irene ovid 5.239999771118164 +irene ovid 35.130001068115234 +irene ovid 79.75 +irene polk 0.9800000190734863 +irene polk 24.020000457763672 +irene polk 42.2400016784668 +irene polk 47.08000183105469 +irene polk 95.83999633789062 +irene quirinius 12.899999618530273 +irene quirinius 58.86000061035156 +irene quirinius 70.0 +irene robinson 94.2699966430664 +irene steinbeck 94.33000183105469 +irene thompson 78.30000305175781 +irene underhill 28.309999465942383 +irene underhill 57.349998474121094 +irene van buren 54.439998626708984 +irene van buren 54.9900016784668 +irene xylophone 74.19000244140625 +jessica brown 51.290000915527344 +jessica carson 25.549999237060547 +jessica carson 31.860000610351562 +jessica carson 62.20000076293945 +jessica davidson 33.54999923706055 +jessica davidson 49.77000045776367 +jessica davidson 
95.33999633789062 +jessica davidson 99.20999908447266 +jessica ellison 11.180000305175781 +jessica ellison 22.780000686645508 +jessica falkner 99.6500015258789 +jessica garcia 5.539999961853027 +jessica garcia 87.92999941110611 +jessica ichabod 59.15999984741211 +jessica johnson 9.5600004196167 +jessica johnson 40.79999923706055 +jessica miller 151.0199966430664 +jessica nixon 77.0999984741211 +jessica nixon 90.06999969482422 +jessica ovid 71.68000030517578 +jessica ovid 119.9000015258789 +jessica polk 49.68000030517578 +jessica quirinius 22.940000534057617 +jessica quirinius 32.470001220703125 +jessica quirinius 35.619998931884766 +jessica quirinius 46.869998931884766 +jessica robinson 112.36000442504883 +jessica thompson 38.33000183105469 +jessica thompson 89.55000305175781 +jessica underhill 26.079999923706055 +jessica underhill 45.41999816894531 +jessica underhill 46.209999084472656 +jessica van buren 9.739999771118164 +jessica white 11.550000190734863 +jessica white 36.58000183105469 +jessica white 73.93000030517578 +jessica white 74.30000305175781 +jessica white 96.62000274658203 +jessica xylophone 53.060001373291016 +jessica young 11.1899995803833 +jessica young 43.369998931884766 +jessica zipper 6.630000114440918 +jessica zipper 12.020000457763672 +jessica zipper 92.43999862670898 +katie allen 64.66999816894531 +katie brown 27.719999313354492 +katie davidson 170.84000396728516 +katie ellison 3.609999895095825 +katie ellison 80.97000122070312 +katie falkner 18.5 +katie garcia 24.729999542236328 +katie garcia 84.4000015258789 +katie hernandez 38.61999988555908 +katie ichabod 30.709999084472656 +katie ichabod 39.97999954223633 +katie ichabod 43.16999816894531 +katie king 39.34000015258789 +katie king 39.83000183105469 +katie king 97.80999755859375 +katie miller 31.399999618530273 +katie miller 74.77999877929688 +katie nixon 121.3700008392334 +katie ovid 50.65999984741211 +katie polk 11.680000305175781 +katie polk 40.2400016784668 +katie robinson 13.890000343322754 +katie van buren 17.739999771118164 +katie van buren 52.529998779296875 +katie white 1.309999942779541 +katie white 34.72999954223633 +katie xylophone 14.130000114440918 +katie young 31.010000228881836 +katie young 72.51000213623047 +katie young 97.56999969482422 +katie zipper 18.93000030517578 +katie zipper 58.75 +luke allen 15.180000305175781 +luke allen 50.959999084472656 +luke allen 66.61000061035156 +luke allen 89.55000305175781 +luke allen 99.38999938964844 +luke brown 51.790000915527344 +luke davidson 7.050000190734863 +luke davidson 28.950000762939453 +luke ellison 1.8700000047683716 +luke ellison 16.25 +luke ellison 87.83000183105469 +luke falkner 32.25 +luke falkner 39.60000038146973 +luke garcia 13.350000381469727 +luke garcia 30.3700008392334 +luke ichabod 8.449999809265137 +luke ichabod 97.87000274658203 +luke johnson 11.149999618530273 +luke johnson 14.4399995803833 +luke johnson 31.670000076293945 +luke laertes 0.5199999809265137 +luke laertes 4.800000190734863 +luke laertes 11.819999694824219 +luke laertes 16.690000534057617 +luke laertes 45.9900016784668 +luke miller 97.6500015258789 +luke ovid 38.04999923706055 +luke ovid 159.68000030517578 +luke polk 46.880001068115234 +luke polk 95.27999877929688 +luke quirinius 40.41999816894531 +luke robinson 55.099998474121094 +luke robinson 65.69999694824219 +luke thompson 94.37999725341797 +luke underhill 59.68000030517578 +luke underhill 95.52999877929688 +luke underhill 96.94000244140625 +luke van buren 148.62999725341797 +luke white 67.12000274658203 +luke 
xylophone 48.279998779296875 +luke zipper 24.829999923706055 +mike allen 48.53999900817871 +mike brown 48.22999954223633 +mike carson 20.06999969482422 +mike carson 47.56999969482422 +mike carson 81.66000366210938 +mike davidson 27.309999465942383 +mike davidson 54.83000183105469 +mike ellison 28.559999465942383 +mike ellison 37.099998474121094 +mike ellison 62.13999938964844 +mike ellison 79.37999725341797 +mike ellison 85.73999786376953 +mike falkner 16.479999542236328 +mike garcia 70.8499984741211 +mike garcia 75.83000183105469 +mike garcia 79.20999908447266 +mike hernandez 37.900001525878906 +mike hernandez 59.45000076293945 +mike ichabod 64.7699966430664 +mike king 38.790000915527344 +mike king 62.7400016784668 +mike king 78.26000213623047 +mike king 84.2300033569336 +mike king 85.0999984741211 +mike king 94.68000030517578 +mike miller 3.9600000381469727 +mike nixon 60.119998931884766 +mike nixon 92.95999908447266 +mike polk 12.449999809265137 +mike polk 27.06999969482422 +mike polk 99.68000030517578 +mike quirinius 89.37999725341797 +mike steinbeck 5.849999904632568 +mike steinbeck 85.13999938964844 +mike steinbeck 93.07000207901001 +mike steinbeck 97.45999908447266 +mike van buren 80.83999633789062 +mike van buren 114.56999969482422 +mike white 9.569999694824219 +mike white 28.889999389648438 +mike white 32.0099983215332 +mike white 91.87999725341797 +mike young 7.820000171661377 +mike young 74.58999633789062 +mike young 83.54000091552734 +mike zipper 26.729999542236328 +mike zipper 83.91999816894531 +mike zipper 97.38999938964844 +nick allen 21.830000400543213 +nick allen 35.08000183105469 +nick brown 42.5099983215332 +nick davidson 49.439998626708984 +nick ellison 9.680000305175781 +nick ellison 89.01000213623047 +nick falkner 10.130000114440918 +nick falkner 88.47000122070312 +nick garcia 13.9399995803833 +nick garcia 26.389999389648438 +nick garcia 46.43000030517578 +nick ichabod 23.450000762939453 +nick ichabod 47.59000015258789 +nick ichabod 74.41999816894531 +nick johnson 3.9700000286102295 +nick johnson 94.08000183105469 +nick laertes 96.25 +nick miller 82.97000122070312 +nick nixon 96.37999725341797 +nick ovid 87.98999786376953 +nick polk 59.27000141143799 +nick quirinius 67.44999694824219 +nick quirinius 81.16999816894531 +nick robinson 57.66999816894531 +nick robinson 60.709999084472656 +nick steinbeck 97.83000183105469 +nick thompson 11.90999984741211 +nick underhill 20.809999465942383 +nick van buren 51.290000915527344 +nick xylophone 103.45999908447266 +nick young 0.27000001072883606 +nick young 24.799999237060547 +nick zipper 56.619998931884766 +nick zipper 119.0199966430664 +oscar allen 18.6299991607666 +oscar brown 13.100000381469727 +oscar carson 6.869999885559082 +oscar carson 55.20000076293945 +oscar carson 78.9800033569336 +oscar carson 87.4800033569336 +oscar carson 98.51000213623047 +oscar davidson 64.45999908447266 +oscar ellison 57.88999938964844 +oscar ellison 107.7100019454956 +oscar falkner 98.4800033569336 +oscar garcia 67.4800033569336 +oscar hernandez 95.4800033569336 +oscar hernandez 125.92999649047852 +oscar ichabod 3.3299999237060547 +oscar ichabod 33.52000045776367 +oscar ichabod 71.80000305175781 +oscar ichabod 76.69000244140625 +oscar johnson 16.09000015258789 +oscar johnson 139.69000244140625 +oscar king 19.059999465942383 +oscar king 25.8799991607666 +oscar king 59.5 +oscar laertes 5.510000228881836 +oscar laertes 8.420000076293945 +oscar laertes 9.260000228881836 +oscar laertes 27.1200008392334 +oscar nixon 41.619998931884766 +oscar ovid 
37.13999938964844 +oscar ovid 82.23999786376953 +oscar ovid 91.52999877929688 +oscar polk 30.610000610351562 +oscar polk 63.900001525878906 +oscar quirinius 41.45000076293945 +oscar quirinius 65.43000030517578 +oscar quirinius 113.35000228881836 +oscar quirinius 139.10000610351562 +oscar robinson 11.34000015258789 +oscar robinson 42.849998474121094 +oscar robinson 74.52999877929688 +oscar robinson 131.31999969482422 +oscar steinbeck 29.59000015258789 +oscar thompson 31.90999984741211 +oscar thompson 41.34000015258789 +oscar thompson 60.529998779296875 +oscar thompson 70.88999938964844 +oscar underhill 87.4000015258789 +oscar van buren 2.180000066757202 +oscar van buren 61.880001068115234 +oscar van buren 91.77999877929688 +oscar white 19.0 +oscar white 28.450000762939453 +oscar white 51.849998474121094 +oscar white 59.83000183105469 +oscar xylophone 21.799999237060547 +oscar xylophone 57.119998931884766 +oscar xylophone 57.22999954223633 +oscar zipper 13.989999771118164 +oscar zipper 32.88999938964844 +oscar zipper 39.81999969482422 +priscilla brown 70.23999786376953 +priscilla brown 80.5199966430664 +priscilla brown 104.63999938964844 +priscilla carson 7.960000038146973 +priscilla carson 79.80999946594238 +priscilla carson 85.43000316619873 +priscilla ichabod 80.04000091552734 +priscilla ichabod 92.61000061035156 +priscilla johnson 61.939998626708984 +priscilla johnson 67.9800033569336 +priscilla johnson 68.32999992370605 +priscilla johnson 91.4800033569336 +priscilla johnson 92.48000144958496 +priscilla king 43.91999816894531 +priscilla nixon 95.80999755859375 +priscilla nixon 107.69000244140625 +priscilla ovid 52.72999954223633 +priscilla ovid 125.73999643325806 +priscilla polk 15.149999618530273 +priscilla quirinius 9.710000038146973 +priscilla thompson 9.800000190734863 +priscilla underhill 35.720001220703125 +priscilla underhill 68.22000122070312 +priscilla van buren 68.88999938964844 +priscilla van buren 91.61000061035156 +priscilla van buren 170.5500030517578 +priscilla white 78.27999877929688 +priscilla xylophone 0.15000000596046448 +priscilla xylophone 21.489999771118164 +priscilla xylophone 59.61000061035156 +priscilla young 0.4300000071525574 +priscilla young 4.320000171661377 +priscilla zipper 18.6299991607666 +priscilla zipper 25.670000076293945 +quinn allen 54.72999954223633 +quinn allen 83.33000183105469 +quinn brown 24.280000686645508 +quinn brown 52.439998626708984 +quinn brown 80.58000183105469 +quinn davidson 61.57999849319458 +quinn davidson 67.18000030517578 +quinn davidson 83.4000015258789 +quinn davidson 95.11000061035156 +quinn ellison 19.280000686645508 +quinn ellison 30.649999618530273 +quinn garcia 40.97999954223633 +quinn garcia 59.9900016784668 +quinn garcia 74.0199966430664 +quinn garcia 172.8499984741211 +quinn ichabod 36.790000915527344 +quinn king 74.62000274658203 +quinn king 86.2300033569336 +quinn laertes 4.710000038146973 +quinn laertes 41.290000915527344 +quinn laertes 76.5199966430664 +quinn nixon 86.64000129699707 +quinn ovid 52.500000953674316 +quinn quirinius 32.18000030517578 +quinn robinson 38.64999866485596 +quinn steinbeck 8.449999809265137 +quinn steinbeck 66.51000213623047 +quinn thompson 74.9399995803833 +quinn thompson 76.27999877929688 +quinn underhill 17.15999984741211 +quinn underhill 79.4800033569336 +quinn underhill 140.92000198364258 +quinn van buren 82.5199966430664 +quinn young 45.060001373291016 +quinn zipper 22.25 +quinn zipper 58.0 +rachel allen 15.8100004196167 +rachel allen 74.44999694824219 +rachel brown 2.9600000381469727 
+rachel brown 30.809999465942383 +rachel brown 33.36000061035156 +rachel brown 34.40999984741211 +rachel brown 52.16999816894531 +rachel carson 37.599998474121094 +rachel carson 98.95999908447266 +rachel davidson 4.920000076293945 +rachel ellison 10.600000381469727 +rachel falkner 46.150001525878906 +rachel falkner 80.91999816894531 +rachel falkner 88.80000305175781 +rachel falkner 99.23999786376953 +rachel johnson 62.22999954223633 +rachel king 36.220001220703125 +rachel king 59.45000076293945 +rachel laertes 44.220001220703125 +rachel laertes 45.45000076293945 +rachel ovid 0.6000000238418579 +rachel ovid 1.0800000429153442 +rachel polk 89.27999877929688 +rachel quirinius 12.4399995803833 +rachel robinson 4.570000171661377 +rachel robinson 30.360000610351562 +rachel robinson 64.94999694824219 +rachel thompson 0.5600000023841858 +rachel thompson 4.170000076293945 +rachel thompson 58.52000045776367 +rachel underhill 48.45000076293945 +rachel white 43.709999084472656 +rachel white 94.72000122070312 +rachel young 43.130001068115234 +rachel zipper 7.059999942779541 +rachel zipper 72.18000030517578 +sarah carson 1.909999966621399 +sarah carson 14.210000038146973 +sarah carson 78.88999938964844 +sarah ellison 16.989999771118164 +sarah falkner 90.27999877929688 +sarah falkner 99.36000061035156 +sarah garcia 41.290000915527344 +sarah garcia 58.010000228881836 +sarah garcia 153.8800048828125 +sarah ichabod 81.31999969482422 +sarah ichabod 97.26000213623047 +sarah johnson 16.239999771118164 +sarah johnson 45.099998474121094 +sarah johnson 73.87999725341797 +sarah johnson 77.66000366210938 +sarah king 41.869998931884766 +sarah king 48.25 +sarah miller 41.709999084472656 +sarah ovid 60.02000045776367 +sarah robinson 33.83000183105469 +sarah robinson 66.88999938964844 +sarah steinbeck 40.16999816894531 +sarah white 37.849998474121094 +sarah white 89.80999755859375 +sarah xylophone 68.31999969482422 +sarah young 45.560001373291016 +sarah zipper 83.08000183105469 +tom brown 8.609999656677246 +tom brown 12.319999694824219 +tom carson 5.440000057220459 +tom carson 16.079999923706055 +tom carson 18.889999389648438 +tom davidson 170.0 +tom ellison 76.73999786376953 +tom ellison 98.2300033569336 +tom ellison 155.99999618530273 +tom falkner 60.130001068115234 +tom falkner 88.22000122070312 +tom hernandez 41.36000061035156 +tom hernandez 81.63999938964844 +tom ichabod 103.29000282287598 +tom johnson 14.920000076293945 +tom johnson 43.56999969482422 +tom king 15.75 +tom laertes 43.310001373291016 +tom laertes 64.6500015258789 +tom miller 21.229999542236328 +tom miller 68.25 +tom miller 139.04000091552734 +tom nixon 153.83999633789062 +tom ovid 8.670000076293945 +tom polk 38.29999923706055 +tom polk 54.43000030517578 +tom quirinius 10.1899995803833 +tom quirinius 75.31999969482422 +tom robinson 90.69000244140625 +tom robinson 98.72000122070312 +tom robinson 99.1500015258789 +tom robinson 123.5199966430664 +tom steinbeck 26.489999771118164 +tom van buren 3.2799999713897705 +tom van buren 40.779998779296875 +tom van buren 63.5099983215332 +tom white 40.040000915527344 +tom young 22.850000381469727 +tom young 84.30999755859375 +tom zipper 122.78000259399414 +ulysses brown 72.79000091552734 +ulysses carson 77.41999816894531 +ulysses carson 79.54000091552734 +ulysses carson 146.7100067138672 +ulysses carson 220.18000030517578 +ulysses davidson 55.16999816894531 +ulysses ellison 96.7300033569336 +ulysses garcia 89.80000305175781 +ulysses hernandez 35.16999816894531 +ulysses hernandez 54.470001220703125 +ulysses 
hernandez 68.25 +ulysses ichabod 19.1299991607666 +ulysses ichabod 98.56999969482422 +ulysses johnson 102.5999984741211 +ulysses king 74.19000244140625 +ulysses laertes 1.9199999570846558 +ulysses laertes 24.860000610351562 +ulysses laertes 50.1899995803833 +ulysses miller 2.9600000381469727 +ulysses miller 76.27999877929688 +ulysses nixon 80.95999908447266 +ulysses ovid 29.360000610351562 +ulysses polk 8.710000038146973 +ulysses polk 60.060001373291016 +ulysses polk 65.0199966430664 +ulysses polk 97.10000038146973 +ulysses quirinius 112.56999969482422 +ulysses robinson 104.85999870300293 +ulysses steinbeck 32.40999984741211 +ulysses steinbeck 74.0 +ulysses thompson 198.83000564575195 +ulysses underhill 14.119999885559082 +ulysses underhill 22.360000610351562 +ulysses underhill 35.88999938964844 +ulysses underhill 57.369998931884766 +ulysses underhill 81.58000183105469 +ulysses underhill 88.4800033569336 +ulysses underhill 99.66999816894531 +ulysses van buren 95.52999877929688 +ulysses white 59.54999923706055 +ulysses white 170.0800018310547 +ulysses xylophone 39.689998626708984 +ulysses xylophone 54.099998474121094 +ulysses xylophone 57.3100004196167 +ulysses young 14.930000305175781 +ulysses young 32.52000045776367 +ulysses young 114.55999946594238 +victor allen 44.27000045776367 +victor allen 89.5 +victor brown 59.34000015258789 +victor brown 77.88999938964844 +victor brown 90.37999725341797 +victor brown 91.97000122070312 +victor davidson 60.2599983215332 +victor davidson 66.5999984741211 +victor davidson 98.54999923706055 +victor ellison 17.8700008392334 +victor ellison 68.8499984741211 +victor hernandez 19.030000686645508 +victor hernandez 59.619998931884766 +victor hernandez 69.87999725341797 +victor hernandez 71.3499984741211 +victor hernandez 74.5199966430664 +victor johnson 18.200000762939453 +victor johnson 42.89000141620636 +victor johnson 72.55999755859375 +victor king 47.880001068115234 +victor king 66.66999816894531 +victor laertes 62.91999816894531 +victor laertes 67.58999633789062 +victor miller 22.1200008392334 +victor nixon 34.029998779296875 +victor nixon 68.5 +victor ovid 125.84000015258789 +victor polk 17.210000038146973 +victor quirinius 50.70000076293945 +victor quirinius 134.4000015258789 +victor robinson 51.560001373291016 +victor robinson 58.66999816894531 +victor steinbeck 12.460000038146973 +victor steinbeck 46.09000015258789 +victor steinbeck 52.720001220703125 +victor thompson 58.65999984741211 +victor van buren 34.970001220703125 +victor van buren 41.68000030517578 +victor white 5.670000076293945 +victor white 135.02999687194824 +victor xylophone 10.09000015258789 +victor xylophone 11.220000267028809 +victor xylophone 28.5 +victor xylophone 62.38999938964844 +victor xylophone 76.0999984741211 +victor young 88.55000305175781 +victor zipper 26.289999961853027 +wendy allen 56.06999969482422 +wendy allen 93.96999740600586 +wendy allen 220.7900003194809 +wendy brown 27.8700008392334 +wendy brown 50.2599983215332 +wendy ellison 94.66000366210938 +wendy ellison 124.93999481201172 +wendy falkner 22.010000228881836 +wendy falkner 97.68000030517578 +wendy falkner 141.36000061035156 +wendy garcia 30.6200008392334 +wendy garcia 57.25 +wendy garcia 82.1500015258789 +wendy garcia 133.3400001525879 +wendy hernandez 48.11000061035156 +wendy ichabod 13.149999618530273 +wendy king 45.189998626708984 +wendy king 63.33000183105469 +wendy king 183.75 +wendy laertes 46.619998931884766 +wendy laertes 70.37999725341797 +wendy laertes 79.98999786376953 +wendy miller 
1.2699999809265137 +wendy miller 12.420000076293945 +wendy nixon 45.91999816894531 +wendy nixon 60.2599983215332 +wendy ovid 86.62999725341797 +wendy ovid 95.33000183105469 +wendy polk 32.369998931884766 +wendy polk 42.04000073671341 +wendy quirinius 12.15999984741211 +wendy quirinius 14.300000190734863 +wendy robinson 26.469999313354492 +wendy robinson 71.06999969482422 +wendy robinson 117.02000045776367 +wendy steinbeck 120.67000007629395 +wendy thompson 67.34000015258789 +wendy thompson 85.76000213623047 +wendy underhill 68.04000091552734 +wendy underhill 79.19000244140625 +wendy underhill 89.77999877929688 +wendy van buren 57.459999084472656 +wendy van buren 92.81999969482422 +wendy white 73.68000030517578 +wendy xylophone 76.69999694824219 +wendy xylophone 90.60000038146973 +wendy young 8.449999809265137 +wendy young 33.7599983215332 +xavier allen 45.68000030517578 +xavier allen 83.93000030517578 +xavier allen 98.22000122070312 +xavier brown 7.789999961853027 +xavier brown 90.7300033569336 +xavier brown 96.2300033569336 +xavier carson 20.790000915527344 +xavier carson 94.68000030517578 +xavier davidson 15.920000076293945 +xavier davidson 82.41000366210938 +xavier davidson 106.5199966430664 +xavier ellison 12.850000381469727 +xavier ellison 77.97000122070312 +xavier garcia 70.04000091552734 +xavier hernandez 6.670000076293945 +xavier hernandez 38.56999969482422 +xavier hernandez 67.26000213623047 +xavier ichabod 4.71999979019165 +xavier ichabod 71.19000244140625 +xavier johnson 27.299999237060547 +xavier johnson 203.65999794006348 +xavier king 8.569999694824219 +xavier king 87.22000122070312 +xavier laertes 15.899999618530273 +xavier ovid 112.91000366210938 +xavier polk 13.869999885559082 +xavier polk 61.209999084472656 +xavier polk 72.62000274658203 +xavier polk 76.93000030517578 +xavier quirinius 62.52000045776367 +xavier quirinius 83.01000022888184 +xavier quirinius 89.55000305175781 +xavier quirinius 97.14999961853027 +xavier thompson 9.930000305175781 +xavier underhill 47.27000045776367 +xavier white 59.20000171661377 +xavier white 75.29000091552734 +xavier xylophone 79.41999816894531 +xavier zipper 8.449999809265137 +yuri allen 52.849998474121094 +yuri allen 94.98999977111816 +yuri brown 75.19000244140625 +yuri brown 84.02999877929688 +yuri carson 6.289999961853027 +yuri carson 91.16000366210938 +yuri ellison 1.1200000047683716 +yuri ellison 98.82999801635742 +yuri falkner 39.6299991607666 +yuri falkner 86.0 +yuri garcia 27.65999984741211 +yuri hernandez 2.069999933242798 +yuri johnson 0.12999999523162842 +yuri johnson 39.900001525878906 +yuri johnson 48.220001220703125 +yuri king 69.59000015258789 +yuri laertes 37.59000015258789 +yuri laertes 61.95000076293945 +yuri nixon 2.200000047683716 +yuri nixon 82.81000328063965 +yuri polk 26.760000228881836 +yuri polk 28.790000915527344 +yuri polk 105.11999702453613 +yuri quirinius 10.260000228881836 +yuri quirinius 54.310001373291016 +yuri quirinius 57.93000030517578 +yuri steinbeck 17.790000915527344 +yuri steinbeck 75.87999725341797 +yuri thompson 14.920000076293945 +yuri underhill 23.770000457763672 +yuri underhill 83.87000274658203 +yuri white 34.58000183105469 +yuri xylophone 20.3799991607666 +zach allen 65.43000030517578 +zach brown 48.0099983215332 +zach brown 49.119998931884766 +zach brown 57.08000183105469 +zach brown 67.37999725341797 +zach brown 100.46000289916992 +zach carson 95.86999893188477 +zach ellison 6.840000152587891 +zach falkner 9.130000114440918 +zach falkner 91.41999816894531 +zach garcia 32.20000076293945 +zach 
garcia 84.37999725341797 +zach garcia 106.86999893188477 +zach garcia 167.62000274658203 +zach ichabod 64.25 +zach ichabod 106.69000244140625 +zach king 46.18000030517578 +zach king 70.51000213623047 +zach king 86.93000030517578 +zach miller 2.5999999046325684 +zach miller 21.280000686645508 +zach miller 53.27000045776367 +zach ovid 0.10000000149011612 +zach ovid 23.06999969482422 +zach ovid 92.55000305175781 +zach ovid 94.33999633789062 +zach quirinius 39.209999084472656 +zach robinson 122.81000137329102 +zach steinbeck 85.48999786376953 +zach steinbeck 90.05000305175781 +zach thompson 71.5 +zach thompson 91.63999938964844 +zach underhill 86.22000122070312 +zach white 70.52999877929688 +zach xylophone 43.84999942779541 +zach xylophone 71.01000213623047 +zach young 71.31999969482422 +zach zipper 52.60000133514404 +zach zipper 85.87000274658203 +zach zipper 94.43000030517578 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-2-5f0eab306ea3c22b11ace9b542a7ee56 b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-2-5f0eab306ea3c22b11ace9b542a7ee56 new file mode 100644 index 0000000000000..e55bede9242e5 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-2-5f0eab306ea3c22b11ace9b542a7ee56 @@ -0,0 +1,1049 @@ + 257.04000091552734 + 261.16000175476074 + 284.2699966430664 +alice allen 73.62999725341797 +alice allen 195.0099983215332 +alice allen 196.729998588562 +alice brown 245.52000427246094 +alice carson 424.63000106811523 +alice davidson 319.00999450683594 +alice falkner 90.25 +alice garcia 174.36999893188477 +alice hernandez 185.6699981689453 +alice hernandez 380.1699981689453 +alice johnson 367.2900071144104 +alice king 58.78999900817871 +alice king 294.1199951171875 +alice king 371.23999404907227 +alice laertes 68.94999694824219 +alice laertes 258.3999938964844 +alice miller 154.19000244140625 +alice nixon 209.91000366210938 +alice nixon 246.36000442504883 +alice nixon 260.19000244140625 +alice ovid 49.8199987411499 +alice polk 148.63999938964844 +alice quirinius 239.81999588012695 +alice quirinius 301.4100036621094 +alice robinson 140.47999572753906 +alice robinson 266.4900016784668 +alice steinbeck 169.76000213623047 +alice steinbeck 186.70999908447266 +alice steinbeck 446.8099937438965 +alice underhill 98.18000030517578 +alice van buren 112.42000198364258 +alice xylophone 78.20999908447266 +alice xylophone 91.22000122070312 +alice xylophone 413.1199951171875 +alice zipper 89.93000030517578 +alice zipper 279.54000091552734 +alice zipper 293.25000381469727 +bob brown 188.89999389648438 +bob brown 228.80999946594238 +bob brown 247.37999725341797 +bob carson 207.67000198364258 +bob davidson 53.93000030517578 +bob davidson 113.83999919891357 +bob davidson 259.0899963378906 +bob ellison 65.0199966430664 +bob ellison 80.30000305175781 +bob ellison 243.86000061035156 +bob ellison 245.02999877929688 +bob falkner 208.82000160217285 +bob garcia 33.410000801086426 +bob garcia 87.56999969482422 +bob garcia 120.17999649047852 +bob garcia 148.65999841690063 +bob garcia 178.87000274658203 +bob hernandez 337.23999977111816 +bob ichabod 82.55999755859375 +bob king 114.11000156402588 +bob king 134.81999588012695 +bob king 152.7699956893921 +bob laertes 42.89999961853027 +bob laertes 393.99999433755875 +bob miller 146.1500015258789 +bob ovid 62.849998474121094 +bob ovid 88.77999877929688 +bob ovid 97.08999633789062 +bob ovid 102.93000030517578 +bob polk 261.4599976539612 +bob quirinius 298.7199897766113 +bob 
steinbeck 103.01999950408936 +bob van buren 174.89999771118164 +bob white 194.25 +bob white 347.7799949645996 +bob xylophone 19.690000534057617 +bob xylophone 191.52999687194824 +bob young 78.17999649047852 +bob zipper 132.86000061035156 +bob zipper 139.6900042295456 +bob zipper 295.59000039100647 +calvin allen 255.68000411987305 +calvin brown 85.9000015258789 +calvin brown 238.02000427246094 +calvin brown 275.8699951171875 +calvin carson 80.2300033569336 +calvin davidson 31.770000457763672 +calvin davidson 181.76000213623047 +calvin ellison 188.0300006866455 +calvin falkner 93.61000061035156 +calvin falkner 94.30999755859375 +calvin falkner 125.91999816894531 +calvin falkner 137.1699981689453 +calvin falkner 140.99999594688416 +calvin falkner 168.81999969482422 +calvin garcia 307.439998626709 +calvin hernandez 303.4599952697754 +calvin johnson 152.8300018310547 +calvin laertes 150.69999885559082 +calvin laertes 216.81000137329102 +calvin nixon 131.57999801635742 +calvin nixon 143.3699951171875 +calvin nixon 196.34000301361084 +calvin ovid 69.95999908447266 +calvin ovid 176.13999938964844 +calvin ovid 176.3800048828125 +calvin ovid 248.65999603271484 +calvin polk 147.04000091552734 +calvin quirinius 226.66999435424805 +calvin quirinius 266.7100009918213 +calvin robinson 289.7900047302246 +calvin steinbeck 92.05000305175781 +calvin steinbeck 118.15000057220459 +calvin steinbeck 333.6000003814697 +calvin thompson 93.7300033569336 +calvin thompson 249.56000137329102 +calvin underhill 208.3400001525879 +calvin van buren 136.51000213623047 +calvin van buren 347.0999946594238 +calvin white 90.69000244140625 +calvin white 112.15999984741211 +calvin xylophone 25.420000076293945 +calvin xylophone 237.71999740600586 +calvin xylophone 315.2099952697754 +calvin young 222.96000289916992 +calvin young 243.3199977874756 +calvin zipper 95.37999725341797 +calvin zipper 531.3600015640259 +david allen 202.43000030517578 +david allen 302.4399948120117 +david brown 93.63999938964844 +david brown 258.05999851226807 +david davidson 74.1500015258789 +david davidson 95.80999755859375 +david davidson 106.50000202655792 +david davidson 149.94000244140625 +david ellison 85.2300033569336 +david ellison 94.1500015258789 +david ellison 208.3900032043457 +david hernandez 99.91000366210938 +david ichabod 82.55000305175781 +david ichabod 320.47999143600464 +david laertes 250.1699981689453 +david nixon 174.58999633789062 +david ovid 198.21000289916992 +david ovid 230.47999954223633 +david quirinius 29.239999771118164 +david quirinius 79.97000122070312 +david quirinius 180.92999649047852 +david robinson 147.65999603271484 +david robinson 168.7100009918213 +david thompson 41.88999938964844 +david underhill 97.55999755859375 +david underhill 277.5999984741211 +david underhill 369.4600009918213 +david van buren 83.56999969482422 +david van buren 289.189998626709 +david white 124.6099967956543 +david xylophone 135.70000076293945 +david xylophone 237.06000137329102 +david xylophone 338.20999908447266 +david young 172.49000549316406 +david young 184.9800033569336 +ethan allen 240.42000198364258 +ethan brown 61.86000061035156 +ethan brown 73.18000030517578 +ethan brown 105.29000043869019 +ethan brown 177.8300018310547 +ethan brown 185.98999691009521 +ethan brown 284.729998588562 +ethan carson 265.22999572753906 +ethan ellison 166.5 +ethan ellison 244.99000671505928 +ethan falkner 59.43000030517578 +ethan falkner 196.17000198364258 +ethan garcia 271.5999946594238 +ethan hernandez 264.50999450683594 +ethan johnson 90.05000305175781 
+ethan king 36.49000024795532 +ethan laertes 95.06999969482422 +ethan laertes 96.29000091552734 +ethan laertes 189.66000270843506 +ethan laertes 192.79999923706055 +ethan laertes 249.04000091552734 +ethan laertes 249.76000213623047 +ethan laertes 369.9599952697754 +ethan miller 314.5599994659424 +ethan nixon 493.03000259399414 +ethan ovid 57.290000915527344 +ethan polk 2.3499999046325684 +ethan polk 59.869998931884766 +ethan polk 219.6599998474121 +ethan polk 263.8600025177002 +ethan quirinius 97.23999786376953 +ethan quirinius 111.70999908447266 +ethan quirinius 317.69000363349915 +ethan robinson 78.62000274658203 +ethan robinson 149.5800018310547 +ethan underhill 231.25000381469727 +ethan van buren 152.60000228881836 +ethan white 155.81999969482422 +ethan white 235.55999755859375 +ethan xylophone 414.61000061035156 +ethan zipper 97.51000213623047 +ethan zipper 297.20999908447266 +fred davidson 78.30999755859375 +fred davidson 105.8499984741211 +fred davidson 220.56000137329102 +fred ellison 56.489999771118164 +fred ellison 96.77999877929688 +fred ellison 199.52000045776367 +fred falkner 66.77999973297119 +fred falkner 85.0 +fred falkner 169.91000366210938 +fred hernandez 117.85000228881836 +fred ichabod 81.31999969482422 +fred ichabod 202.45000457763672 +fred johnson 96.08999633789062 +fred king 140.24999618530273 +fred king 343.82000064849854 +fred laertes 57.63999938964844 +fred miller 176.18000030517578 +fred nixon 28.690000534057617 +fred nixon 187.40999603271484 +fred nixon 246.77000045776367 +fred nixon 338.34999084472656 +fred polk 90.12000274658203 +fred polk 323.1899948120117 +fred polk 357.19000244140625 +fred polk 496.16999435424805 +fred quirinius 218.82999801635742 +fred quirinius 224.12000179290771 +fred robinson 89.02999877929688 +fred steinbeck 32.22999954223633 +fred steinbeck 91.05000305175781 +fred steinbeck 231.92000198364258 +fred underhill 183.31999969482422 +fred van buren 83.58000183105469 +fred van buren 318.38000106811523 +fred van buren 346.1400008201599 +fred van buren 391.9999942779541 +fred white 187.38000106811523 +fred young 97.70999908447266 +fred young 141.22999954223633 +fred zipper 163.89999771118164 +gabriella allen 274.8599967956543 +gabriella allen 283.5 +gabriella brown 163.89999961853027 +gabriella brown 465.0 +gabriella carson 147.78999710083008 +gabriella davidson 263.64000415802 +gabriella ellison 71.54000091552734 +gabriella ellison 188.55999755859375 +gabriella falkner 51.720001220703125 +gabriella falkner 87.61000061035156 +gabriella falkner 162.21999835968018 +gabriella garcia 43.0099983215332 +gabriella hernandez 190.5500030517578 +gabriella hernandez 267.4700012207031 +gabriella ichabod 71.12999725341797 +gabriella ichabod 90.3499984741211 +gabriella ichabod 148.6999969482422 +gabriella ichabod 175.70000076293945 +gabriella ichabod 285.72999572753906 +gabriella king 166.75000190734863 +gabriella king 177.6999969482422 +gabriella laertes 65.37999725341797 +gabriella miller 148.4800033569336 +gabriella ovid 92.4000015258789 +gabriella ovid 137.82999801635742 +gabriella polk 244.07000350952148 +gabriella polk 282.00000762939453 +gabriella steinbeck 272.2799987792969 +gabriella steinbeck 461.060001373291 +gabriella thompson 88.36000061035156 +gabriella thompson 94.25 +gabriella thompson 158.80999755859375 +gabriella van buren 146.0800018310547 +gabriella van buren 151.63999938964844 +gabriella white 138.72000122070312 +gabriella young 30.739999771118164 +gabriella young 146.62999725341797 +gabriella zipper 91.62999725341797 +gabriella zipper 
357.5099983215332 +holly allen 44.56999969482422 +holly brown 173.64999389648438 +holly brown 174.2100067138672 +holly falkner 166.22999572753906 +holly hernandez 180.0800018310547 +holly hernandez 248.71000480651855 +holly hernandez 336.87000274658203 +holly hernandez 523.2800025939941 +holly ichabod 179.37000274658203 +holly ichabod 180.18000030517578 +holly ichabod 184.66000366210938 +holly johnson 64.36000061035156 +holly johnson 145.61000061035156 +holly johnson 157.12999725341797 +holly king 275.2299995422363 +holly king 288.52000427246094 +holly laertes 246.21000289916992 +holly miller 290.21999740600586 +holly nixon 177.39999389648438 +holly nixon 228.58999633789062 +holly polk 98.30999755859375 +holly polk 307.0799951553345 +holly robinson 219.27999877929688 +holly thompson 75.41999816894531 +holly thompson 86.69000244140625 +holly thompson 523.360002592206 +holly underhill 96.68000030517578 +holly underhill 163.54000091552734 +holly underhill 187.47000122070312 +holly underhill 328.0099983215332 +holly van buren 161.7699966430664 +holly white 122.98999786376953 +holly white 335.93999576568604 +holly xylophone 191.34000396728516 +holly young 60.220001220703125 +holly young 297.20999908447266 +holly zipper 99.12999725341797 +holly zipper 99.29000091552734 +irene allen 234.6400032043457 +irene brown 4.789999961853027 +irene brown 176.4499969482422 +irene brown 338.2099952697754 +irene carson 292.0 +irene ellison 201.06000137329102 +irene ellison 230.79000091552734 +irene falkner 99.91999816894531 +irene falkner 210.11000061035156 +irene garcia 40.78999996185303 +irene garcia 86.93000030517578 +irene garcia 183.02000045776367 +irene ichabod 99.62000274658203 +irene ichabod 281.96999740600586 +irene johnson 243.59999752044678 +irene laertes 112.54000091552734 +irene laertes 227.45000076293945 +irene laertes 246.53000259399414 +irene miller 395.9100036621094 +irene nixon 29.780000686645508 +irene nixon 199.45999908447266 +irene nixon 261.46000480651855 +irene ovid 158.97000122070312 +irene ovid 339.94000244140625 +irene ovid 362.82000732421875 +irene polk 95.83999633789062 +irene polk 183.43000411987305 +irene polk 258.7100033760071 +irene polk 284.6300048828125 +irene polk 507.2400016784668 +irene quirinius 157.5800018310547 +irene quirinius 250.61000061035156 +irene quirinius 431.6499996185303 +irene robinson 191.72999572753906 +irene steinbeck 94.33000183105469 +irene thompson 256.0 +irene underhill 135.55999755859375 +irene underhill 327.0299892425537 +irene van buren 54.439998626708984 +irene van buren 193.71000289916992 +irene xylophone 168.5 +jessica brown 422.5299949645996 +jessica carson 103.66000366210938 +jessica carson 144.92000198364258 +jessica carson 259.1099967956543 +jessica davidson 95.33999633789062 +jessica davidson 99.20999908447266 +jessica davidson 137.17000198364258 +jessica davidson 227.79999923706055 +jessica ellison 207.35000228881836 +jessica ellison 237.4300022125244 +jessica falkner 99.6500015258789 +jessica garcia 174.70999908447266 +jessica garcia 185.62000179290771 +jessica ichabod 124.59000015258789 +jessica johnson 272.0500030517578 +jessica johnson 294.2899990081787 +jessica miller 77.83999633789062 +jessica nixon 77.0999984741211 +jessica nixon 90.06999969482422 +jessica ovid 71.68000030517578 +jessica ovid 309.44000244140625 +jessica polk 472.2099952697754 +jessica quirinius 35.619998931884766 +jessica quirinius 192.7000026702881 +jessica quirinius 208.6500015258789 +jessica quirinius 370.0599937438965 +jessica robinson 254.5300064086914 +jessica 
thompson 115.9000015258789 +jessica thompson 180.60000610351562 +jessica underhill 199.10999870300293 +jessica underhill 234.29000091552734 +jessica underhill 257.09000396728516 +jessica van buren 9.739999771118164 +jessica white 96.62000274658203 +jessica white 166.54000091552734 +jessica white 240.52999877929688 +jessica white 432.17999362945557 +jessica white 497.6400032043457 +jessica xylophone 385.4799995422363 +jessica young 47.410000801086426 +jessica young 240.6500015258789 +jessica zipper 323.4199962615967 +jessica zipper 344.8399953842163 +jessica zipper 480.06999588012695 +katie allen 312.9700012207031 +katie brown 573.4599933624268 +katie davidson 96.91000366210938 +katie ellison 163.52999877929688 +katie ellison 384.4699947834015 +katie falkner 125.57000160217285 +katie garcia 84.4000015258789 +katie garcia 160.28999710083008 +katie hernandez 257.9600028991699 +katie ichabod 187.63999557495117 +katie ichabod 274.97999572753906 +katie ichabod 362.9200019836426 +katie king 97.80999755859375 +katie king 169.56999969482422 +katie king 314.1999969482422 +katie miller 31.399999618530273 +katie miller 228.40999603271484 +katie nixon 23.190000534057617 +katie ovid 207.1200065612793 +katie polk 143.2599983215332 +katie polk 247.02000045776367 +katie robinson 83.84999942779541 +katie van buren 297.0300064086914 +katie van buren 464.54999351501465 +katie white 344.1700019836426 +katie white 465.8599934577942 +katie xylophone 175.89999675750732 +katie young 31.010000228881836 +katie young 72.51000213623047 +katie young 97.56999969482422 +katie zipper 101.9000015258789 +katie zipper 314.75 +luke allen 89.55000305175781 +luke allen 133.4800033569336 +luke allen 210.8800048828125 +luke allen 392.0300064086914 +luke allen 420.6299934387207 +luke brown 129.20999908447266 +luke davidson 28.950000762939453 +luke davidson 106.41000080108643 +luke ellison 42.09000027179718 +luke ellison 136.52000427246094 +luke ellison 187.51000213623047 +luke falkner 172.8799991607666 +luke falkner 216.0199966430664 +luke garcia 50.94000053405762 +luke garcia 345.1200008392334 +luke ichabod 67.90000057220459 +luke ichabod 97.87000274658203 +luke johnson 59.00999927520752 +luke johnson 105.32000160217285 +luke johnson 187.2899990081787 +luke laertes 105.42000198364258 +luke laertes 147.14999723434448 +luke laertes 158.86000061035156 +luke laertes 167.01999855041504 +luke laertes 281.19999504089355 +luke miller 97.6500015258789 +luke ovid 186.53000259399414 +luke ovid 340.1300048828125 +luke polk 95.27999877929688 +luke polk 277.6700019836426 +luke quirinius 115.83999633789062 +luke robinson 137.33999633789062 +luke robinson 145.23999786376953 +luke thompson 94.37999725341797 +luke underhill 96.94000244140625 +luke underhill 194.73999786376953 +luke underhill 372.6899948120117 +luke van buren 193.93999862670898 +luke white 67.12000274658203 +luke xylophone 102.37999725341797 +luke zipper 223.54000282287598 +mike allen 79.60999870300293 +mike brown 202.81999588012695 +mike carson 81.66000366210938 +mike carson 105.02999877929688 +mike carson 405.4499931335449 +mike davidson 137.74999809265137 +mike davidson 393.17999267578125 +mike ellison 79.37999725341797 +mike ellison 85.73999786376953 +mike ellison 127.15999603271484 +mike ellison 228.07999992370605 +mike ellison 263.8899955749512 +mike falkner 254.50000381469727 +mike garcia 70.8499984741211 +mike garcia 173.63999938964844 +mike garcia 177.5199966430664 +mike hernandez 59.45000076293945 +mike hernandez 327.6900062561035 +mike ichabod 64.7699966430664 +mike king 
78.26000213623047 +mike king 84.2300033569336 +mike king 94.68000030517578 +mike king 133.5900001525879 +mike king 134.87999725341797 +mike king 173.45999908447266 +mike miller 57.890000343322754 +mike nixon 92.95999908447266 +mike nixon 203.68999862670898 +mike polk 32.140000343322754 +mike polk 99.68000030517578 +mike polk 306.61000061035156 +mike quirinius 89.37999725341797 +mike steinbeck 85.13999938964844 +mike steinbeck 97.45999908447266 +mike steinbeck 153.86000204086304 +mike steinbeck 221.21999502182007 +mike van buren 80.83999633789062 +mike van buren 174.21000289916992 +mike white 91.87999725341797 +mike white 341.80999755859375 +mike white 341.86000061035156 +mike white 389.20000076293945 +mike young 74.58999633789062 +mike young 83.54000091552734 +mike young 112.19000101089478 +mike zipper 86.98999786376953 +mike zipper 97.38999938964844 +mike zipper 174.61000061035156 +nick allen 173.32000207901 +nick allen 257.7300033569336 +nick brown 192.45000076293945 +nick davidson 258.9799919128418 +nick ellison 183.34000396728516 +nick ellison 193.02000427246094 +nick falkner 10.130000114440918 +nick falkner 182.72000122070312 +nick garcia 142.65999603271484 +nick garcia 183.7699966430664 +nick garcia 277.8299951553345 +nick ichabod 110.43999862670898 +nick ichabod 112.54999923706055 +nick ichabod 241.68999481201172 +nick johnson 192.56000518798828 +nick johnson 325.9499976634979 +nick laertes 96.25 +nick miller 82.97000122070312 +nick nixon 96.37999725341797 +nick ovid 184.3699951171875 +nick polk 199.57000064849854 +nick quirinius 174.80999755859375 +nick quirinius 243.8300018310547 +nick robinson 129.65999603271484 +nick robinson 216.54999923706055 +nick steinbeck 97.83000183105469 +nick thompson 205.4900016784668 +nick underhill 166.42000007629395 +nick van buren 222.6500015258789 +nick xylophone 75.3499984741211 +nick young 332.23999786376953 +nick young 346.41000083088875 +nick zipper 222.9199981689453 +nick zipper 529.7199974060059 +oscar allen 246.42999839782715 +oscar brown 274.01000022888184 +oscar carson 78.9800033569336 +oscar carson 87.4800033569336 +oscar carson 98.51000213623047 +oscar carson 203.86000442504883 +oscar carson 321.82000064849854 +oscar davidson 361.6699981689453 +oscar ellison 146.44000244140625 +oscar ellison 234.32000160217285 +oscar falkner 98.4800033569336 +oscar garcia 231.04000091552734 +oscar hernandez 85.48999786376953 +oscar hernandez 95.4800033569336 +oscar ichabod 71.80000305175781 +oscar ichabod 123.78000068664551 +oscar ichabod 173.31000518798828 +oscar ichabod 251.22000122070312 +oscar johnson 146.27000427246094 +oscar johnson 260.1600036621094 +oscar king 124.2699966430664 +oscar king 249.5399990081787 +oscar king 284.8599910736084 +oscar laertes 15.640000343322754 +oscar laertes 254.8499984741211 +oscar laertes 261.41000175476074 +oscar laertes 261.8400020599365 +oscar nixon 41.619998931884766 +oscar ovid 82.23999786376953 +oscar ovid 187.76000213623047 +oscar ovid 260.6100044250488 +oscar polk 63.900001525878906 +oscar polk 252.71000289916992 +oscar quirinius 73.4800033569336 +oscar quirinius 165.3800048828125 +oscar quirinius 244.2699966430664 +oscar quirinius 248.75 +oscar robinson 93.31999969482422 +oscar robinson 163.55999755859375 +oscar robinson 191.8300018310547 +oscar robinson 315.1999912261963 +oscar steinbeck 376.6899948120117 +oscar thompson 131.1400032043457 +oscar thompson 148.01000213623047 +oscar thompson 325.42000579833984 +oscar thompson 545.7399940490723 +oscar underhill 87.4000015258789 +oscar van buren 
61.880001068115234 +oscar van buren 188.8699951171875 +oscar van buren 209.53000235557556 +oscar white 129.73999786376953 +oscar white 148.9800033569336 +oscar white 275.1500015258789 +oscar white 303.8599910736084 +oscar xylophone 115.22999954223633 +oscar xylophone 319.75000381469727 +oscar xylophone 475.3300018310547 +oscar zipper 109.53999710083008 +oscar zipper 214.40999603271484 +oscar zipper 214.6500015258789 +priscilla brown 77.56999969482422 +priscilla brown 165.5199966430664 +priscilla brown 408.4499969482422 +priscilla carson 168.8300018310547 +priscilla carson 195.7900047302246 +priscilla carson 207.5300006866455 +priscilla ichabod 92.61000061035156 +priscilla ichabod 206.16000366210938 +priscilla johnson 89.1500015258789 +priscilla johnson 156.4600067138672 +priscilla johnson 158.88000106811523 +priscilla johnson 190.61000061035156 +priscilla johnson 211.01000022888184 +priscilla king 371.9299964904785 +priscilla nixon 95.80999755859375 +priscilla nixon 278.87999725341797 +priscilla ovid 96.27000284194946 +priscilla ovid 198.3400001525879 +priscilla polk 252.5800018310547 +priscilla quirinius 131.8499994277954 +priscilla thompson 230.36000156402588 +priscilla underhill 143.56999969482422 +priscilla underhill 354.37000274658203 +priscilla van buren 82.72000122070312 +priscilla van buren 145.61000061035156 +priscilla van buren 183.72000122070312 +priscilla white 78.27999877929688 +priscilla xylophone 21.489999771118164 +priscilla xylophone 159.26000213623047 +priscilla xylophone 406.1000007688999 +priscilla young 163.2900013923645 +priscilla young 260.59000366926193 +priscilla zipper 311.399995803833 +priscilla zipper 327.97999572753906 +quinn allen 257.94000244140625 +quinn allen 365.2299995422363 +quinn brown 80.58000183105469 +quinn brown 80.81999969482422 +quinn brown 198.71000289916992 +quinn davidson 83.4000015258789 +quinn davidson 95.11000061035156 +quinn davidson 154.79000091552734 +quinn davidson 227.13999938964844 +quinn ellison 237.17000007629395 +quinn ellison 361.14000129699707 +quinn garcia 92.33000183105469 +quinn garcia 148.63999938964844 +quinn garcia 226.78999710083008 +quinn garcia 246.70000076293945 +quinn ichabod 89.63999938964844 +quinn king 74.62000274658203 +quinn king 86.2300033569336 +quinn laertes 112.36000061035156 +quinn laertes 243.6900019645691 +quinn laertes 265.51000213623047 +quinn nixon 149.3300018310547 +quinn ovid 393.2099943161011 +quinn quirinius 266.8200035095215 +quinn robinson 247.6400032043457 +quinn steinbeck 144.81999969482422 +quinn steinbeck 213.65999507904053 +quinn thompson 156.51000213623047 +quinn thompson 274.1599931716919 +quinn underhill 248.3000030517578 +quinn underhill 252.61999130249023 +quinn underhill 321.9799976348877 +quinn van buren 82.5199966430664 +quinn young 90.97999954223633 +quinn zipper 58.0 +quinn zipper 249.38999938964844 +rachel allen 15.8100004196167 +rachel allen 151.80999755859375 +rachel brown 193.5800018310547 +rachel brown 312.0800018310547 +rachel brown 347.7999954223633 +rachel brown 423.98999214172363 +rachel brown 437.64999771118164 +rachel carson 98.95999908447266 +rachel carson 385.3799934387207 +rachel davidson 396.38999938964844 +rachel ellison 299.12000465393066 +rachel falkner 88.80000305175781 +rachel falkner 99.23999786376953 +rachel falkner 172.54999542236328 +rachel falkner 233.55999755859375 +rachel johnson 197.92999649047852 +rachel king 36.220001220703125 +rachel king 219.8400001525879 +rachel laertes 97.17000198364258 +rachel laertes 109.5999984741211 +rachel ovid 80.20999872684479 
+rachel ovid 260.18999683856964 +rachel polk 89.27999877929688 +rachel quirinius 205.1400022506714 +rachel robinson 254.1300015449524 +rachel robinson 286.0400047302246 +rachel robinson 332.4199981689453 +rachel thompson 137.73000198602676 +rachel thompson 213.31000137329102 +rachel thompson 380.85999488830566 +rachel underhill 175.6099967956543 +rachel white 94.72000122070312 +rachel white 196.18000030517578 +rachel young 230.6400032043457 +rachel zipper 148.9000015258789 +rachel zipper 238.98000192642212 +sarah carson 175.62000274658203 +sarah carson 307.70000088214874 +sarah carson 386.8999948501587 +sarah ellison 161.80999946594238 +sarah falkner 99.36000061035156 +sarah falkner 281.62000274658203 +sarah garcia 73.6500015258789 +sarah garcia 153.73000144958496 +sarah garcia 312.8899955749512 +sarah ichabod 81.31999969482422 +sarah ichabod 97.26000213623047 +sarah johnson 140.37999725341797 +sarah johnson 177.57000732421875 +sarah johnson 248.4499969482422 +sarah johnson 309.1800022125244 +sarah king 216.75 +sarah king 268.5399932861328 +sarah miller 222.31000518798828 +sarah ovid 146.25000381469727 +sarah robinson 143.43000030517578 +sarah robinson 310.75 +sarah steinbeck 208.72000122070312 +sarah white 140.22999572753906 +sarah white 181.86000061035156 +sarah xylophone 68.31999969482422 +sarah young 185.80999755859375 +sarah zipper 168.22000122070312 +tom brown 181.1000051498413 +tom brown 404.3500061035156 +tom carson 142.60999822616577 +tom carson 299.57999992370605 +tom carson 592.3499927520752 +tom davidson 180.61000061035156 +tom ellison 98.2300033569336 +tom ellison 154.58999633789062 +tom ellison 173.02999877929688 +tom falkner 88.22000122070312 +tom falkner 139.11000442504883 +tom hernandez 81.63999938964844 +tom hernandez 263.67000579833984 +tom ichabod 214.0699977874756 +tom johnson 405.95000076293945 +tom johnson 438.9099922180176 +tom king 218.18000030517578 +tom laertes 244.37000274658203 +tom laertes 473.0999984741211 +tom miller 68.25 +tom miller 85.59000015258789 +tom miller 127.56999969482422 +tom nixon 85.02999877929688 +tom ovid 217.32000160217285 +tom polk 188.87000274658203 +tom polk 206.52000045776367 +tom quirinius 120.27000427246094 +tom quirinius 232.63000202178955 +tom robinson 90.69000244140625 +tom robinson 98.72000122070312 +tom robinson 99.1500015258789 +tom robinson 209.5399932861328 +tom steinbeck 277.7100009918213 +tom van buren 40.779998779296875 +tom van buren 217.70000076293945 +tom van buren 375.2099964618683 +tom white 223.4700050354004 +tom young 174.36000061035156 +tom young 304.8199977874756 +tom zipper 213.7900047302246 +ulysses brown 247.1500015258789 +ulysses carson 77.41999816894531 +ulysses carson 79.54000091552734 +ulysses carson 150.93000030517578 +ulysses carson 162.24000549316406 +ulysses davidson 414.7100009918213 +ulysses ellison 96.7300033569336 +ulysses garcia 89.80000305175781 +ulysses hernandez 106.29999542236328 +ulysses hernandez 134.44000244140625 +ulysses hernandez 160.22000122070312 +ulysses ichabod 98.56999969482422 +ulysses ichabod 309.34999656677246 +ulysses johnson 152.47000122070312 +ulysses king 244.7100067138672 +ulysses laertes 138.4400042295456 +ulysses laertes 173.55999755859375 +ulysses laertes 256.91999912261963 +ulysses miller 76.27999877929688 +ulysses miller 417.67000102996826 +ulysses nixon 174.56999969482422 +ulysses ovid 130.13000106811523 +ulysses polk 123.9399995803833 +ulysses polk 149.95999908447266 +ulysses polk 205.2400016784668 +ulysses polk 237.5699920654297 +ulysses quirinius 330.4700012207031 
+ulysses robinson 79.48999786376953 +ulysses steinbeck 144.8300018310547 +ulysses steinbeck 155.66000366210938 +ulysses thompson 159.92000579833984 +ulysses underhill 81.58000183105469 +ulysses underhill 88.4800033569336 +ulysses underhill 99.66999816894531 +ulysses underhill 135.55999755859375 +ulysses underhill 189.1099977493286 +ulysses underhill 289.6800003051758 +ulysses underhill 385.60000228881836 +ulysses van buren 95.52999877929688 +ulysses white 188.8300018310547 +ulysses white 305.79000091552734 +ulysses xylophone 54.099998474121094 +ulysses xylophone 205.2099952697754 +ulysses xylophone 251.94000148773193 +ulysses young 100.77000045776367 +ulysses young 275.8300018310547 +ulysses young 522.1700019836426 +victor allen 220.1699981689453 +victor allen 222.10000228881836 +victor brown 77.88999938964844 +victor brown 90.37999725341797 +victor brown 91.97000122070312 +victor brown 455.25000381469727 +victor davidson 149.06000137329102 +victor davidson 291.48000717163086 +victor davidson 321.25 +victor ellison 314.37000274658203 +victor ellison 442.50000190734863 +victor hernandez 69.87999725341797 +victor hernandez 99.85000038146973 +victor hernandez 143.02000045776367 +victor hernandez 160.38999938964844 +victor hernandez 391.2999954223633 +victor johnson 145.18000030517578 +victor johnson 190.27000045776367 +victor johnson 308.1900006532669 +victor king 108.10000228881836 +victor king 310.5 +victor laertes 145.42999267578125 +victor laertes 214.72999572753906 +victor miller 173.76000022888184 +victor nixon 68.5 +victor nixon 269.5899963378906 +victor ovid 151.39999771118164 +victor polk 175.8799991607666 +victor quirinius 65.55000305175781 +victor quirinius 168.5500030517578 +victor robinson 177.9100022315979 +victor robinson 204.09999084472656 +victor steinbeck 52.720001220703125 +victor steinbeck 220.45999908447266 +victor steinbeck 309.4900064468384 +victor thompson 58.65999984741211 +victor van buren 206.77999877929688 +victor van buren 222.44000244140625 +victor white 156.36999893188477 +victor white 167.2699966430664 +victor xylophone 158.36999893188477 +victor xylophone 161.54000091552734 +victor xylophone 234.76000308990479 +victor xylophone 267.82999420166016 +victor xylophone 314.95000076293945 +victor young 88.55000305175781 +victor zipper 192.92999649047852 +wendy allen 56.06999969482422 +wendy allen 66.16000306606293 +wendy allen 267.3199996948242 +wendy brown 453.53000259399414 +wendy brown 525.5100040435791 +wendy ellison 193.95000457763672 +wendy ellison 260.9099998474121 +wendy falkner 77.36000061035156 +wendy falkner 97.68000030517578 +wendy falkner 128.30999565124512 +wendy garcia 4.409999847412109 +wendy garcia 76.72000122070312 +wendy garcia 189.42999839782715 +wendy garcia 265.5900001525879 +wendy hernandez 48.11000061035156 +wendy ichabod 104.3700008392334 +wendy king 156.89999771118164 +wendy king 183.31999969482422 +wendy king 403.27000427246094 +wendy laertes 79.98999786376953 +wendy laertes 165.0999984741211 +wendy laertes 365.0 +wendy miller 72.9500002861023 +wendy miller 313.8300037384033 +wendy nixon 45.91999816894531 +wendy nixon 60.2599983215332 +wendy ovid 95.33000183105469 +wendy ovid 180.36000061035156 +wendy polk 386.7400016784668 +wendy polk 443.3400018811226 +wendy quirinius 152.04999828338623 +wendy quirinius 240.23999977111816 +wendy robinson 71.06999969482422 +wendy robinson 249.35000610351562 +wendy robinson 391.4699993133545 +wendy steinbeck 92.11000061035156 +wendy thompson 136.35000228881836 +wendy thompson 183.1500015258789 +wendy 
underhill 318.6500015258789 +wendy underhill 320.75000190734863 +wendy underhill 328.2300033569336 +wendy van buren 57.459999084472656 +wendy van buren 92.81999969482422 +wendy white 171.36000061035156 +wendy xylophone 153.62999725341797 +wendy xylophone 223.94999885559082 +wendy young 40.22000026702881 +wendy young 513.8299942016602 +xavier allen 102.97000122070312 +xavier allen 168.3300018310547 +xavier allen 197.45999908447266 +xavier brown 55.20000076293945 +xavier brown 90.7300033569336 +xavier brown 96.2300033569336 +xavier carson 193.63999938964844 +xavier carson 265.1600036621094 +xavier davidson 63.349998474121094 +xavier davidson 264.27000427246094 +xavier davidson 288.1999988555908 +xavier ellison 138.42000198364258 +xavier ellison 262.6300048828125 +xavier garcia 148.66000366210938 +xavier hernandez 122.13999938964844 +xavier hernandez 164.97000122070312 +xavier hernandez 306.25 +xavier ichabod 211.84000635147095 +xavier ichabod 244.50000762939453 +xavier johnson 56.53999900817871 +xavier johnson 89.0999984741211 +xavier king 87.22000122070312 +xavier king 151.22999572753906 +xavier laertes 183.65999794006348 +xavier ovid 398.2100067138672 +xavier polk 72.62000274658203 +xavier polk 76.93000030517578 +xavier polk 261.5100030899048 +xavier polk 318.01000213623047 +xavier quirinius 22.1200008392334 +xavier quirinius 89.55000305175781 +xavier quirinius 246.2400016784668 +xavier quirinius 402.2100009918213 +xavier thompson 283.9400005340576 +xavier underhill 120.45000076293945 +xavier white 138.02999591827393 +xavier white 172.06999969482422 +xavier xylophone 79.41999816894531 +xavier zipper 373.67999935150146 +yuri allen 52.849998474121094 +yuri allen 417.3700008392334 +yuri brown 170.52000427246094 +yuri brown 180.70999908447266 +yuri carson 188.99000549316406 +yuri carson 537.6500015258789 +yuri ellison 86.91999816894531 +yuri ellison 376.32999646663666 +yuri falkner 152.99000358581543 +yuri falkner 181.06999969482422 +yuri garcia 274.6800003051758 +yuri hernandez 153.46999764442444 +yuri johnson 197.28000259399414 +yuri johnson 236.0800018310547 +yuri johnson 258.1899985074997 +yuri king 551.9899978637695 +yuri laertes 37.59000015258789 +yuri laertes 253.4799976348877 +yuri nixon 95.54999732971191 +yuri nixon 248.9700005054474 +yuri polk 82.33999633789062 +yuri polk 275.3200035095215 +yuri polk 305.6399974822998 +yuri quirinius 112.97000122070312 +yuri quirinius 148.27999877929688 +yuri quirinius 449.1699924468994 +yuri steinbeck 292.94000244140625 +yuri steinbeck 357.5 +yuri thompson 428.03999519348145 +yuri underhill 83.87000274658203 +yuri underhill 350.7999897003174 +yuri white 132.09000396728516 +yuri xylophone 107.07000160217285 +zach allen 65.43000030517578 +zach brown 135.6999969482422 +zach brown 247.04999542236328 +zach brown 256.8000030517578 +zach brown 362.38000106811523 +zach brown 418.75 +zach carson 291.7700004577637 +zach ellison 135.149995803833 +zach falkner 91.41999816894531 +zach falkner 196.41999912261963 +zach garcia 84.37999725341797 +zach garcia 160.70000457763672 +zach garcia 167.7599983215332 +zach garcia 205.36999893188477 +zach ichabod 116.2699966430664 +zach ichabod 151.18000030517578 +zach king 127.63000106811523 +zach king 182.2699966430664 +zach king 269.0999984741211 +zach miller 199.71000289916992 +zach miller 220.73999977111816 +zach miller 264.0600047111511 +zach ovid 92.55000305175781 +zach ovid 94.33999633789062 +zach ovid 105.94999847561121 +zach ovid 136.04000091552734 +zach quirinius 103.11000061035156 +zach robinson 76.72000122070312 
+zach steinbeck 85.48999786376953 +zach steinbeck 182.87000274658203 +zach thompson 116.93999862670898 +zach thompson 319.9499969482422 +zach underhill 86.22000122070312 +zach white 70.52999877929688 +zach xylophone 227.52000427246094 +zach xylophone 286.45000076293945 +zach young 313.00999450683594 +zach zipper 85.87000274658203 +zach zipper 94.43000030517578 +zach zipper 139.38999938964844 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-3-6f104992e0050576085064815de43194 b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-3-6f104992e0050576085064815de43194 new file mode 100644 index 0000000000000..ae2a1e9dd7d3f --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-3-6f104992e0050576085064815de43194 @@ -0,0 +1,1049 @@ + 43.52666695912679 + 48.735000133514404 + 57.96666590372721 +alice allen 39.21833221117655 +alice allen 42.813999557495116 +alice allen 49.1824996471405 +alice brown 59.68166707456112 +alice carson 46.703333189090095 +alice davidson 59.51833279927572 +alice falkner 64.48333247502644 +alice garcia 55.114999771118164 +alice hernandez 49.61333228151003 +alice hernandez 69.70166667302449 +alice johnson 49.25166805585226 +alice king 20.052499681711197 +alice king 49.019999186197914 +alice king 56.0733331044515 +alice laertes 35.24999898672104 +alice laertes 68.85333251953125 +alice miller 55.542000198364256 +alice nixon 44.010000785191856 +alice nixon 50.900000762939456 +alice nixon 64.78333409627278 +alice ovid 24.90999937057495 +alice polk 47.426666259765625 +alice quirinius 52.80166610081991 +alice quirinius 52.94833393891653 +alice robinson 44.41500027974447 +alice robinson 55.04249954223633 +alice steinbeck 37.735000928243004 +alice steinbeck 41.02800045013428 +alice steinbeck 53.83499916394552 +alice underhill 52.64500021934509 +alice van buren 48.27666727701823 +alice xylophone 28.047500252723694 +alice xylophone 37.82199954986572 +alice xylophone 59.75166606903076 +alice zipper 48.875000635782875 +alice zipper 51.365000108877815 +alice zipper 89.93000030517578 +bob brown 38.134999910990395 +bob brown 56.20666662851969 +bob brown 77.51166598002116 +bob carson 52.77200050354004 +bob davidson 28.945000171661377 +bob davidson 37.946666399637856 +bob davidson 53.90333271026611 +bob ellison 51.383999824523926 +bob ellison 57.30333344141642 +bob ellison 58.53666559855143 +bob ellison 66.7400016784668 +bob falkner 39.053333600362144 +bob garcia 16.705000400543213 +bob garcia 37.16499960422516 +bob garcia 51.17999919255575 +bob garcia 52.37666575113932 +bob garcia 67.39166768391927 +bob hernandez 53.261999893188474 +bob ichabod 43.96999979019165 +bob king 38.03666718800863 +bob king 38.19249892234802 +bob king 67.40999794006348 +bob laertes 21.449999809265137 +bob laertes 50.37166612346967 +bob miller 41.906000471115114 +bob ovid 27.836666425069172 +bob ovid 39.3833335240682 +bob ovid 60.90749979019165 +bob ovid 63.069999313354494 +bob polk 41.88333296775818 +bob quirinius 54.504998207092285 +bob steinbeck 34.33999983469645 +bob van buren 58.29999923706055 +bob white 30.22333288192749 +bob white 41.44000005722046 +bob xylophone 12.163333415985107 +bob xylophone 47.88249921798706 +bob young 27.38499927520752 +bob zipper 27.93800084590912 +bob zipper 59.11800007820129 +bob zipper 66.43000030517578 +calvin allen 71.51000118255615 +calvin brown 50.44166612625122 +calvin brown 53.625000953674316 +calvin brown 67.48333485921223 +calvin carson 62.17750120162964 +calvin davidson 
14.03000009059906 +calvin davidson 43.640000343322754 +calvin ellison 52.52750015258789 +calvin falkner 24.016666332880657 +calvin falkner 46.87333329518636 +calvin falkner 47.53666607538859 +calvin falkner 56.47499918937683 +calvin falkner 57.336666107177734 +calvin falkner 72.25 +calvin garcia 46.484999338785805 +calvin hernandez 43.90199909210205 +calvin johnson 76.41500091552734 +calvin laertes 39.09249973297119 +calvin laertes 47.323333422342934 +calvin nixon 30.113332668940227 +calvin nixon 47.7533327738444 +calvin nixon 49.08500075340271 +calvin ovid 41.924999713897705 +calvin ovid 49.10499978065491 +calvin ovid 62.26499938964844 +calvin ovid 62.27999989191691 +calvin polk 52.95333353678385 +calvin quirinius 53.34200019836426 +calvin quirinius 54.831998634338376 +calvin robinson 60.470001220703125 +calvin steinbeck 35.644999980926514 +calvin steinbeck 53.75800037384033 +calvin steinbeck 56.11000084877014 +calvin thompson 42.355000257492065 +calvin thompson 65.91666666666667 +calvin underhill 47.41199951171875 +calvin van buren 31.591666102409363 +calvin van buren 43.18200063705444 +calvin white 45.27500128746033 +calvin white 56.079999923706055 +calvin xylophone 20.394999980926514 +calvin xylophone 40.59999958674113 +calvin xylophone 54.56500005722046 +calvin young 42.246666272481285 +calvin young 64.49000072479248 +calvin zipper 57.49500036239624 +calvin zipper 57.59000039100647 +david allen 40.14999977747599 +david allen 54.545000076293945 +david brown 35.4516666730245 +david brown 62.83999938964844 +david davidson 35.50000067551931 +david davidson 52.75250005722046 +david davidson 74.1500015258789 +david davidson 95.80999755859375 +david ellison 43.43166727821032 +david ellison 52.18750047683716 +david ellison 72.58400039672851 +david hernandez 64.47600173950195 +david ichabod 29.518332719802856 +david ichabod 34.6100010573864 +david laertes 61.127999496459964 +david nixon 53.60249900817871 +david ovid 37.977500796318054 +david ovid 41.58999983469645 +david quirinius 24.96250009536743 +david quirinius 45.23249912261963 +david quirinius 52.282000350952146 +david robinson 42.17750024795532 +david robinson 62.54666519165039 +david thompson 41.88999938964844 +david underhill 48.143333752950035 +david underhill 62.53499941031138 +david underhill 97.55999755859375 +david van buren 34.84749984741211 +david van buren 51.61666679382324 +david white 62.30499839782715 +david xylophone 33.92500019073486 +david xylophone 53.142000222206114 +david xylophone 72.77166684468587 +david young 30.13666756947835 +david young 51.540000915527344 +ethan allen 53.442500591278076 +ethan brown 7.110000133514404 +ethan brown 41.260000228881836 +ethan brown 41.495000084241234 +ethan brown 41.83833312988281 +ethan brown 46.497499227523804 +ethan brown 63.17750072479248 +ethan carson 64.31999969482422 +ethan ellison 46.72599983215332 +ethan ellison 61.24750167876482 +ethan falkner 36.90250104665756 +ethan falkner 52.71000099182129 +ethan garcia 32.771666407585144 +ethan hernandez 56.239999008178714 +ethan johnson 82.38333384195964 +ethan king 8.399999856948853 +ethan laertes 37.93200054168701 +ethan laertes 40.4883329073588 +ethan laertes 53.396667132774986 +ethan laertes 53.81666628519694 +ethan laertes 55.987499713897705 +ethan laertes 66.36999956766765 +ethan laertes 68.83400039672851 +ethan miller 56.519999504089355 +ethan nixon 50.13333353648583 +ethan ovid 37.51333363850912 +ethan polk 2.3499999046325684 +ethan polk 44.147999954223636 +ethan polk 54.91499996185303 +ethan polk 60.340000788370766 +ethan 
quirinius 40.21750020980835 +ethan quirinius 47.125000298023224 +ethan quirinius 57.69000013669332 +ethan robinson 41.5060001373291 +ethan robinson 55.0640007019043 +ethan underhill 68.01250076293945 +ethan van buren 43.55250072479248 +ethan white 46.40999913215637 +ethan white 53.03200073242188 +ethan xylophone 70.29333432515462 +ethan zipper 46.92999982833862 +ethan zipper 66.04500198364258 +fred davidson 26.487499618902802 +fred davidson 44.99333477020264 +fred davidson 53.93666648864746 +fred ellison 22.2599999109904 +fred ellison 48.047999954223634 +fred ellison 63.423333485921226 +fred falkner 20.035000324249268 +fred falkner 44.243999004364014 +fred falkner 62.130001068115234 +fred hernandez 37.42199997901916 +fred ichabod 46.56999969482422 +fred ichabod 58.275001525878906 +fred johnson 54.63333257039388 +fred king 46.30499919255575 +fred king 61.48500037193298 +fred laertes 26.203333059946697 +fred miller 43.46400032043457 +fred nixon 28.690000534057617 +fred nixon 32.77999955415726 +fred nixon 52.03799936771393 +fred nixon 60.468332290649414 +fred polk 26.81166632970174 +fred polk 42.48999996185303 +fred polk 46.95200061798096 +fred polk 60.465998840332034 +fred quirinius 43.466000366210935 +fred quirinius 45.79799957275391 +fred robinson 62.42833296457926 +fred steinbeck 32.22999954223633 +fred steinbeck 48.73800039291382 +fred steinbeck 65.91750144958496 +fred underhill 72.94250011444092 +fred van buren 41.28250002861023 +fred van buren 41.5283338278532 +fred van buren 50.871665954589844 +fred van buren 81.77000045776367 +fred white 34.897499561309814 +fred young 58.56666692097982 +fred young 70.61499977111816 +fred zipper 33.90500068664551 +gabriella allen 51.041666666666664 +gabriella allen 55.13999938964844 +gabriella brown 54.63333320617676 +gabriella brown 72.33333396911621 +gabriella carson 49.26333236694336 +gabriella davidson 52.72800083160401 +gabriella ellison 54.68999989827474 +gabriella ellison 71.54000091552734 +gabriella falkner 20.834000015258788 +gabriella falkner 46.348333517710365 +gabriella falkner 48.58500099182129 +gabriella garcia 24.78999964396159 +gabriella hernandez 57.61000029246012 +gabriella hernandez 65.11166667938232 +gabriella ichabod 33.78749895095825 +gabriella ichabod 38.9950008392334 +gabriella ichabod 48.15499925613403 +gabriella ichabod 49.739999008178714 +gabriella ichabod 52.789999643961586 +gabriella king 35.58200044631958 +gabriella king 49.63000059723854 +gabriella laertes 47.81000010172526 +gabriella miller 62.17666753133138 +gabriella ovid 45.94333267211914 +gabriella ovid 50.435001373291016 +gabriella polk 42.58000100851059 +gabriella polk 72.87000179290771 +gabriella steinbeck 65.42000102996826 +gabriella steinbeck 72.0499997138977 +gabriella thompson 49.897499561309814 +gabriella thompson 52.84250023961067 +gabriella thompson 57.23249959945679 +gabriella van buren 39.31000053882599 +gabriella van buren 57.920000076293945 +gabriella white 49.85333410898844 +gabriella young 15.369999885559082 +gabriella young 49.04999907811483 +gabriella zipper 42.82499901453654 +gabriella zipper 59.58499972025553 +holly allen 29.50499963760376 +holly brown 55.284998178482056 +holly brown 55.79833386838436 +holly falkner 40.07666663328806 +holly hernandez 46.40500044822693 +holly hernandez 56.14500045776367 +holly hernandez 56.160000483194985 +holly hernandez 58.95166748017073 +holly ichabod 67.19250011444092 +holly ichabod 68.44500064849854 +holly ichabod 90.09000015258789 +holly johnson 42.795000076293945 +holly johnson 55.76999855041504 +holly 
johnson 66.11333338419597 +holly king 48.788000869750974 +holly king 64.25 +holly laertes 50.40750074386597 +holly miller 45.60666608810425 +holly nixon 58.096665700276695 +holly nixon 69.59249877929688 +holly polk 41.59666601816813 +holly polk 44.64799900054932 +holly robinson 48.405999755859376 +holly thompson 34.605000495910645 +holly thompson 38.36749941110611 +holly thompson 65.99833394338687 +holly underhill 46.52600040435791 +holly underhill 47.92666663726171 +holly underhill 62.41600036621094 +holly underhill 79.95999908447266 +holly van buren 58.63333225250244 +holly white 31.232499361038208 +holly white 61.494998931884766 +holly xylophone 70.32500044504802 +holly young 54.05000114440918 +holly young 57.103333473205566 +holly zipper 50.59500014781952 +holly zipper 67.81000137329102 +irene allen 53.364000701904295 +irene brown 4.789999961853027 +irene brown 51.65399932861328 +irene brown 87.66999816894531 +irene carson 59.2433336575826 +irene ellison 36.446667989095054 +irene ellison 53.85000038146973 +irene falkner 47.429999995231626 +irene falkner 61.20666694641113 +irene garcia 15.369999885559082 +irene garcia 48.28000005086263 +irene garcia 55.643333435058594 +irene ichabod 40.868333299954735 +irene ichabod 64.45666694641113 +irene johnson 44.37999935150147 +irene laertes 27.625 +irene laertes 46.25 +irene laertes 49.17500019073486 +irene miller 75.87500063578288 +irene nixon 29.780000686645508 +irene nixon 30.070000807444256 +irene nixon 36.34333356221517 +irene ovid 27.21500023206075 +irene ovid 55.6175012588501 +irene ovid 60.353999328613284 +irene polk 47.438334147135414 +irene polk 47.77666505177816 +irene polk 51.74200067520142 +irene polk 52.12200088500977 +irene polk 52.184000205993655 +irene quirinius 52.958333015441895 +irene quirinius 55.29000053405762 +irene quirinius 78.79000091552734 +irene robinson 61.398332595825195 +irene steinbeck 64.34000142415364 +irene thompson 41.92250097543001 +irene underhill 27.72499966621399 +irene underhill 50.783331871032715 +irene van buren 46.65999984741211 +irene van buren 49.71000099182129 +irene xylophone 61.220001220703125 +jessica brown 63.449999491373696 +jessica carson 41.78999951481819 +jessica carson 47.52000069618225 +jessica carson 51.83000183105469 +jessica davidson 34.938333332538605 +jessica davidson 45.91000066200892 +jessica davidson 51.082499504089355 +jessica davidson 64.3099988301595 +jessica ellison 32.53000124295553 +jessica ellison 35.265999984741214 +jessica falkner 54.933334032694496 +jessica garcia 32.575000286102295 +jessica garcia 60.584999084472656 +jessica ichabod 46.704999923706055 +jessica johnson 55.73199977874756 +jessica johnson 72.14000066121419 +jessica miller 55.27799835205078 +jessica nixon 58.53999900817871 +jessica nixon 90.06999969482422 +jessica ovid 36.47500014305115 +jessica ovid 63.03000005086263 +jessica polk 69.52999877929688 +jessica quirinius 19.01333288351695 +jessica quirinius 41.630000829696655 +jessica quirinius 42.58750009536743 +jessica quirinius 47.72999986012777 +jessica robinson 64.81000061035157 +jessica thompson 45.83666737874349 +jessica thompson 57.540000915527344 +jessica underhill 45.360000133514404 +jessica underhill 51.40999889373779 +jessica underhill 64.27250099182129 +jessica van buren 9.739999771118164 +jessica white 38.38999891281128 +jessica white 40.75400023460388 +jessica white 61.89800128936768 +jessica white 62.34749984741211 +jessica white 65.7680004119873 +jessica xylophone 50.808332761128746 +jessica young 18.400000254313152 +jessica young 
49.04400033950806 +jessica zipper 35.039999643961586 +jessica zipper 52.78999869028727 +jessica zipper 55.51499891281128 +katie allen 56.10800056457519 +katie brown 48.86833222707113 +katie davidson 96.91000366210938 +katie ellison 31.106667200724285 +katie ellison 38.356666127840676 +katie falkner 17.24333318074544 +katie garcia 53.429999033610024 +katie garcia 53.933334002892174 +katie hernandez 72.71666717529297 +katie ichabod 39.97999954223633 +katie ichabod 54.99599914550781 +katie ichabod 59.41999944051107 +katie king 44.88599967956543 +katie king 60.2549991607666 +katie king 67.27499961853027 +katie miller 31.399999618530273 +katie miller 50.57666619618734 +katie nixon 23.190000534057617 +katie ovid 52.96000158786774 +katie polk 51.029998779296875 +katie polk 54.93600006103516 +katie robinson 13.890000343322754 +katie van buren 52.78999908765157 +katie van buren 53.21750092506409 +katie white 59.799998950958255 +katie white 64.70499992370605 +katie xylophone 53.54499959945679 +katie young 31.010000228881836 +katie young 37.55500102043152 +katie young 49.87499872843424 +katie zipper 29.79666694998741 +katie zipper 50.95000076293945 +luke allen 43.757998657226565 +luke allen 53.18000030517578 +luke allen 62.09749984741211 +luke allen 70.2933349609375 +luke allen 89.55000305175781 +luke brown 45.95999972025553 +luke davidson 28.950000762939453 +luke davidson 53.20500040054321 +luke ellison 5.159999907016754 +luke ellison 18.630000392595928 +luke ellison 76.88000106811523 +luke falkner 6.175000190734863 +luke falkner 54.0049991607666 +luke garcia 15.320000424981117 +luke garcia 25.47000026702881 +luke ichabod 33.950000286102295 +luke ichabod 57.17500114440918 +luke johnson 14.4399995803833 +luke johnson 37.23333263397217 +luke johnson 52.660000801086426 +luke laertes 11.819999694824219 +luke laertes 23.5 +luke laertes 43.71999907493591 +luke laertes 45.9900016784668 +luke laertes 57.85499906539917 +luke miller 67.85000038146973 +luke ovid 38.04999923706055 +luke ovid 79.64200134277344 +luke polk 55.106666564941406 +luke polk 70.18999862670898 +luke quirinius 26.016666491826374 +luke robinson 61.256665547688804 +luke robinson 72.61999893188477 +luke thompson 47.203332940737404 +luke underhill 49.244999408721924 +luke underhill 59.32999897003174 +luke underhill 72.18333307902019 +luke van buren 64.646666208903 +luke white 67.12000274658203 +luke xylophone 46.74333190917969 +luke zipper 36.16000008583069 +mike allen 10.709999859333038 +mike brown 61.0674991607666 +mike carson 50.90249824523926 +mike carson 52.30200061798096 +mike carson 52.51499938964844 +mike davidson 29.733333269755047 +mike davidson 47.396666526794434 +mike ellison 37.807498931884766 +mike ellison 44.339999198913574 +mike ellison 45.89199924468994 +mike ellison 47.91249990463257 +mike ellison 55.295000076293945 +mike falkner 16.479999542236328 +mike garcia 38.98999913533529 +mike garcia 54.91333325703939 +mike garcia 57.096666971842446 +mike hernandez 59.45000076293945 +mike hernandez 62.446667989095054 +mike ichabod 54.69666544596354 +mike king 38.790000915527344 +mike king 43.2050017118454 +mike king 46.85666592915853 +mike king 46.945000648498535 +mike king 53.382500648498535 +mike king 94.68000030517578 +mike miller 3.9600000381469727 +mike nixon 53.894999742507935 +mike nixon 92.95999908447266 +mike polk 12.449999809265137 +mike polk 39.040000319480896 +mike polk 71.40500068664551 +mike quirinius 89.37999725341797 +mike steinbeck 34.05499875545502 +mike steinbeck 45.909999465942384 +mike steinbeck 
51.286667346954346 +mike steinbeck 59.292500019073486 +mike van buren 43.13999819755554 +mike van buren 49.59000015258789 +mike white 30.53999964396159 +mike white 34.61499961217245 +mike white 48.08999943733215 +mike white 53.967501163482666 +mike young 10.484999895095825 +mike young 47.070000648498535 +mike young 74.58999633789062 +mike zipper 29.029999288419884 +mike zipper 77.96200027465821 +mike zipper 91.57500076293945 +nick allen 29.146666367848713 +nick allen 64.4325008392334 +nick brown 49.429999669392906 +nick davidson 34.36599922180176 +nick ellison 49.345001220703125 +nick ellison 89.01000213623047 +nick falkner 7.820000171661377 +nick falkner 45.025000631809235 +nick garcia 23.9499994913737 +nick garcia 33.49333349863688 +nick garcia 64.33499908447266 +nick ichabod 30.945000171661377 +nick ichabod 56.27499961853027 +nick ichabod 59.24249863624573 +nick johnson 32.3674995303154 +nick johnson 74.30666859944661 +nick laertes 38.38749980926514 +nick miller 82.97000122070312 +nick nixon 70.01333173116048 +nick ovid 56.82999897003174 +nick polk 33.929999669392906 +nick quirinius 58.91199932098389 +nick quirinius 81.16999816894531 +nick robinson 34.53999948501587 +nick robinson 59.80499839782715 +nick steinbeck 57.25333340962728 +nick thompson 51.3725004196167 +nick underhill 55.47333335876465 +nick van buren 74.21666717529297 +nick xylophone 51.82000001271566 +nick young 0.27000001072883606 +nick young 41.02499961853027 +nick zipper 47.72999954223633 +nick zipper 61.917999267578125 +oscar allen 35.21600015163422 +oscar brown 38.03999948501587 +oscar carson 38.09600009918213 +oscar carson 51.34333419799805 +oscar carson 56.9925012588501 +oscar carson 74.00500106811523 +oscar carson 98.51000213623047 +oscar davidson 65.88750076293945 +oscar ellison 50.507500410079956 +oscar ellison 66.57000096638997 +oscar falkner 64.42000198364258 +oscar garcia 66.36333465576172 +oscar hernandez 42.089999516805015 +oscar hernandez 51.21199997067451 +oscar ichabod 25.300000190734863 +oscar ichabod 41.173332850138344 +oscar ichabod 53.44666830698649 +oscar ichabod 71.80000305175781 +oscar johnson 39.9366668065389 +oscar johnson 44.30500118434429 +oscar king 30.59749937057495 +oscar king 46.149999141693115 +oscar king 49.65999984741211 +oscar laertes 5.510000228881836 +oscar laertes 19.099999745686848 +oscar laertes 39.36250039935112 +oscar laertes 45.340000788370766 +oscar nixon 41.619998931884766 +oscar ovid 45.09000015258789 +oscar ovid 82.23999786376953 +oscar ovid 83.47750091552734 +oscar polk 42.54333368937174 +oscar polk 56.8033332824707 +oscar quirinius 52.94500160217285 +oscar quirinius 65.34666570027669 +oscar quirinius 67.00000127156575 +oscar quirinius 79.4800033569336 +oscar robinson 32.16999944051107 +oscar robinson 38.880001068115234 +oscar robinson 63.9433339436849 +oscar robinson 65.80500030517578 +oscar steinbeck 45.73599967956543 +oscar thompson 40.33599853515625 +oscar thompson 47.860000928243004 +oscar thompson 60.529998779296875 +oscar thompson 60.59333292643229 +oscar underhill 43.980000764131546 +oscar van buren 2.180000066757202 +oscar van buren 53.34999942779541 +oscar van buren 61.880001068115234 +oscar white 38.0633331934611 +oscar white 42.584999084472656 +oscar white 55.179999033610024 +oscar white 74.4900016784668 +oscar xylophone 41.3133331934611 +oscar xylophone 55.5049991607666 +oscar xylophone 67.54500007629395 +oscar zipper 15.680000305175781 +oscar zipper 24.019999504089355 +oscar zipper 39.81999969482422 +priscilla brown 42.88666502634684 +priscilla brown 
63.242499351501465 +priscilla brown 77.56999969482422 +priscilla carson 44.799999713897705 +priscilla carson 49.663333892822266 +priscilla carson 78.57333374023438 +priscilla ichabod 56.83666737874349 +priscilla ichabod 58.48666636149088 +priscilla johnson 34.416667779286705 +priscilla johnson 52.890000343322754 +priscilla johnson 53.72666883468628 +priscilla johnson 61.939998626708984 +priscilla johnson 89.1500015258789 +priscilla king 34.30750045180321 +priscilla nixon 27.734999656677246 +priscilla nixon 60.90999984741211 +priscilla ovid 48.13500142097473 +priscilla ovid 66.35999870300293 +priscilla polk 15.149999618530273 +priscilla quirinius 18.606666564941406 +priscilla thompson 48.87000131607056 +priscilla underhill 40.05500078201294 +priscilla underhill 49.54999961853027 +priscilla van buren 42.62666734059652 +priscilla van buren 61.54000017642975 +priscilla van buren 72.80500030517578 +priscilla white 43.177499771118164 +priscilla xylophone 21.489999771118164 +priscilla xylophone 40.144999124109745 +priscilla xylophone 59.61000061035156 +priscilla young 31.610000324249267 +priscilla young 53.71000152826309 +priscilla zipper 18.8799991607666 +priscilla zipper 25.670000076293945 +quinn allen 56.77749991416931 +quinn allen 76.47250080108643 +quinn brown 23.536666870117188 +quinn brown 31.829999446868896 +quinn brown 50.388000297546384 +quinn davidson 41.42499923706055 +quinn davidson 45.90250015258789 +quinn davidson 48.16000032424927 +quinn davidson 71.51000022888184 +quinn ellison 50.6766668955485 +quinn ellison 62.32666842142741 +quinn garcia 39.98599967956543 +quinn garcia 43.27000045776367 +quinn garcia 74.31999969482422 +quinn garcia 92.33000183105469 +quinn ichabod 44.81999969482422 +quinn king 50.99666786193848 +quinn king 74.62000274658203 +quinn laertes 5.884999990463257 +quinn laertes 49.1379997253418 +quinn laertes 56.18000030517578 +quinn nixon 74.66500091552734 +quinn ovid 1.2100000381469727 +quinn quirinius 46.15500068664551 +quinn robinson 44.96249985694885 +quinn steinbeck 24.06999921798706 +quinn steinbeck 41.750000953674316 +quinn thompson 23.744999408721924 +quinn thompson 73.64500045776367 +quinn underhill 41.83333269755045 +quinn underhill 49.63749885559082 +quinn underhill 83.85000228881836 +quinn van buren 54.36333338419596 +quinn young 45.489999771118164 +quinn zipper 22.25 +quinn zipper 33.355000019073486 +rachel allen 15.8100004196167 +rachel allen 71.57666524251302 +rachel brown 2.9600000381469727 +rachel brown 33.022499561309814 +rachel brown 42.442498207092285 +rachel brown 59.21999931335449 +rachel brown 64.52666727701823 +rachel carson 61.17999776204427 +rachel carson 69.85333315531413 +rachel davidson 42.292500495910645 +rachel ellison 10.600000381469727 +rachel falkner 23.615000784397125 +rachel falkner 50.46250069141388 +rachel falkner 54.669999837875366 +rachel falkner 72.96999740600586 +rachel johnson 61.76249885559082 +rachel king 22.005000591278076 +rachel king 66.46500015258789 +rachel laertes 39.025001525878906 +rachel laertes 45.45000076293945 +rachel ovid 0.6000000238418579 +rachel ovid 1.0800000429153442 +rachel polk 78.08499908447266 +rachel quirinius 51.12000131607056 +rachel robinson 30.360000610351562 +rachel robinson 37.683333237965904 +rachel robinson 39.199998219807945 +rachel thompson 0.5600000023841858 +rachel thompson 38.81000010172526 +rachel thompson 49.772499561309814 +rachel underhill 48.45000076293945 +rachel white 42.83999991416931 +rachel white 58.609999656677246 +rachel young 43.130001068115234 +rachel zipper 
7.059999942779541 +rachel zipper 49.676667173703514 +sarah carson 54.022500067949295 +sarah carson 54.18333212534586 +sarah carson 87.81000137329102 +sarah ellison 16.989999771118164 +sarah falkner 73.06500005722046 +sarah falkner 99.36000061035156 +sarah garcia 38.43250036239624 +sarah garcia 64.97333272298177 +sarah garcia 73.6500015258789 +sarah ichabod 57.46000003814697 +sarah ichabod 81.31999969482422 +sarah johnson 34.98499917984009 +sarah johnson 45.099998474121094 +sarah johnson 74.42249870300293 +sarah johnson 74.72000122070312 +sarah king 41.869998931884766 +sarah king 48.25 +sarah miller 41.53499984741211 +sarah ovid 33.38000011444092 +sarah robinson 33.83000183105469 +sarah robinson 66.88999938964844 +sarah steinbeck 31.023332993189495 +sarah white 45.974998474121094 +sarah white 61.54666709899902 +sarah xylophone 61.60666529337565 +sarah young 45.560001373291016 +sarah zipper 60.69000053405762 +tom brown 40.08000135421753 +tom brown 55.44499969482422 +tom carson 5.440000057220459 +tom carson 26.32499885559082 +tom carson 31.476666768391926 +tom davidson 53.00749921798706 +tom ellison 67.32666714986165 +tom ellison 76.73999786376953 +tom ellison 77.29499816894531 +tom falkner 60.130001068115234 +tom falkner 88.22000122070312 +tom hernandez 41.36000061035156 +tom hernandez 81.63999938964844 +tom ichabod 42.08666547139486 +tom johnson 33.75999959309896 +tom johnson 53.47666549682617 +tom king 40.0 +tom laertes 32.05000114440918 +tom laertes 43.46000099182129 +tom miller 21.229999542236328 +tom miller 43.37666702270508 +tom miller 44.41333325703939 +tom nixon 46.35333251953125 +tom ovid 43.04499912261963 +tom polk 38.29999923706055 +tom polk 54.45000076293945 +tom quirinius 19.82000058889389 +tom quirinius 22.580000400543213 +tom robinson 66.16999816894531 +tom robinson 74.18666712443034 +tom robinson 80.77000045776367 +tom robinson 98.72000122070312 +tom steinbeck 44.999999046325684 +tom van buren 31.103334546089172 +tom van buren 40.779998779296875 +tom van buren 63.5099983215332 +tom white 49.06500053405762 +tom young 54.16999912261963 +tom young 78.54999923706055 +tom zipper 48.666666984558105 +ulysses brown 72.79000091552734 +ulysses carson 40.28500175476074 +ulysses carson 71.55000305175781 +ulysses carson 77.41999816894531 +ulysses carson 79.54000091552734 +ulysses davidson 41.90166711807251 +ulysses ellison 96.7300033569336 +ulysses garcia 89.80000305175781 +ulysses hernandez 21.339999516805012 +ulysses hernandez 54.470001220703125 +ulysses hernandez 80.11000061035156 +ulysses ichabod 19.1299991607666 +ulysses ichabod 98.56999969482422 +ulysses johnson 42.55000019073486 +ulysses king 81.57000223795573 +ulysses laertes 1.9199999570846558 +ulysses laertes 11.890000343322754 +ulysses laertes 53.599998474121094 +ulysses miller 40.43499946594238 +ulysses miller 47.78200054168701 +ulysses nixon 74.60333251953125 +ulysses ovid 30.940000534057617 +ulysses polk 8.710000038146973 +ulysses polk 39.17500019073486 +ulysses polk 65.0199966430664 +ulysses polk 68.41333389282227 +ulysses quirinius 65.93499946594238 +ulysses robinson 79.48999786376953 +ulysses steinbeck 32.40999984741211 +ulysses steinbeck 43.93499994277954 +ulysses thompson 79.96000289916992 +ulysses underhill 17.85000006357829 +ulysses underhill 42.5533332824707 +ulysses underhill 46.60000157356262 +ulysses underhill 47.84000049829483 +ulysses underhill 51.85000133514404 +ulysses underhill 67.77999877929688 +ulysses underhill 99.66999816894531 +ulysses van buren 69.89999961853027 +ulysses white 45.01500183343887 +ulysses 
white 71.3933334350586 +ulysses xylophone 27.880000591278076 +ulysses xylophone 39.689998626708984 +ulysses xylophone 54.099998474121094 +ulysses young 32.52000045776367 +ulysses young 39.81333382924398 +ulysses young 80.7933349609375 +victor allen 44.27000045776367 +victor allen 57.994998931884766 +victor brown 56.84499931335449 +victor brown 59.34000015258789 +victor brown 90.37999725341797 +victor brown 91.97000122070312 +victor davidson 52.07000160217285 +victor davidson 54.239999771118164 +victor davidson 68.8033332824707 +victor ellison 45.45750088989735 +victor ellison 58.42999839782715 +victor hernandez 23.164999961853027 +victor hernandez 57.98499870300293 +victor hernandez 59.619998931884766 +victor hernandez 71.42499923706055 +victor hernandez 80.19499969482422 +victor johnson 1.5800000429153442 +victor johnson 46.7450008392334 +victor johnson 72.59000015258789 +victor king 37.559998989105225 +victor king 47.880001068115234 +victor laertes 50.0099983215332 +victor laertes 70.13999938964844 +victor miller 22.1200008392334 +victor nixon 34.029998779296875 +victor nixon 37.08500003814697 +victor ovid 18.815000653266907 +victor polk 3.0 +victor quirinius 26.450000405311584 +victor quirinius 33.080001533031464 +victor robinson 4.590000152587891 +victor robinson 41.21999931335449 +victor steinbeck 41.82500123977661 +victor steinbeck 46.09000015258789 +victor steinbeck 52.720001220703125 +victor thompson 45.346666971842446 +victor van buren 34.970001220703125 +victor van buren 46.57333246866862 +victor white 5.670000076293945 +victor white 74.16999816894531 +victor xylophone 11.220000267028809 +victor xylophone 28.954999923706055 +victor xylophone 34.010000228881836 +victor xylophone 43.179999351501465 +victor xylophone 62.38999938964844 +victor young 70.91000175476074 +victor zipper 48.795000076293945 +wendy allen 0.6100000143051147 +wendy allen 52.64999961853027 +wendy allen 56.06999969482422 +wendy brown 51.874999046325684 +wendy brown 66.73999913533528 +wendy ellison 27.014999389648438 +wendy ellison 94.66000366210938 +wendy falkner 14.425000190734863 +wendy falkner 77.36000061035156 +wendy falkner 85.68000030517578 +wendy garcia 4.409999847412109 +wendy garcia 20.390000343322754 +wendy garcia 38.42500060796738 +wendy garcia 57.25 +wendy hernandez 48.11000061035156 +wendy ichabod 13.149999618530273 +wendy king 33.234999656677246 +wendy king 74.97999954223633 +wendy king 87.94000244140625 +wendy laertes 49.01999855041504 +wendy laertes 54.750000635782875 +wendy laertes 79.98999786376953 +wendy miller 1.2699999809265137 +wendy miller 47.41500186920166 +wendy nixon 30.17999916151166 +wendy nixon 45.91999816894531 +wendy ovid 85.26000213623047 +wendy ovid 86.62999725341797 +wendy polk 43.679999669392906 +wendy polk 54.65333424011866 +wendy quirinius 12.15999984741211 +wendy quirinius 14.300000190734863 +wendy robinson 58.815001487731934 +wendy robinson 71.06999969482422 +wendy robinson 71.77999877929688 +wendy steinbeck 77.31500053405762 +wendy thompson 58.09000015258789 +wendy thompson 85.76000213623047 +wendy underhill 41.23333231608073 +wendy underhill 49.33500003814697 +wendy underhill 81.56000137329102 +wendy van buren 57.459999084472656 +wendy van buren 91.43500137329102 +wendy white 73.68000030517578 +wendy xylophone 31.149999618530273 +wendy xylophone 76.81499862670898 +wendy young 8.449999809265137 +wendy young 33.7599983215332 +xavier allen 45.68000030517578 +xavier allen 52.255000591278076 +xavier allen 83.93000030517578 +xavier brown 7.789999961853027 +xavier brown 
77.15500259399414 +xavier brown 80.35000228881836 +xavier carson 20.790000915527344 +xavier carson 55.30000019073486 +xavier davidson 15.920000076293945 +xavier davidson 58.46999931335449 +xavier davidson 82.41000366210938 +xavier ellison 16.614999771118164 +xavier ellison 77.97000122070312 +xavier garcia 49.415000915527344 +xavier hernandez 6.670000076293945 +xavier hernandez 23.054999828338623 +xavier hernandez 67.26000213623047 +xavier ichabod 4.71999979019165 +xavier ichabod 71.19000244140625 +xavier johnson 27.299999237060547 +xavier johnson 89.0999984741211 +xavier king 22.729999542236328 +xavier king 87.22000122070312 +xavier laertes 24.050000190734863 +xavier ovid 58.08000183105469 +xavier polk 13.869999885559082 +xavier polk 58.98750066757202 +xavier polk 72.62000274658203 +xavier polk 76.93000030517578 +xavier quirinius 22.1200008392334 +xavier quirinius 58.24666786193848 +xavier quirinius 62.52000045776367 +xavier quirinius 89.55000305175781 +xavier thompson 9.930000305175781 +xavier underhill 47.27000045776367 +xavier white 8.369999885559082 +xavier white 75.29000091552734 +xavier xylophone 79.41999816894531 +xavier zipper 8.449999809265137 +yuri allen 30.6299991607666 +yuri allen 52.849998474121094 +yuri brown 75.19000244140625 +yuri brown 84.02999877929688 +yuri carson 6.289999961853027 +yuri carson 91.16000366210938 +yuri ellison 1.1200000047683716 +yuri ellison 86.91999816894531 +yuri falkner 6.739999771118164 +yuri falkner 80.8650016784668 +yuri garcia 27.65999984741211 +yuri hernandez 2.069999933242798 +yuri johnson 0.12999999523162842 +yuri johnson 39.900001525878906 +yuri johnson 48.220001220703125 +yuri king 22.270000457763672 +yuri laertes 10.15999984741211 +yuri laertes 37.59000015258789 +yuri nixon 2.200000047683716 +yuri nixon 17.3700008392334 +yuri polk 26.760000228881836 +yuri polk 28.790000915527344 +yuri polk 82.33999633789062 +yuri quirinius 10.260000228881836 +yuri quirinius 38.69000053405762 +yuri quirinius 57.93000030517578 +yuri steinbeck 56.064998626708984 +yuri steinbeck 75.87999725341797 +yuri thompson 14.920000076293945 +yuri underhill 23.770000457763672 +yuri underhill 83.87000274658203 +yuri white 34.58000183105469 +yuri xylophone 20.3799991607666 +zach allen 65.43000030517578 +zach brown 48.0099983215332 +zach brown 57.08000183105469 +zach brown 58.24999809265137 +zach brown 67.37999725341797 +zach brown 75.7300033569336 +zach carson 67.78500175476074 +zach ellison 6.840000152587891 +zach falkner 50.274999141693115 +zach falkner 91.41999816894531 +zach garcia 32.20000076293945 +zach garcia 35.79999923706055 +zach garcia 69.97000122070312 +zach garcia 84.37999725341797 +zach ichabod 36.88999938964844 +zach ichabod 64.25 +zach king 31.864999771118164 +zach king 46.18000030517578 +zach king 86.93000030517578 +zach miller 2.5999999046325684 +zach miller 21.280000686645508 +zach miller 53.27000045776367 +zach ovid 0.10000000149011612 +zach ovid 23.06999969482422 +zach ovid 92.55000305175781 +zach ovid 94.33999633789062 +zach quirinius 39.209999084472656 +zach robinson 76.72000122070312 +zach steinbeck 85.48999786376953 +zach steinbeck 90.05000305175781 +zach thompson 53.59000015258789 +zach thompson 71.5 +zach underhill 86.22000122070312 +zach white 70.52999877929688 +zach xylophone 29.40999984741211 +zach xylophone 71.01000213623047 +zach young 71.31999969482422 +zach zipper 7.539999961853027 +zach zipper 85.87000274658203 +zach zipper 94.43000030517578 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q 
(deterministic)-4-cd2e3d2344810cb3ba843d4c01c81d7e b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-4-cd2e3d2344810cb3ba843d4c01c81d7e new file mode 100644 index 0000000000000..ee1c26e331a1e --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-4-cd2e3d2344810cb3ba843d4c01c81d7e @@ -0,0 +1,1049 @@ + 17.601666666666667 + 30.72 + 33.07833333333334 +alice allen 23.081666666666667 +alice allen 23.263333333333332 +alice allen 31.38 +alice brown 11.518333333333333 +alice carson 31.99166666666667 +alice davidson 26.986666666666665 +alice falkner 30.513333333333335 +alice garcia 17.13 +alice hernandez 28.126666666666665 +alice hernandez 28.69666666666667 +alice johnson 28.30333333333333 +alice king 23.451666666666668 +alice king 25.20333333333333 +alice king 30.613999999999997 +alice laertes 23.633333333333336 +alice laertes 30.16428571428571 +alice miller 32.50833333333333 +alice nixon 25.278333333333336 +alice nixon 35.345 +alice nixon 36.458333333333336 +alice ovid 26.04714285714286 +alice polk 23.185 +alice quirinius 20.388333333333335 +alice quirinius 23.064999999999998 +alice robinson 30.296666666666667 +alice robinson 33.15 +alice steinbeck 27.894999999999996 +alice steinbeck 31.47833333333334 +alice steinbeck 36.089999999999996 +alice underhill 22.513333333333335 +alice van buren 36.32000000000001 +alice xylophone 27.355000000000004 +alice xylophone 30.505714285714284 +alice xylophone 30.613999999999997 +alice zipper 20.113333333333333 +alice zipper 28.058333333333337 +alice zipper 29.705000000000002 +bob brown 13.546666666666669 +bob brown 16.50333333333333 +bob brown 27.59 +bob carson 25.781666666666666 +bob davidson 18.073333333333334 +bob davidson 23.573333333333334 +bob davidson 31.894999999999996 +bob ellison 16.493333333333336 +bob ellison 17.889999999999997 +bob ellison 25.84142857142857 +bob ellison 33.07833333333334 +bob falkner 21.783333333333335 +bob garcia 16.492857142857144 +bob garcia 23.088333333333335 +bob garcia 26.42833333333333 +bob garcia 32.709999999999994 +bob garcia 33.91833333333334 +bob hernandez 22.303333333333338 +bob ichabod 23.336666666666662 +bob king 20.591666666666665 +bob king 21.244999999999997 +bob king 33.15333333333333 +bob laertes 21.240000000000002 +bob laertes 28.88 +bob miller 32.158750000000005 +bob ovid 21.83666666666667 +bob ovid 23.678571428571427 +bob ovid 25.12333333333333 +bob ovid 31.46 +bob polk 16.3475 +bob quirinius 28.465714285714284 +bob steinbeck 26.99 +bob van buren 26.127142857142854 +bob white 23.021666666666672 +bob white 23.582857142857144 +bob xylophone 26.18166666666667 +bob xylophone 27.995 +bob young 25.495714285714286 +bob zipper 26.435000000000002 +bob zipper 28.384285714285713 +bob zipper 30.65833333333333 +calvin allen 20.812 +calvin brown 20.808 +calvin brown 28.26 +calvin brown 28.37875 +calvin carson 20.38714285714286 +calvin davidson 22.65142857142857 +calvin davidson 23.585714285714285 +calvin ellison 24.6875 +calvin falkner 16.073333333333334 +calvin falkner 17.054285714285715 +calvin falkner 25.2025 +calvin falkner 28.525714285714283 +calvin falkner 33.382 +calvin falkner 37.29666666666667 +calvin garcia 19.924285714285713 +calvin hernandez 21.759999999999998 +calvin johnson 24.03222222222222 +calvin laertes 24.702857142857145 +calvin laertes 28.788333333333338 +calvin nixon 15.631250000000001 +calvin nixon 25.64428571428571 +calvin nixon 29.084999999999997 +calvin ovid 16.243333333333332 +calvin ovid 25.2025 +calvin ovid 25.935555555555553 +calvin 
ovid 29.299999999999997 +calvin polk 29.615 +calvin quirinius 19.294999999999998 +calvin quirinius 31.64625 +calvin robinson 27.811428571428575 +calvin steinbeck 17.14 +calvin steinbeck 17.535 +calvin steinbeck 21.551666666666666 +calvin thompson 28.592857142857145 +calvin thompson 33.382 +calvin underhill 20.113333333333333 +calvin van buren 28.384285714285713 +calvin van buren 32.106 +calvin white 26.948333333333334 +calvin white 28.256249999999998 +calvin xylophone 25.33666666666667 +calvin xylophone 27.061428571428568 +calvin xylophone 28.384285714285713 +calvin young 19.331666666666667 +calvin young 24.63 +calvin zipper 16.08125 +calvin zipper 28.80555555555556 +david allen 26.21375 +david allen 26.948333333333334 +david brown 14.222 +david brown 27.1 +david davidson 21.062857142857144 +david davidson 26.12 +david davidson 30.951428571428576 +david davidson 31.96142857142857 +david ellison 22.687142857142856 +david ellison 23.435 +david ellison 25.442999999999998 +david hernandez 28.279999999999998 +david ichabod 16.3475 +david ichabod 23.751428571428573 +david laertes 29.07 +david nixon 25.535714285714285 +david ovid 26.75714285714286 +david ovid 29.912857142857142 +david quirinius 17.179999999999996 +david quirinius 20.808 +david quirinius 22.90625 +david robinson 25.274 +david robinson 25.885 +david thompson 22.19125 +david underhill 20.812 +david underhill 21.546666666666667 +david underhill 28.26 +david van buren 24.472857142857148 +david van buren 32.75125 +david white 22.861428571428576 +david xylophone 19.3325 +david xylophone 26.930000000000003 +david xylophone 30.093333333333334 +david young 21.12375 +david young 25.03857142857143 +ethan allen 27.038333333333338 +ethan brown 16.3475 +ethan brown 19.37 +ethan brown 23.73 +ethan brown 25.57166666666667 +ethan brown 26.168333333333333 +ethan brown 31.893333333333334 +ethan carson 25.655714285714286 +ethan ellison 27.807777777777776 +ethan ellison 32.244285714285716 +ethan falkner 28.287142857142857 +ethan falkner 31.411428571428576 +ethan garcia 24.153750000000002 +ethan hernandez 17.986 +ethan johnson 31.54571428571429 +ethan king 22.62 +ethan laertes 15.045555555555556 +ethan laertes 17.889999999999997 +ethan laertes 24.28 +ethan laertes 25.2025 +ethan laertes 26.56888888888889 +ethan laertes 28.37875 +ethan laertes 34.84571428571429 +ethan miller 28.592857142857145 +ethan nixon 29.86833333333333 +ethan ovid 17.85166666666667 +ethan polk 16.463 +ethan polk 18.184545454545457 +ethan polk 25.737142857142857 +ethan polk 29.895000000000003 +ethan quirinius 21.827777777777776 +ethan quirinius 23.42 +ethan quirinius 35.54333333333333 +ethan robinson 24.03222222222222 +ethan robinson 36.35333333333333 +ethan underhill 21.545555555555556 +ethan van buren 15.21142857142857 +ethan white 29.702857142857145 +ethan white 33.33 +ethan xylophone 29.521666666666665 +ethan zipper 23.994 +ethan zipper 26.765 +fred davidson 27.729999999999997 +fred davidson 29.90625 +fred davidson 30.804999999999996 +fred ellison 16.720000000000002 +fred ellison 22.17125 +fred ellison 33.15833333333334 +fred falkner 17.13142857142857 +fred falkner 26.82 +fred falkner 31.925555555555555 +fred hernandez 28.094285714285718 +fred ichabod 23.352500000000003 +fred ichabod 32.906666666666666 +fred johnson 16.9925 +fred king 20.125 +fred king 30.377142857142854 +fred laertes 26.458571428571428 +fred miller 29.66666666666667 +fred nixon 19.565555555555555 +fred nixon 21.830000000000002 +fred nixon 25.828333333333333 +fred nixon 28.094285714285718 +fred polk 
21.744999999999997 +fred polk 22.301666666666666 +fred polk 24.156666666666666 +fred polk 28.217142857142857 +fred quirinius 27.495 +fred quirinius 29.615714285714283 +fred robinson 24.243636363636366 +fred steinbeck 17.91333333333333 +fred steinbeck 21.12375 +fred steinbeck 26.47125 +fred underhill 26.43166666666667 +fred van buren 16.165714285714284 +fred van buren 23.285 +fred van buren 24.875714285714288 +fred van buren 27.878333333333334 +fred white 29.498571428571427 +fred young 17.889999999999997 +fred young 19.565555555555555 +fred zipper 21.581428571428575 +gabriella allen 22.03142857142857 +gabriella allen 26.87375 +gabriella brown 29.675714285714285 +gabriella brown 32.44 +gabriella carson 28.75 +gabriella davidson 27.531250000000004 +gabriella ellison 19.82 +gabriella ellison 27.353333333333335 +gabriella falkner 17.535 +gabriella falkner 19.487500000000004 +gabriella falkner 30.964999999999996 +gabriella garcia 20.544 +gabriella hernandez 20.818333333333335 +gabriella hernandez 28.094285714285718 +gabriella ichabod 10.58 +gabriella ichabod 18.64272727272727 +gabriella ichabod 20.504444444444445 +gabriella ichabod 23.185 +gabriella ichabod 23.35375 +gabriella king 16.18 +gabriella king 27.887500000000003 +gabriella laertes 23.799090909090907 +gabriella miller 15.695714285714283 +gabriella ovid 23.515454545454546 +gabriella ovid 33.33 +gabriella polk 20.38714285714286 +gabriella polk 35.77285714285715 +gabriella steinbeck 16.18 +gabriella steinbeck 32.464999999999996 +gabriella thompson 26.477777777777778 +gabriella thompson 27.29777777777778 +gabriella thompson 30.87666666666667 +gabriella van buren 28.513333333333335 +gabriella van buren 32.41111111111111 +gabriella white 26.765 +gabriella young 24.022499999999997 +gabriella young 29.521666666666665 +gabriella zipper 21.43727272727273 +gabriella zipper 32.106 +holly allen 24.271428571428572 +holly brown 22.959 +holly brown 27.498 +holly falkner 29.66666666666667 +holly hernandez 14.179999999999998 +holly hernandez 22.396666666666665 +holly hernandez 27.434000000000005 +holly hernandez 27.887500000000003 +holly ichabod 27.87375 +holly ichabod 32.525 +holly ichabod 34.042857142857144 +holly johnson 20.808 +holly johnson 25.024285714285718 +holly johnson 30.487142857142857 +holly king 23.185 +holly king 29.008888888888894 +holly laertes 19.41 +holly miller 29.89333333333333 +holly nixon 23.162857142857142 +holly nixon 28.876250000000002 +holly polk 22.7825 +holly polk 26.297499999999996 +holly robinson 24.160000000000004 +holly thompson 19.565555555555555 +holly thompson 27.048999999999996 +holly thompson 29.70555555555556 +holly underhill 17.876250000000002 +holly underhill 27.820000000000004 +holly underhill 30.613999999999997 +holly underhill 30.708 +holly van buren 20.113333333333333 +holly white 23.185 +holly white 29.64272727272727 +holly xylophone 26.400909090909092 +holly young 27.807777777777776 +holly young 31.63 +holly zipper 27.401999999999997 +holly zipper 28.384285714285713 +irene allen 35.345 +irene brown 22.527500000000003 +irene brown 28.384285714285713 +irene brown 32.81875 +irene carson 27.10666666666667 +irene ellison 16.720000000000002 +irene ellison 28.592857142857145 +irene falkner 19.41 +irene falkner 30.564999999999998 +irene garcia 16.9925 +irene garcia 24.03222222222222 +irene garcia 26.297499999999996 +irene ichabod 27.540000000000003 +irene ichabod 29.34875 +irene johnson 25.418181818181814 +irene laertes 22.124285714285712 +irene laertes 22.200000000000003 +irene laertes 24.446666666666665 +irene 
miller 30.166666666666668 +irene nixon 18.922222222222224 +irene nixon 25.2025 +irene nixon 33.382 +irene ovid 28.256249999999998 +irene ovid 31.63 +irene ovid 32.088750000000005 +irene polk 24.63 +irene polk 25.296363636363637 +irene polk 27.137142857142855 +irene polk 30.65222222222222 +irene polk 33.760000000000005 +irene quirinius 27.044999999999998 +irene quirinius 33.07833333333334 +irene quirinius 41.865 +irene robinson 32.18875 +irene steinbeck 16.463 +irene thompson 25.281666666666666 +irene underhill 24.4025 +irene underhill 28.531 +irene van buren 25.532222222222224 +irene van buren 32.50142857142857 +irene xylophone 26.288181818181815 +jessica brown 28.968181818181822 +jessica carson 19.41 +jessica carson 24.854285714285716 +jessica carson 25.406363636363633 +jessica davidson 22.19625 +jessica davidson 23.888 +jessica davidson 26.297499999999996 +jessica davidson 26.825454545454537 +jessica ellison 22.07777777777778 +jessica ellison 33.33 +jessica falkner 22.637272727272727 +jessica garcia 14.749999999999996 +jessica garcia 29.675714285714285 +jessica ichabod 31.831249999999997 +jessica johnson 21.546666666666667 +jessica johnson 29.986363636363638 +jessica miller 28.735000000000003 +jessica nixon 19.13111111111111 +jessica nixon 26.244999999999997 +jessica ovid 25.274 +jessica ovid 33.181666666666665 +jessica polk 26.79222222222222 +jessica quirinius 20.38714285714286 +jessica quirinius 25.776000000000003 +jessica quirinius 28.26 +jessica quirinius 29.605000000000008 +jessica robinson 24.5625 +jessica thompson 25.736 +jessica thompson 30.87363636363636 +jessica underhill 16.400000000000002 +jessica underhill 25.529090909090915 +jessica underhill 31.63 +jessica van buren 24.446666666666665 +jessica white 20.812 +jessica white 23.26 +jessica white 27.807777777777776 +jessica white 29.031000000000006 +jessica white 30.654545454545453 +jessica xylophone 15.296666666666667 +jessica young 26.718333333333334 +jessica young 27.853749999999998 +jessica zipper 20.3575 +jessica zipper 24.446666666666665 +jessica zipper 29.276363636363637 +katie allen 29.029090909090915 +katie brown 24.156666666666666 +katie davidson 15.383749999999997 +katie ellison 20.978333333333335 +katie ellison 26.96 +katie falkner 24.5625 +katie garcia 27.807777777777776 +katie garcia 28.287142857142857 +katie hernandez 23.667272727272724 +katie ichabod 14.222 +katie ichabod 20.553749999999997 +katie ichabod 31.831249999999997 +katie king 20.05444444444445 +katie king 20.242222222222225 +katie king 23.342857142857145 +katie miller 26.21857142857143 +katie miller 27.675000000000004 +katie nixon 14.476999999999999 +katie ovid 28.37875 +katie polk 20.99 +katie polk 25.090000000000003 +katie robinson 30.65222222222222 +katie van buren 27.133636363636366 +katie van buren 29.675714285714285 +katie white 22.555714285714288 +katie white 24.463749999999997 +katie xylophone 25.74142857142857 +katie young 23.011250000000004 +katie young 26.650000000000002 +katie young 29.301428571428573 +katie zipper 28.26 +katie zipper 29.675714285714285 +luke allen 16.9925 +luke allen 20.595000000000002 +luke allen 27.54181818181818 +luke allen 27.887500000000003 +luke allen 33.07833333333334 +luke brown 29.34875 +luke davidson 27.3575 +luke davidson 31.473333333333333 +luke ellison 14.024444444444443 +luke ellison 22.555714285714288 +luke ellison 28.592857142857145 +luke falkner 21.855 +luke falkner 27.044999999999998 +luke garcia 27.887500000000003 +luke garcia 31.237000000000002 +luke ichabod 28.198571428571427 +luke ichabod 
34.345000000000006 +luke johnson 21.239999999999995 +luke johnson 25.462727272727275 +luke johnson 30.188888888888894 +luke laertes 18.344 +luke laertes 20.817 +luke laertes 27.401999999999997 +luke laertes 30.72285714285714 +luke laertes 41.865 +luke miller 22.539000000000005 +luke ovid 16.615454545454543 +luke ovid 26.06625 +luke polk 28.163000000000004 +luke polk 28.840909090909097 +luke quirinius 27.077142857142857 +luke robinson 24.816363636363644 +luke robinson 27.110909090909093 +luke thompson 28.44454545454545 +luke underhill 22.175 +luke underhill 25.518888888888892 +luke underhill 27.34125 +luke van buren 16.54 +luke white 20.544 +luke xylophone 24.5625 +luke zipper 24.764285714285712 +mike allen 23.860000000000003 +mike brown 31.016363636363643 +mike carson 26.066363636363636 +mike carson 28.947142857142858 +mike carson 33.382 +mike davidson 20.544 +mike davidson 21.239999999999995 +mike ellison 18.922 +mike ellison 21.4175 +mike ellison 25.45272727272727 +mike ellison 27.26 +mike ellison 28.39888888888889 +mike falkner 29.397777777777776 +mike garcia 20.544 +mike garcia 24.582 +mike garcia 34.84571428571429 +mike hernandez 10.4925 +mike hernandez 17.7 +mike ichabod 26.772727272727273 +mike king 17.889999999999997 +mike king 19.294999999999998 +mike king 20.004285714285714 +mike king 23.197999999999997 +mike king 23.285 +mike king 27.401999999999997 +mike miller 31.587272727272726 +mike nixon 17.775555555555556 +mike nixon 27.044999999999998 +mike polk 22.175 +mike polk 23.751428571428573 +mike polk 28.095 +mike quirinius 19.13111111111111 +mike steinbeck 14.222 +mike steinbeck 18.100909090909088 +mike steinbeck 18.344 +mike steinbeck 33.760000000000005 +mike van buren 23.42 +mike van buren 25.828333333333333 +mike white 19.13111111111111 +mike white 25.755714285714284 +mike white 29.031000000000006 +mike white 30.516999999999996 +mike young 26.765 +mike young 27.766 +mike young 28.409090909090914 +mike zipper 17.306 +mike zipper 33.23 +mike zipper 41.865 +nick allen 19.331666666666667 +nick allen 32.106 +nick brown 27.578181818181818 +nick davidson 29.100000000000005 +nick ellison 24.764285714285712 +nick ellison 29.521666666666665 +nick falkner 22.555714285714288 +nick falkner 23.15888888888888 +nick garcia 21.546666666666667 +nick garcia 26.25090909090909 +nick garcia 30.166666666666668 +nick ichabod 21.855 +nick ichabod 23.479000000000003 +nick ichabod 29.100000000000005 +nick johnson 25.274 +nick johnson 29.994 +nick laertes 25.820909090909094 +nick miller 19.87888888888889 +nick nixon 17.082 +nick ovid 33.597777777777786 +nick polk 25.736 +nick quirinius 20.707500000000003 +nick quirinius 28.094285714285718 +nick robinson 22.396666666666665 +nick robinson 25.298749999999995 +nick steinbeck 21.192857142857143 +nick thompson 30.72285714285714 +nick underhill 29.345 +nick van buren 25.152727272727272 +nick xylophone 26.948333333333334 +nick young 23.751428571428573 +nick young 24.810000000000002 +nick zipper 24.854285714285716 +nick zipper 27.353333333333335 +oscar allen 18.815 +oscar brown 26.948333333333334 +oscar carson 24.764285714285712 +oscar carson 27.766 +oscar carson 28.094285714285718 +oscar carson 28.31555555555556 +oscar carson 35.22818181818182 +oscar davidson 17.535 +oscar ellison 22.121428571428574 +oscar ellison 28.735000000000003 +oscar falkner 19.294999999999998 +oscar garcia 20.62636363636364 +oscar hernandez 22.539000000000005 +oscar hernandez 23.31888888888889 +oscar ichabod 20.818333333333335 +oscar ichabod 21.546666666666667 +oscar ichabod 
26.914545454545454 +oscar ichabod 28.811111111111106 +oscar johnson 22.381818181818183 +oscar johnson 24.266363636363643 +oscar king 15.296666666666667 +oscar king 25.580000000000002 +oscar king 28.37875 +oscar laertes 21.51818181818182 +oscar laertes 23.285 +oscar laertes 24.4025 +oscar laertes 25.345454545454547 +oscar nixon 18.88111111111111 +oscar ovid 24.854285714285716 +oscar ovid 25.274 +oscar ovid 33.29636363636364 +oscar polk 19.331666666666667 +oscar polk 29.34875 +oscar quirinius 22.928 +oscar quirinius 25.66727272727273 +oscar quirinius 25.970909090909092 +oscar quirinius 29.66666666666667 +oscar robinson 20.90666666666667 +oscar robinson 21.855 +oscar robinson 23.42 +oscar robinson 32.90500000000001 +oscar steinbeck 32.02818181818182 +oscar thompson 20.817 +oscar thompson 21.477000000000004 +oscar thompson 21.843636363636367 +oscar thompson 23.559000000000005 +oscar underhill 22.555714285714288 +oscar van buren 27.210000000000008 +oscar van buren 28.592857142857145 +oscar van buren 31.375454545454545 +oscar white 20.818333333333335 +oscar white 21.748 +oscar white 24.582 +oscar white 28.287142857142857 +oscar xylophone 25.845 +oscar xylophone 28.735000000000003 +oscar xylophone 30.72285714285714 +oscar zipper 24.511111111111113 +oscar zipper 25.067777777777778 +oscar zipper 26.21857142857143 +priscilla brown 14.222 +priscilla brown 27.044999999999998 +priscilla brown 30.19909090909091 +priscilla carson 18.07 +priscilla carson 20.70875 +priscilla carson 26.297499999999996 +priscilla ichabod 29.451111111111118 +priscilla ichabod 29.878888888888884 +priscilla johnson 16.9925 +priscilla johnson 22.050000000000004 +priscilla johnson 24.093000000000004 +priscilla johnson 29.200000000000003 +priscilla johnson 29.246 +priscilla king 15.536666666666669 +priscilla nixon 18.9 +priscilla nixon 30.516999999999996 +priscilla ovid 16.005000000000003 +priscilla ovid 29.88111111111111 +priscilla polk 28.018888888888892 +priscilla quirinius 23.064999999999998 +priscilla thompson 27.077142857142857 +priscilla underhill 28.160999999999994 +priscilla underhill 28.56571428571429 +priscilla van buren 20.7 +priscilla van buren 21.830000000000002 +priscilla van buren 24.665 +priscilla white 29.23375 +priscilla xylophone 19.331666666666667 +priscilla xylophone 23.185 +priscilla xylophone 27.34125 +priscilla young 26.32777777777778 +priscilla young 30.613999999999997 +priscilla zipper 13.498 +priscilla zipper 31.972727272727266 +quinn allen 28.786666666666672 +quinn allen 29.471818181818175 +quinn brown 26.314285714285713 +quinn brown 27.38 +quinn brown 28.39888888888889 +quinn davidson 20.808 +quinn davidson 22.71285714285714 +quinn davidson 23.306250000000002 +quinn davidson 27.34125 +quinn ellison 25.002857142857142 +quinn ellison 33.760000000000005 +quinn garcia 20.544 +quinn garcia 27.055999999999994 +quinn garcia 29.183333333333334 +quinn garcia 31.831249999999997 +quinn ichabod 22.101818181818185 +quinn king 17.535 +quinn king 18.035714285714285 +quinn laertes 24.511111111111113 +quinn laertes 28.876250000000002 +quinn laertes 29.202857142857145 +quinn nixon 21.75111111111111 +quinn ovid 29.64125 +quinn quirinius 18.922 +quinn robinson 23.985 +quinn steinbeck 27.077142857142857 +quinn steinbeck 28.160999999999994 +quinn thompson 21.99888888888889 +quinn thompson 34.46857142857143 +quinn underhill 20.113333333333333 +quinn underhill 23.107272727272726 +quinn underhill 26.224285714285717 +quinn van buren 23.612222222222222 +quinn young 24.5625 +quinn zipper 17.889999999999997 +quinn zipper 
20.163333333333338 +rachel allen 28.446666666666665 +rachel allen 41.865 +rachel brown 20.92875 +rachel brown 26.21857142857143 +rachel brown 27.905 +rachel brown 30.166666666666668 +rachel brown 35.345 +rachel carson 28.735000000000003 +rachel carson 31.715999999999998 +rachel davidson 29.100000000000005 +rachel ellison 27.055454545454552 +rachel falkner 14.812499999999998 +rachel falkner 28.876250000000002 +rachel falkner 29.308888888888887 +rachel falkner 31.831249999999997 +rachel johnson 31.63 +rachel king 24.511111111111113 +rachel king 30.873749999999998 +rachel laertes 17.306 +rachel laertes 26.765 +rachel ovid 24.042727272727276 +rachel ovid 28.01181818181818 +rachel polk 21.12375 +rachel quirinius 29.831249999999997 +rachel robinson 10.4925 +rachel robinson 22.264444444444447 +rachel robinson 33.43125 +rachel thompson 16.720000000000002 +rachel thompson 26.905714285714286 +rachel thompson 28.876250000000002 +rachel underhill 21.75111111111111 +rachel white 22.175 +rachel white 31.580000000000002 +rachel young 28.150000000000002 +rachel zipper 22.187142857142856 +rachel zipper 33.760000000000005 +sarah carson 21.86818181818182 +sarah carson 22.175 +sarah carson 33.43125 +sarah ellison 17.535 +sarah falkner 29.34875 +sarah falkner 29.64125 +sarah garcia 10.4925 +sarah garcia 20.812 +sarah garcia 28.2175 +sarah ichabod 26.948333333333334 +sarah ichabod 33.62375 +sarah johnson 21.546666666666667 +sarah johnson 24.978000000000005 +sarah johnson 29.608000000000004 +sarah johnson 33.760000000000005 +sarah king 19.41 +sarah king 27.055999999999994 +sarah miller 24.815454545454543 +sarah ovid 28.31625 +sarah robinson 13.498 +sarah robinson 28.256249999999998 +sarah steinbeck 23.26 +sarah white 21.75111111111111 +sarah white 31.63 +sarah xylophone 21.964545454545455 +sarah young 29.335555555555555 +sarah zipper 29.521666666666665 +tom brown 22.873333333333335 +tom brown 30.415555555555557 +tom carson 22.4025 +tom carson 28.39888888888889 +tom carson 29.64125 +tom davidson 30.61142857142857 +tom ellison 23.568 +tom ellison 27.884999999999998 +tom ellison 32.02625 +tom falkner 13.72 +tom falkner 19.849999999999998 +tom hernandez 16.720000000000002 +tom hernandez 29.974285714285713 +tom ichabod 20.113333333333333 +tom johnson 27.077142857142857 +tom johnson 32.90500000000001 +tom king 21.855 +tom laertes 17.981666666666666 +tom laertes 21.80857142857143 +tom miller 18.922 +tom miller 21.239999999999995 +tom miller 22.396666666666665 +tom nixon 27.005000000000003 +tom ovid 34.84571428571429 +tom polk 29.521666666666665 +tom polk 29.805 +tom quirinius 24.764285714285712 +tom quirinius 36.46857142857143 +tom robinson 16.18 +tom robinson 18.07 +tom robinson 27.34125 +tom robinson 34.958571428571425 +tom steinbeck 30.613999999999997 +tom van buren 22.6475 +tom van buren 23.13555555555556 +tom van buren 24.4025 +tom white 27.715714285714284 +tom young 19.41 +tom young 24.63 +tom zipper 22.902 +ulysses brown 16.223333333333333 +ulysses carson 16.3475 +ulysses carson 22.057500000000005 +ulysses carson 28.256249999999998 +ulysses carson 28.27285714285714 +ulysses davidson 24.701249999999998 +ulysses ellison 29.52333333333333 +ulysses garcia 33.382 +ulysses hernandez 18.421818181818185 +ulysses hernandez 20.443749999999998 +ulysses hernandez 22.365 +ulysses ichabod 24.63 +ulysses ichabod 33.24333333333333 +ulysses johnson 33.43125 +ulysses king 27.083333333333332 +ulysses laertes 26.915000000000003 +ulysses laertes 27.305 +ulysses laertes 28.501111111111115 +ulysses miller 18.22 +ulysses miller 
26.21857142857143 +ulysses nixon 30.65222222222222 +ulysses ovid 21.366666666666667 +ulysses polk 22.555714285714288 +ulysses polk 22.66625 +ulysses polk 25.11777777777778 +ulysses polk 27.141666666666666 +ulysses quirinius 33.07833333333334 +ulysses robinson 21.12375 +ulysses steinbeck 23.751428571428573 +ulysses steinbeck 25.931428571428572 +ulysses thompson 22.264444444444447 +ulysses underhill 20.812 +ulysses underhill 23.751428571428573 +ulysses underhill 25.071666666666662 +ulysses underhill 25.828333333333333 +ulysses underhill 25.865 +ulysses underhill 28.722499999999997 +ulysses underhill 35.268888888888895 +ulysses van buren 22.134999999999998 +ulysses white 15.296666666666667 +ulysses white 32.093333333333334 +ulysses xylophone 20.38714285714286 +ulysses xylophone 25.274 +ulysses xylophone 29.64125 +ulysses young 22.213333333333335 +ulysses young 22.90285714285714 +ulysses young 32.93125 +victor allen 24.82875 +victor allen 27.51 +victor brown 21.621250000000003 +victor brown 23.73 +victor brown 26.21857142857143 +victor brown 27.548571428571428 +victor davidson 22.391666666666666 +victor davidson 33.16428571428572 +victor davidson 35.197500000000005 +victor ellison 11.100000000000001 +victor ellison 30.96857142857143 +victor hernandez 10.4925 +victor hernandez 18.922 +victor hernandez 24.301250000000003 +victor hernandez 26.69857142857143 +victor hernandez 35.358333333333334 +victor johnson 16.580000000000002 +victor johnson 27.516666666666666 +victor johnson 32.106 +victor king 19.962857142857143 +victor king 33.01857142857143 +victor laertes 21.78142857142857 +victor laertes 33.10999999999999 +victor miller 21.93285714285714 +victor nixon 20.419999999999998 +victor nixon 33.69 +victor ovid 28.75857142857143 +victor polk 18.43111111111111 +victor quirinius 17.84777777777778 +victor quirinius 27.53 +victor robinson 19.37 +victor robinson 20.38714285714286 +victor steinbeck 20.818333333333335 +victor steinbeck 25.16 +victor steinbeck 30.503749999999997 +victor thompson 23.987142857142857 +victor van buren 27.009999999999998 +victor van buren 33.43125 +victor white 24.322857142857142 +victor white 28.287142857142857 +victor xylophone 11.807142857142859 +victor xylophone 13.988571428571428 +victor xylophone 16.720000000000002 +victor xylophone 19.686666666666667 +victor xylophone 37.20428571428572 +victor young 22.264444444444447 +victor zipper 24.854285714285716 +wendy allen 28.24142857142857 +wendy allen 29.675714285714285 +wendy allen 34.275 +wendy brown 22.482857142857142 +wendy brown 27.79714285714286 +wendy ellison 16.80888888888889 +wendy ellison 18.135 +wendy falkner 22.628888888888884 +wendy falkner 23.325000000000003 +wendy falkner 24.0375 +wendy garcia 19.307142857142853 +wendy garcia 21.761428571428574 +wendy garcia 24.63 +wendy garcia 24.854285714285716 +wendy hernandez 16.60875 +wendy ichabod 28.26 +wendy king 22.5 +wendy king 24.793333333333333 +wendy king 28.252857142857145 +wendy laertes 25.881428571428568 +wendy laertes 30.338333333333328 +wendy laertes 30.52857142857143 +wendy miller 15.478333333333332 +wendy miller 25.34333333333333 +wendy nixon 19.54714285714286 +wendy nixon 27.003333333333334 +wendy ovid 14.283750000000001 +wendy ovid 30.878333333333334 +wendy polk 21.69375 +wendy polk 24.63 +wendy quirinius 28.731428571428573 +wendy quirinius 29.74333333333333 +wendy robinson 16.720000000000002 +wendy robinson 23.834285714285716 +wendy robinson 29.911666666666672 +wendy steinbeck 29.272857142857145 +wendy thompson 18.17875 +wendy thompson 22.544285714285714 
+wendy underhill 21.69625 +wendy underhill 27.077142857142857 +wendy underhill 30.03333333333333 +wendy van buren 28.624285714285715 +wendy van buren 29.28333333333333 +wendy white 24.4025 +wendy xylophone 16.84 +wendy xylophone 23.426666666666666 +wendy young 20.80125 +wendy young 32.693333333333335 +xavier allen 16.535 +xavier allen 17.398333333333337 +xavier allen 35.708333333333336 +xavier brown 20.787142857142857 +xavier brown 24.764285714285712 +xavier brown 31.784999999999997 +xavier carson 20.818333333333335 +xavier carson 32.106 +xavier davidson 16.862857142857145 +xavier davidson 20.53625 +xavier davidson 27.353333333333335 +xavier ellison 17.991666666666667 +xavier ellison 23.976666666666663 +xavier garcia 35.84428571428572 +xavier hernandez 22.654285714285713 +xavier hernandez 26.948333333333334 +xavier hernandez 28.075 +xavier ichabod 20.344285714285714 +xavier ichabod 20.818333333333335 +xavier johnson 15.754285714285714 +xavier johnson 19.490000000000002 +xavier king 29.246666666666666 +xavier king 29.521666666666665 +xavier laertes 19.294999999999998 +xavier ovid 28.51 +xavier polk 12.728333333333333 +xavier polk 19.37 +xavier polk 22.548333333333332 +xavier polk 28.465714285714284 +xavier quirinius 9.991428571428571 +xavier quirinius 24.156666666666666 +xavier quirinius 25.69666666666667 +xavier quirinius 25.828333333333333 +xavier thompson 23.961428571428574 +xavier underhill 21.830000000000002 +xavier white 19.331666666666667 +xavier white 35.345 +xavier xylophone 21.187142857142856 +xavier zipper 17.488333333333333 +yuri allen 15.705714285714285 +yuri allen 20.808 +yuri brown 19.53 +yuri brown 22.457142857142856 +yuri carson 25.699999999999996 +yuri carson 27.216666666666665 +yuri ellison 15.034999999999998 +yuri ellison 28.463333333333335 +yuri falkner 17.81833333333333 +yuri falkner 19.294999999999998 +yuri garcia 28.287142857142857 +yuri hernandez 32.395 +yuri johnson 25.828333333333333 +yuri johnson 27.301666666666666 +yuri johnson 29.578333333333337 +yuri king 19.921666666666663 +yuri laertes 16.18 +yuri laertes 30.519999999999996 +yuri nixon 16.383333333333333 +yuri nixon 25.828333333333333 +yuri polk 16.18 +yuri polk 20.503333333333334 +yuri polk 30.16333333333333 +yuri quirinius 20.311666666666667 +yuri quirinius 23.185 +yuri quirinius 24.828333333333333 +yuri steinbeck 19.331666666666667 +yuri steinbeck 28.50666666666667 +yuri thompson 35.27 +yuri underhill 23.042857142857144 +yuri underhill 28.786666666666665 +yuri white 30.72285714285714 +yuri xylophone 24.173333333333332 +zach allen 8.983333333333333 +zach brown 18.922 +zach brown 23.036666666666665 +zach brown 29.72666666666667 +zach brown 31.58285714285714 +zach brown 33.07833333333334 +zach carson 27.110000000000003 +zach ellison 18.168333333333333 +zach falkner 16.18 +zach falkner 30.83285714285714 +zach garcia 16.586666666666666 +zach garcia 22.53333333333333 +zach garcia 28.13166666666667 +zach garcia 34.84571428571429 +zach ichabod 17.535 +zach ichabod 30.72285714285714 +zach king 19.878333333333334 +zach king 25.643333333333334 +zach king 28.646666666666665 +zach miller 23.285 +zach miller 23.366666666666664 +zach miller 30.46833333333333 +zach ovid 23.94666666666667 +zach ovid 28.75166666666667 +zach ovid 28.763333333333335 +zach ovid 34.84571428571429 +zach quirinius 20.755 +zach robinson 21.546666666666667 +zach steinbeck 27.243333333333336 +zach steinbeck 30.073333333333334 +zach thompson 14.222 +zach thompson 24.755 +zach underhill 31.885 +zach white 20.208333333333332 +zach xylophone 10.485 
+zach xylophone 20.113333333333333 +zach young 20.176666666666666 +zach zipper 21.709999999999997 +zach zipper 22.264999999999997 +zach zipper 34.01166666666667 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-5-ee44c5cdc80e1c832b702f9fb76d8145 b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-5-ee44c5cdc80e1c832b702f9fb76d8145 new file mode 100644 index 0000000000000..a9ae190825a01 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-5-ee44c5cdc80e1c832b702f9fb76d8145 @@ -0,0 +1,1049 @@ + 65560 + 65718 + 65740 +alice allen 65662 +alice allen 65720 +alice allen 65758 +alice brown 65696 +alice carson 65559 +alice davidson 65547 +alice falkner 65669 +alice garcia 65613 +alice hernandez 65737 +alice hernandez 65784 +alice johnson 65739 +alice king 65660 +alice king 65738 +alice king 131281 +alice laertes 65669 +alice laertes 65671 +alice miller 65590 +alice nixon 65586 +alice nixon 65595 +alice nixon 65604 +alice ovid 65737 +alice polk 65548 +alice quirinius 65636 +alice quirinius 65728 +alice robinson 65606 +alice robinson 65789 +alice steinbeck 65578 +alice steinbeck 65673 +alice steinbeck 65786 +alice underhill 65750 +alice van buren 65562 +alice xylophone 65585 +alice xylophone 65599 +alice xylophone 131247 +alice zipper 65553 +alice zipper 65662 +alice zipper 65766 +bob brown 65584 +bob brown 65777 +bob brown 65783 +bob carson 65713 +bob davidson 65664 +bob davidson 65693 +bob davidson 65768 +bob ellison 65591 +bob ellison 65624 +bob ellison 65721 +bob ellison 65760 +bob falkner 65789 +bob garcia 65585 +bob garcia 65598 +bob garcia 65673 +bob garcia 65754 +bob garcia 65782 +bob hernandez 131340 +bob ichabod 65549 +bob king 65715 +bob king 65757 +bob king 65783 +bob laertes 65602 +bob laertes 131447 +bob miller 65608 +bob ovid 65564 +bob ovid 65686 +bob ovid 196959 +bob ovid 196973 +bob polk 65594 +bob quirinius 65700 +bob steinbeck 65637 +bob van buren 65778 +bob white 65543 +bob white 65605 +bob xylophone 65574 +bob xylophone 65666 +bob young 65556 +bob zipper 65559 +bob zipper 65633 +bob zipper 65739 +calvin allen 65669 +calvin brown 65537 +calvin brown 131272 +calvin brown 197027 +calvin carson 65637 +calvin davidson 65541 +calvin davidson 65564 +calvin ellison 65667 +calvin falkner 65573 +calvin falkner 65596 +calvin falkner 65778 +calvin falkner 131397 +calvin falkner 131411 +calvin falkner 131433 +calvin garcia 131212 +calvin hernandez 131251 +calvin johnson 65731 +calvin laertes 65570 +calvin laertes 65684 +calvin nixon 65654 +calvin nixon 131386 +calvin nixon 131503 +calvin ovid 65554 +calvin ovid 65643 +calvin ovid 65715 +calvin ovid 196944 +calvin polk 65731 +calvin quirinius 65741 +calvin quirinius 65769 +calvin robinson 131320 +calvin steinbeck 131271 +calvin steinbeck 131326 +calvin steinbeck 131415 +calvin thompson 65560 +calvin thompson 131244 +calvin underhill 196944 +calvin van buren 65771 +calvin van buren 131138 +calvin white 65553 +calvin white 65561 +calvin xylophone 65575 +calvin xylophone 65596 +calvin xylophone 262686 +calvin young 65746 +calvin young 131168 +calvin zipper 65669 +calvin zipper 131476 +david allen 65588 +david allen 131222 +david brown 65637 +david brown 131303 +david davidson 65756 +david davidson 65778 +david davidson 65779 +david davidson 131342 +david ellison 65724 +david ellison 65724 +david ellison 131224 +david hernandez 197083 +david ichabod 131454 +david ichabod 197085 +david laertes 65762 +david nixon 65536 +david ovid 65623 +david 
ovid 196766 +david quirinius 65759 +david quirinius 65779 +david quirinius 131303 +david robinson 65762 +david robinson 65775 +david thompson 65550 +david underhill 65662 +david underhill 65751 +david underhill 131198 +david van buren 65634 +david van buren 262584 +david white 65678 +david xylophone 65537 +david xylophone 131426 +david xylophone 131447 +david young 65551 +david young 131255 +ethan allen 131460 +ethan brown 65539 +ethan brown 65617 +ethan brown 65685 +ethan brown 65685 +ethan brown 65722 +ethan brown 131483 +ethan carson 197189 +ethan ellison 65714 +ethan ellison 131302 +ethan falkner 131222 +ethan falkner 131333 +ethan garcia 131507 +ethan hernandez 65618 +ethan johnson 65536 +ethan king 131280 +ethan laertes 65562 +ethan laertes 65597 +ethan laertes 65628 +ethan laertes 65680 +ethan laertes 65760 +ethan laertes 131304 +ethan laertes 328329 +ethan miller 328296 +ethan nixon 65766 +ethan ovid 65697 +ethan polk 65589 +ethan polk 65615 +ethan polk 131206 +ethan polk 197082 +ethan quirinius 65591 +ethan quirinius 196912 +ethan quirinius 196957 +ethan robinson 65547 +ethan robinson 65659 +ethan underhill 65570 +ethan van buren 131252 +ethan white 65677 +ethan white 197039 +ethan xylophone 65595 +ethan zipper 65593 +ethan zipper 131365 +fred davidson 65595 +fred davidson 65721 +fred davidson 131221 +fred ellison 65548 +fred ellison 65691 +fred ellison 65771 +fred falkner 65637 +fred falkner 131474 +fred falkner 196920 +fred hernandez 131226 +fred ichabod 131109 +fred ichabod 131520 +fred johnson 131332 +fred king 65694 +fred king 197016 +fred laertes 131354 +fred miller 65536 +fred nixon 65560 +fred nixon 65612 +fred nixon 65705 +fred nixon 196929 +fred polk 65656 +fred polk 131231 +fred polk 262645 +fred polk 262733 +fred quirinius 131486 +fred quirinius 196950 +fred robinson 65623 +fred steinbeck 65544 +fred steinbeck 65755 +fred steinbeck 131253 +fred underhill 131188 +fred van buren 65561 +fred van buren 65745 +fred van buren 131380 +fred van buren 328270 +fred white 131136 +fred young 65594 +fred young 131551 +fred zipper 196885 +gabriella allen 65677 +gabriella allen 131283 +gabriella brown 65753 +gabriella brown 197180 +gabriella carson 65586 +gabriella davidson 65565 +gabriella ellison 65706 +gabriella ellison 131505 +gabriella falkner 65767 +gabriella falkner 131183 +gabriella falkner 131397 +gabriella garcia 131127 +gabriella hernandez 131304 +gabriella hernandez 131304 +gabriella ichabod 65559 +gabriella ichabod 65712 +gabriella ichabod 131297 +gabriella ichabod 131311 +gabriella ichabod 131460 +gabriella king 65657 +gabriella king 197031 +gabriella laertes 131543 +gabriella miller 131300 +gabriella ovid 65556 +gabriella ovid 131260 +gabriella polk 65790 +gabriella polk 131425 +gabriella steinbeck 65582 +gabriella steinbeck 131248 +gabriella thompson 131528 +gabriella thompson 197181 +gabriella thompson 262632 +gabriella van buren 65644 +gabriella van buren 131238 +gabriella white 65638 +gabriella young 65699 +gabriella young 65774 +gabriella zipper 65754 +gabriella zipper 196762 +holly allen 65596 +holly brown 131315 +holly brown 131368 +holly falkner 65720 +holly hernandez 65602 +holly hernandez 65686 +holly hernandez 131387 +holly hernandez 131554 +holly ichabod 65752 +holly ichabod 131308 +holly ichabod 131473 +holly johnson 65755 +holly johnson 131240 +holly johnson 131277 +holly king 131286 +holly king 131303 +holly laertes 196950 +holly miller 131381 +holly nixon 196941 +holly nixon 328184 +holly polk 197132 +holly polk 262782 +holly robinson 131241 +holly 
thompson 65578 +holly thompson 65713 +holly thompson 197092 +holly underhill 65654 +holly underhill 131323 +holly underhill 131385 +holly underhill 131504 +holly van buren 131449 +holly white 131092 +holly white 262734 +holly xylophone 196792 +holly young 65765 +holly young 131229 +holly zipper 131151 +holly zipper 131545 +irene allen 131109 +irene brown 65765 +irene brown 131368 +irene brown 393929 +irene carson 262770 +irene ellison 196956 +irene ellison 196982 +irene falkner 131287 +irene falkner 197046 +irene garcia 65660 +irene garcia 131286 +irene garcia 131375 +irene ichabod 65645 +irene ichabod 131442 +irene johnson 131179 +irene laertes 131324 +irene laertes 131381 +irene laertes 131407 +irene miller 262822 +irene nixon 197105 +irene nixon 262409 +irene nixon 262565 +irene ovid 65734 +irene ovid 196935 +irene ovid 262836 +irene polk 65551 +irene polk 131189 +irene polk 131189 +irene polk 196943 +irene polk 328365 +irene quirinius 131369 +irene quirinius 196998 +irene quirinius 262855 +irene robinson 131259 +irene steinbeck 65683 +irene thompson 262719 +irene underhill 131291 +irene underhill 131386 +irene van buren 131216 +irene van buren 262539 +irene xylophone 131348 +jessica brown 393772 +jessica carson 65747 +jessica carson 131207 +jessica carson 131232 +jessica davidson 65606 +jessica davidson 65675 +jessica davidson 196917 +jessica davidson 197030 +jessica ellison 131108 +jessica ellison 196885 +jessica falkner 131270 +jessica garcia 197059 +jessica garcia 328458 +jessica ichabod 197028 +jessica johnson 131177 +jessica johnson 197085 +jessica miller 197024 +jessica nixon 131549 +jessica nixon 196682 +jessica ovid 65751 +jessica ovid 196890 +jessica polk 459409 +jessica quirinius 131222 +jessica quirinius 131248 +jessica quirinius 131294 +jessica quirinius 393878 +jessica robinson 131174 +jessica thompson 131336 +jessica thompson 196927 +jessica underhill 131218 +jessica underhill 131267 +jessica underhill 197086 +jessica van buren 65615 +jessica white 65544 +jessica white 65594 +jessica white 197012 +jessica white 262435 +jessica white 262571 +jessica xylophone 196866 +jessica young 65711 +jessica young 131183 +jessica zipper 196897 +jessica zipper 262523 +jessica zipper 262695 +katie allen 196740 +katie brown 328113 +katie davidson 131371 +katie ellison 131248 +katie ellison 197182 +katie falkner 131441 +katie garcia 131384 +katie garcia 197051 +katie hernandez 131296 +katie ichabod 131495 +katie ichabod 197131 +katie ichabod 197275 +katie king 131252 +katie king 262588 +katie king 262861 +katie miller 65661 +katie miller 262723 +katie nixon 65669 +katie ovid 65681 +katie polk 65784 +katie polk 197249 +katie robinson 131251 +katie van buren 131237 +katie van buren 197141 +katie white 262510 +katie white 262860 +katie xylophone 197034 +katie young 65644 +katie young 328173 +katie young 393859 +katie zipper 65733 +katie zipper 328287 +luke allen 65776 +luke allen 131268 +luke allen 196819 +luke allen 196855 +luke allen 328011 +luke brown 196967 +luke davidson 65656 +luke davidson 131573 +luke ellison 65582 +luke ellison 131343 +luke ellison 197118 +luke falkner 196797 +luke falkner 196837 +luke garcia 65778 +luke garcia 393974 +luke ichabod 65629 +luke ichabod 262574 +luke johnson 131302 +luke johnson 131312 +luke johnson 131361 +luke laertes 131226 +luke laertes 131504 +luke laertes 197018 +luke laertes 197153 +luke laertes 197177 +luke miller 197052 +luke ovid 65569 +luke ovid 262745 +luke polk 65658 +luke polk 262627 +luke quirinius 131233 +luke robinson 65634 +luke 
robinson 262569 +luke thompson 196858 +luke underhill 65651 +luke underhill 131240 +luke underhill 328248 +luke van buren 131398 +luke white 65693 +luke xylophone 131312 +luke zipper 131297 +mike allen 196928 +mike brown 197149 +mike carson 65751 +mike carson 131284 +mike carson 393711 +mike davidson 196917 +mike davidson 262912 +mike ellison 65598 +mike ellison 131366 +mike ellison 131412 +mike ellison 131509 +mike ellison 262704 +mike falkner 328183 +mike garcia 131530 +mike garcia 328305 +mike garcia 328461 +mike hernandez 131301 +mike hernandez 328384 +mike ichabod 131157 +mike king 196965 +mike king 197091 +mike king 197121 +mike king 262471 +mike king 262527 +mike king 328279 +mike miller 131317 +mike nixon 131328 +mike nixon 262653 +mike polk 131240 +mike polk 196899 +mike polk 262885 +mike quirinius 525126 +mike steinbeck 65550 +mike steinbeck 131201 +mike steinbeck 131490 +mike steinbeck 262490 +mike van buren 131548 +mike van buren 262547 +mike white 197000 +mike white 197060 +mike white 262425 +mike white 328482 +mike young 196935 +mike young 196976 +mike young 328084 +mike zipper 131147 +mike zipper 197075 +mike zipper 328517 +nick allen 131192 +nick allen 197024 +nick brown 131503 +nick davidson 262686 +nick ellison 197119 +nick ellison 197119 +nick falkner 65583 +nick falkner 328561 +nick garcia 131318 +nick garcia 262755 +nick garcia 328281 +nick ichabod 131430 +nick ichabod 196812 +nick ichabod 328593 +nick johnson 131453 +nick johnson 262597 +nick laertes 196732 +nick miller 131490 +nick nixon 262547 +nick ovid 328266 +nick polk 196852 +nick quirinius 131438 +nick quirinius 328176 +nick robinson 131326 +nick robinson 196980 +nick steinbeck 131250 +nick thompson 65610 +nick underhill 65619 +nick van buren 196795 +nick xylophone 196972 +nick young 394136 +nick young 459634 +nick zipper 262954 +nick zipper 394218 +oscar allen 262674 +oscar brown 196916 +oscar carson 131099 +oscar carson 131330 +oscar carson 196731 +oscar carson 196733 +oscar carson 196918 +oscar davidson 262554 +oscar ellison 65630 +oscar ellison 197116 +oscar falkner 197145 +oscar garcia 328305 +oscar hernandez 197022 +oscar hernandez 328315 +oscar ichabod 131302 +oscar ichabod 131309 +oscar ichabod 196760 +oscar ichabod 196874 +oscar johnson 196942 +oscar johnson 197203 +oscar king 196793 +oscar king 196944 +oscar king 328236 +oscar laertes 131208 +oscar laertes 262522 +oscar laertes 262842 +oscar laertes 328364 +oscar nixon 65596 +oscar ovid 131228 +oscar ovid 262580 +oscar ovid 393817 +oscar polk 131078 +oscar polk 131260 +oscar quirinius 131103 +oscar quirinius 196748 +oscar quirinius 196829 +oscar quirinius 262838 +oscar robinson 196874 +oscar robinson 262803 +oscar robinson 393773 +oscar robinson 394087 +oscar steinbeck 328432 +oscar thompson 196826 +oscar thompson 196992 +oscar thompson 262593 +oscar thompson 459401 +oscar underhill 131301 +oscar van buren 131134 +oscar van buren 328162 +oscar van buren 394034 +oscar white 131457 +oscar white 262345 +oscar white 328538 +oscar white 459337 +oscar xylophone 65773 +oscar xylophone 262708 +oscar xylophone 262906 +oscar zipper 196904 +oscar zipper 262512 +oscar zipper 328262 +priscilla brown 196950 +priscilla brown 328237 +priscilla brown 328624 +priscilla carson 262488 +priscilla carson 262510 +priscilla carson 262703 +priscilla ichabod 131178 +priscilla ichabod 131303 +priscilla johnson 131224 +priscilla johnson 196906 +priscilla johnson 196994 +priscilla johnson 197184 +priscilla johnson 394171 +priscilla king 262692 +priscilla nixon 262691 +priscilla 
nixon 394188 +priscilla ovid 65541 +priscilla ovid 197067 +priscilla polk 394009 +priscilla quirinius 131306 +priscilla thompson 196875 +priscilla underhill 197084 +priscilla underhill 262701 +priscilla van buren 65685 +priscilla van buren 131368 +priscilla van buren 196814 +priscilla white 196893 +priscilla xylophone 131473 +priscilla xylophone 262597 +priscilla xylophone 262785 +priscilla young 131392 +priscilla young 262788 +priscilla zipper 393888 +priscilla zipper 394031 +quinn allen 197095 +quinn allen 394225 +quinn brown 131470 +quinn brown 131473 +quinn brown 262642 +quinn davidson 197079 +quinn davidson 197112 +quinn davidson 262510 +quinn davidson 459427 +quinn ellison 197268 +quinn ellison 328130 +quinn garcia 65604 +quinn garcia 131321 +quinn garcia 197067 +quinn garcia 328528 +quinn ichabod 65564 +quinn king 65649 +quinn king 196879 +quinn laertes 65542 +quinn laertes 196877 +quinn laertes 262466 +quinn nixon 196837 +quinn ovid 525126 +quinn quirinius 328235 +quinn robinson 131378 +quinn steinbeck 131484 +quinn steinbeck 262528 +quinn thompson 197030 +quinn thompson 262717 +quinn underhill 262791 +quinn underhill 328146 +quinn underhill 393824 +quinn van buren 197234 +quinn young 65647 +quinn zipper 131466 +quinn zipper 262658 +rachel allen 65661 +rachel allen 196935 +rachel brown 131220 +rachel brown 328076 +rachel brown 328320 +rachel brown 393835 +rachel brown 524988 +rachel carson 131259 +rachel carson 459393 +rachel davidson 262632 +rachel ellison 393845 +rachel falkner 196947 +rachel falkner 262474 +rachel falkner 394046 +rachel falkner 525086 +rachel johnson 65658 +rachel king 131354 +rachel king 196907 +rachel laertes 131391 +rachel laertes 197105 +rachel ovid 262664 +rachel ovid 328195 +rachel polk 328389 +rachel quirinius 262779 +rachel robinson 262491 +rachel robinson 262862 +rachel robinson 590712 +rachel thompson 197034 +rachel thompson 328158 +rachel thompson 394094 +rachel underhill 197033 +rachel white 131399 +rachel white 197190 +rachel young 196967 +rachel zipper 328223 +rachel zipper 394149 +sarah carson 131379 +sarah carson 196870 +sarah carson 262491 +sarah ellison 197095 +sarah falkner 131262 +sarah falkner 328251 +sarah garcia 196963 +sarah garcia 197030 +sarah garcia 459657 +sarah ichabod 262504 +sarah ichabod 262766 +sarah johnson 131409 +sarah johnson 262783 +sarah johnson 328591 +sarah johnson 394043 +sarah king 196998 +sarah king 328416 +sarah miller 196893 +sarah ovid 131199 +sarah robinson 262868 +sarah robinson 394066 +sarah steinbeck 262650 +sarah white 197059 +sarah white 262579 +sarah xylophone 131336 +sarah young 394123 +sarah zipper 262818 +tom brown 196848 +tom brown 328268 +tom carson 197328 +tom carson 262517 +tom carson 656251 +tom davidson 262864 +tom ellison 196974 +tom ellison 328416 +tom ellison 393921 +tom falkner 393809 +tom falkner 459407 +tom hernandez 262525 +tom hernandez 328085 +tom ichabod 197048 +tom johnson 328321 +tom johnson 393865 +tom king 196951 +tom laertes 262657 +tom laertes 459805 +tom miller 131278 +tom miller 131459 +tom miller 262633 +tom nixon 262588 +tom ovid 262595 +tom polk 328470 +tom polk 328584 +tom quirinius 262597 +tom quirinius 262681 +tom robinson 196978 +tom robinson 328481 +tom robinson 459857 +tom robinson 525095 +tom steinbeck 262426 +tom van buren 131389 +tom van buren 328095 +tom van buren 328313 +tom white 328128 +tom young 131080 +tom young 393692 +tom zipper 197167 +ulysses brown 196815 +ulysses carson 131277 +ulysses carson 262450 +ulysses carson 262937 +ulysses carson 328311 +ulysses 
davidson 262750 +ulysses ellison 262445 +ulysses garcia 328445 +ulysses hernandez 131414 +ulysses hernandez 196871 +ulysses hernandez 394370 +ulysses ichabod 393834 +ulysses ichabod 459582 +ulysses johnson 262966 +ulysses king 131363 +ulysses laertes 262739 +ulysses laertes 328412 +ulysses laertes 328462 +ulysses miller 262661 +ulysses miller 328360 +ulysses nixon 394194 +ulysses ovid 328289 +ulysses polk 65563 +ulysses polk 197046 +ulysses polk 328294 +ulysses polk 590698 +ulysses quirinius 525643 +ulysses robinson 394160 +ulysses steinbeck 196783 +ulysses steinbeck 262778 +ulysses thompson 262607 +ulysses underhill 131214 +ulysses underhill 196937 +ulysses underhill 197027 +ulysses underhill 262623 +ulysses underhill 262623 +ulysses underhill 262648 +ulysses underhill 262836 +ulysses van buren 196944 +ulysses white 197033 +ulysses white 393988 +ulysses xylophone 262695 +ulysses xylophone 328151 +ulysses xylophone 328747 +ulysses young 196903 +ulysses young 394037 +ulysses young 459782 +victor allen 197189 +victor allen 262651 +victor brown 262544 +victor brown 262799 +victor brown 327900 +victor brown 591265 +victor davidson 197173 +victor davidson 262486 +victor davidson 328274 +victor ellison 328618 +victor ellison 393962 +victor hernandez 197041 +victor hernandez 197132 +victor hernandez 262771 +victor hernandez 328261 +victor hernandez 459902 +victor johnson 131155 +victor johnson 131169 +victor johnson 394168 +victor king 131486 +victor king 328509 +victor laertes 262573 +victor laertes 328435 +victor miller 196784 +victor nixon 196987 +victor nixon 394249 +victor ovid 196882 +victor polk 262462 +victor quirinius 65620 +victor quirinius 328301 +victor robinson 328334 +victor robinson 394031 +victor steinbeck 65661 +victor steinbeck 262560 +victor steinbeck 262750 +victor thompson 65548 +victor van buren 197173 +victor van buren 328261 +victor white 262588 +victor white 328039 +victor xylophone 131203 +victor xylophone 262596 +victor xylophone 328191 +victor xylophone 393913 +victor xylophone 459542 +victor young 131258 +victor zipper 131349 +wendy allen 131402 +wendy allen 196954 +wendy allen 328359 +wendy brown 328365 +wendy brown 459501 +wendy ellison 262718 +wendy ellison 328191 +wendy falkner 197009 +wendy falkner 262430 +wendy falkner 328177 +wendy garcia 65746 +wendy garcia 393974 +wendy garcia 459883 +wendy garcia 459926 +wendy hernandez 65650 +wendy ichabod 262665 +wendy king 262545 +wendy king 328229 +wendy king 393951 +wendy laertes 262739 +wendy laertes 262794 +wendy laertes 328315 +wendy miller 131377 +wendy miller 328161 +wendy nixon 131258 +wendy nixon 196893 +wendy ovid 196952 +wendy ovid 459594 +wendy polk 328520 +wendy polk 394310 +wendy quirinius 328703 +wendy quirinius 394360 +wendy robinson 131316 +wendy robinson 394030 +wendy robinson 459665 +wendy steinbeck 262645 +wendy thompson 262725 +wendy thompson 393865 +wendy underhill 328445 +wendy underhill 394295 +wendy underhill 460068 +wendy van buren 65699 +wendy van buren 196964 +wendy white 328135 +wendy xylophone 262894 +wendy xylophone 525344 +wendy young 197017 +wendy young 721936 +xavier allen 197025 +xavier allen 525393 +xavier allen 525839 +xavier brown 197058 +xavier brown 262626 +xavier brown 328388 +xavier carson 196990 +xavier carson 328415 +xavier davidson 65644 +xavier davidson 262745 +xavier davidson 393825 +xavier ellison 197095 +xavier ellison 328447 +xavier garcia 262590 +xavier hernandez 196847 +xavier hernandez 197077 +xavier hernandez 393838 +xavier ichabod 262600 +xavier ichabod 328157 +xavier 
johnson 197084 +xavier johnson 262785 +xavier king 196919 +xavier king 262774 +xavier laertes 262770 +xavier ovid 328414 +xavier polk 196844 +xavier polk 328474 +xavier polk 394013 +xavier polk 590931 +xavier quirinius 65650 +xavier quirinius 131140 +xavier quirinius 328382 +xavier quirinius 459669 +xavier thompson 393799 +xavier underhill 197012 +xavier white 196858 +xavier white 262712 +xavier xylophone 131250 +xavier zipper 394070 +yuri allen 131129 +yuri allen 459977 +yuri brown 262640 +yuri brown 393858 +yuri carson 459799 +yuri carson 591063 +yuri ellison 197085 +yuri ellison 459558 +yuri falkner 196857 +yuri falkner 525350 +yuri garcia 328378 +yuri hernandez 262588 +yuri johnson 393861 +yuri johnson 394444 +yuri johnson 525638 +yuri king 525526 +yuri laertes 131551 +yuri laertes 459611 +yuri nixon 262644 +yuri nixon 393936 +yuri polk 328197 +yuri polk 328404 +yuri polk 328481 +yuri quirinius 131092 +yuri quirinius 196898 +yuri quirinius 525159 +yuri steinbeck 394037 +yuri steinbeck 525180 +yuri thompson 459710 +yuri underhill 328325 +yuri underhill 459781 +yuri white 131252 +yuri xylophone 262809 +zach allen 394026 +zach brown 262789 +zach brown 262789 +zach brown 459521 +zach brown 459846 +zach brown 590938 +zach carson 262320 +zach ellison 262757 +zach falkner 262608 +zach falkner 262608 +zach garcia 262818 +zach garcia 328314 +zach garcia 393686 +zach garcia 394011 +zach ichabod 262518 +zach ichabod 262563 +zach king 196780 +zach king 196905 +zach king 459991 +zach miller 196923 +zach miller 393813 +zach miller 393892 +zach ovid 196876 +zach ovid 262643 +zach ovid 328023 +zach ovid 459615 +zach quirinius 262471 +zach robinson 196967 +zach steinbeck 131394 +zach steinbeck 459294 +zach thompson 131340 +zach thompson 525538 +zach underhill 131304 +zach white 65733 +zach xylophone 262810 +zach xylophone 459455 +zach young 393615 +zach zipper 197130 +zach zipper 262496 +zach zipper 393937 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-6-4d78f7b1d172d20c91f5867bc13a42a0 b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-6-4d78f7b1d172d20c91f5867bc13a42a0 new file mode 100644 index 0000000000000..b3f08818f491a --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-6-4d78f7b1d172d20c91f5867bc13a42a0 @@ -0,0 +1,1049 @@ +0.08 0.07999999821186066 +0.1 0.10000000149011612 +0.13 0.12999999523162842 +0.15 0.15000000596046448 +0.27 0.27000001072883606 +0.28 0.2800000011920929 +0.43 0.4300000071525574 +0.52 0.5199999809265137 +0.56 0.5600000023841858 +0.6 0.6000000238418579 +0.61 0.6100000143051147 +0.79 0.7900000214576721 +0.84 0.8399999737739563 +0.98 0.9800000190734863 +1.02 1.2899999916553497 +1.08 1.0800000429153442 +1.08 1.0800000429153442 +1.12 1.1200000047683716 +1.21 2.0000000596046448 +1.25 1.25 +1.27 1.2699999809265137 +1.29 1.2899999618530273 +1.31 1.309999942779541 +1.58 1.5800000429153442 +1.87 1.8700000047683716 +1.91 1.909999966621399 +1.92 3.1699999570846558 +2.07 2.069999933242798 +2.18 2.180000066757202 +2.2 2.200000047683716 +2.35 2.3499999046325684 +2.6 2.5999999046325684 +2.79 2.7899999618530273 +2.92 2.9200000762939453 +2.96 2.9600000381469727 +2.96 2.9600000381469727 +2.97 2.9700000286102295 +3.0 3.0 +3.21 3.340000033378601 +3.28 4.399999976158142 +3.33 3.3299999237060547 +3.61 3.609999895095825 +3.62 3.619999885559082 +3.82 3.819999933242798 +3.86 3.859999895095825 +3.96 3.9600000381469727 +3.97 3.9700000286102295 +4.17 7.7799999713897705 +4.32 4.320000171661377 +4.35 
4.349999904632568 +4.41 4.409999847412109 +4.46 4.460000038146973 +4.47 4.46999979019165 +4.57 4.570000171661377 +4.59 4.590000152587891 +4.71 4.710000038146973 +4.72 4.71999979019165 +4.79 4.789999961853027 +4.8 4.800000190734863 +4.92 4.920000076293945 +5.08 5.079999923706055 +5.24 9.559999942779541 +5.28 5.28000020980835 +5.4 5.400000095367432 +5.44 5.440000057220459 +5.45 5.449999809265137 +5.51 5.510000228881836 +5.54 5.539999961853027 +5.62 5.619999885559082 +5.67 5.670000076293945 +5.85 5.849999904632568 +5.88 5.880000114440918 +6.29 6.289999961853027 +6.55 6.550000190734863 +6.57 11.160000324249268 +6.63 9.59000015258789 +6.67 6.670000076293945 +6.72 6.71999979019165 +6.74 6.739999771118164 +6.84 6.840000152587891 +6.87 6.869999885559082 +7.05 7.050000190734863 +7.06 11.769999980926514 +7.11 7.110000133514404 +7.54 7.539999961853027 +7.56 7.559999942779541 +7.79 7.789999961853027 +7.82 7.820000171661377 +7.96 7.960000038146973 +7.96 7.960000038146973 +7.98 7.980000019073486 +8.07 8.069999694824219 +8.07 8.069999694824219 +8.32 8.319999694824219 +8.37 11.339999914169312 +8.42 11.760000109672546 +8.45 8.449999809265137 +8.45 8.449999809265137 +8.45 10.319999814033508 +8.45 16.009999752044678 +8.57 8.569999694824219 +8.61 8.609999656677246 +8.67 8.670000076293945 +8.71 8.710000038146973 +8.79 8.789999961853027 +8.91 13.480000019073486 +9.04 9.039999961853027 +9.13 9.130000114440918 +9.19 15.479999542236328 +9.22 9.220000267028809 +9.25 9.25 +9.26 9.260000228881836 +9.35 12.350000381469727 +9.48 9.479999542236328 +9.56 12.480000495910645 +9.57 9.569999694824219 +9.57 9.569999694824219 +9.68 9.680000305175781 +9.7 9.699999809265137 +9.71 17.25 +9.74 9.739999771118164 +9.8 9.800000190734863 +9.81 9.8100004196167 +9.93 9.930000305175781 +10.09 10.09000015258789 +10.09 10.09000015258789 +10.13 15.640000343322754 +10.16 18.139999866485596 +10.17 14.970000267028809 +10.19 10.1899995803833 +10.2 10.199999809265137 +10.22 10.220000267028809 +10.25 18.859999656677246 +10.26 10.260000228881836 +10.29 10.289999961853027 +10.6 10.600000381469727 +10.66 10.65999984741211 +10.67 10.670000076293945 +10.73 10.729999542236328 +11.15 20.27999973297119 +11.18 13.360000371932983 +11.19 18.979999542236328 +11.22 11.220000267028809 +11.34 16.62000036239624 +11.55 11.550000190734863 +11.57 11.569999694824219 +11.68 17.080000400543213 +11.82 11.819999694824219 +11.89 11.890000343322754 +11.91 11.90999984741211 +12.02 12.020000457763672 +12.16 12.15999984741211 +12.19 20.149999618530273 +12.32 12.319999694824219 +12.42 16.27999997138977 +12.44 12.4399995803833 +12.45 16.799999713897705 +12.46 12.460000038146973 +12.5 15.460000038146973 +12.54 12.539999961853027 +12.85 12.850000381469727 +12.9 12.899999618530273 +13.01 28.47000026702881 +13.1 23.030000686645508 +13.15 20.96999979019165 +13.35 13.350000381469727 +13.87 13.869999885559082 +13.89 13.890000343322754 +13.94 13.9399995803833 +13.99 13.989999771118164 +14.13 24.22000026702881 +14.21 14.210000038146973 +14.3 29.270000457763672 +14.44 14.4399995803833 +14.84 14.84000015258789 +14.92 14.920000076293945 +14.92 25.18000030517578 +14.93 30.40999984741211 +15.1 17.700000286102295 +15.15 24.40999984741211 +15.18 26.730000495910645 +15.22 15.220000267028809 +15.26 15.260000228881836 +15.3 25.5 +15.37 15.369999885559082 +15.45 15.449999809265137 +15.63 28.110000610351562 +15.75 15.75 +15.81 15.8100004196167 +15.86 25.079999923706055 +15.9 21.34999942779541 +15.92 15.920000076293945 +16.08 22.75 +16.09 16.520000159740448 +16.24 27.809999465942383 +16.25 
19.419999957084656 +16.48 16.479999542236328 +16.69 16.690000534057617 +16.99 16.989999771118164 +16.99 42.489999771118164 +17.16 21.12999987602234 +17.37 31.360000610351562 +17.74 19.049999713897705 +17.79 45.60000038146973 +17.87 18.710000813007355 +18.2 18.200000762939453 +18.5 31.350000381469727 +18.56 18.559999465942383 +18.63 26.589999198913574 +18.63 30.389999270439148 +18.86 18.96000061184168 +18.89 18.889999389648438 +18.93 18.93000030517578 +19.0 35.62000036239624 +19.03 19.030000686645508 +19.06 19.059999465942383 +19.06 19.059999465942383 +19.13 45.719998359680176 +19.14 19.139999389648438 +19.28 27.600000381469727 +19.69 36.49000024795532 +20.07 46.80000019073486 +20.38 51.72999954223633 +20.64 20.639999389648438 +20.67 31.830000400543213 +20.79 20.790000915527344 +20.81 20.809999465942383 +20.82 21.419999718666077 +20.82 26.359999656677246 +21.18 21.18000030517578 +21.19 21.190000534057617 +21.23 21.229999542236328 +21.28 29.350000381469727 +21.32 21.31999969482422 +21.45 40.510000228881836 +21.49 30.739999771118164 +21.61 37.0600004196167 +21.7 27.58000087738037 +21.8 21.799999237060547 +21.94 23.940000593662262 +22.01 28.850000381469727 +22.08 22.079999923706055 +22.12 22.1200008392334 +22.12 22.1200008392334 +22.25 22.25 +22.27 22.270000457763672 +22.36 22.360000610351562 +22.68 22.68000030517578 +22.78 47.19000053405762 +22.85 33.070000648498535 +22.85 43.980000257492065 +22.94 35.38000011444092 +23.07 23.06999969482422 +23.13 28.799999237060547 +23.17 44.58999979496002 +23.19 23.190000534057617 +23.44 23.440000534057617 +23.45 24.74000072479248 +23.6 33.16000032424927 +23.77 23.770000457763672 +23.96 23.959999084472656 +24.02 24.020000457763672 +24.28 43.310001373291016 +24.49 42.62999963760376 +24.52 32.59000015258789 +24.73 45.369998931884766 +24.79 24.790000915527344 +24.8 34.369998931884766 +24.83 36.05000019073486 +24.86 65.3700008392334 +25.11 44.170000076293945 +25.28 25.280000686645508 +25.37 48.05000114440918 +25.42 40.78999996185303 +25.55 26.62999927997589 +25.67 37.69000053405762 +25.88 61.49999952316284 +26.08 26.079999923706055 +26.39 34.959999084472656 +26.43 26.43000030517578 +26.47 31.389999389648438 +26.49 26.489999771118164 +26.49 48.56999969482422 +26.64 64.32999992370605 +26.71 36.999999046325684 +26.73 45.69000015407801 +26.76 26.760000228881836 +27.07 28.649999737739563 +27.12 32.20000076293945 +27.3 70.61000061035156 +27.31 56.579999923706055 +27.63 27.6299991607666 +27.66 27.65999984741211 +27.72 46.60999870300293 +27.87 27.8700008392334 +28.11 44.59000015258789 +28.31 52.079999923706055 +28.45 74.05000114440918 +28.5 35.36999988555908 +28.56 40.71999931335449 +28.69 28.690000534057617 +28.71 55.46999931335449 +28.79 28.790000915527344 +28.89 56.489999771118164 +28.95 33.410000801086426 +29.02 56.64999961853027 +29.24 99.85000038146973 +29.36 62.52000093460083 +29.4 72.02999925613403 +29.41 64.77999973297119 +29.54 29.540000915527344 +29.59 37.37000012397766 +29.78 66.77999973297119 +30.25 30.32999999821186 +30.36 30.360000610351562 +30.37 31.660000830888748 +30.61 30.610000610351562 +30.62 102.65000009536743 +30.63 30.6299991607666 +30.65 60.19000053405762 +30.71 49.849998474121094 +30.81 55.989999771118164 +31.01 31.010000228881836 +31.15 31.149999618530273 +31.4 31.399999618530273 +31.61 31.610000610351562 +31.67 40.46000003814697 +31.77 42.09000027179718 +31.86 31.860000610351562 +31.91 78.51999855041504 +32.01 60.47999858856201 +32.18 58.61000061035156 +32.2 53.55000019073486 +32.23 42.89999961853027 +32.25 59.83000087738037 +32.37 
62.99999809265137 +32.41 32.40999984741211 +32.47 41.14000129699707 +32.52 95.0400013923645 +32.75 56.19000053405762 +32.89 80.07999992370605 +32.92 47.7599983215332 +33.36 45.27000045776367 +33.52 60.010000228881836 +33.55 63.939998507499695 +33.58 54.55000162124634 +33.67 33.66999816894531 +33.76 112.27999687194824 +33.83 59.110002517700195 +33.85 47.719998359680176 +33.87 37.48999881744385 +34.03 71.51999759674072 +34.21 71.57999920845032 +34.35 34.349998474121094 +34.41 59.20000076293945 +34.58 34.58000183105469 +34.73 34.72999954223633 +34.97 45.160000801086426 +35.0 35.0 +35.08 36.060001850128174 +35.13 39.600000858306885 +35.17 64.01999855041504 +35.17 66.52999877929688 +35.56 37.63000130653381 +35.62 80.20999872684479 +35.65 56.83000183105469 +35.68 52.20000046491623 +35.72 98.71999931335449 +35.8 79.96999931335449 +35.89 81.2599983215332 +36.22 55.20000076293945 +36.26 36.2599983215332 +36.58 64.45000267028809 +36.7 73.76000118255615 +36.79 36.790000915527344 +36.89 71.8499984741211 +36.95 36.95000076293945 +37.07 37.06999969482422 +37.1 51.039998054504395 +37.14 53.41999936103821 +37.14 61.15999984741211 +37.24 47.04000186920166 +37.59 50.94000053405762 +37.6 84.39999866485596 +37.72 57.14000117778778 +37.78 68.10999877750874 +37.8 57.94999885559082 +37.85 50.38999843597412 +37.9 77.50000238418579 +38.05 40.24999928474426 +38.05 47.859999656677246 +38.3 98.48999977111816 +38.33 112.09000301361084 +38.57 55.81999969482422 +38.62 73.99999904632568 +38.79 95.44000053405762 +38.85 97.45999908447266 +38.88 48.58000087738037 +38.94 71.3499984741211 +39.01 39.0099983215332 +39.03 57.73999959230423 +39.18 99.6599988937378 +39.21 71.79999923706055 +39.34 39.34000015258789 +39.69 55.69999837875366 +39.81 74.81000137329102 +39.82 39.81999969482422 +39.83 119.80000114440918 +39.87 62.22999954223633 +39.9 64.98000144958496 +39.98 39.97999954223633 +40.0 84.59000015258789 +40.04 101.20000076293945 +40.17 80.41999745368958 +40.24 67.9000015258789 +40.42 78.04999947547913 +40.44 117.94000101089478 +40.78 49.8199987411499 +40.8 40.79999923706055 +40.98 92.01999759674072 +41.2 58.280001163482666 +41.29 41.290000915527344 +41.29 112.87000012397766 +41.31 53.08000135421753 +41.34 53.230000495910645 +41.34 115.33999919891357 +41.36 41.36000061035156 +41.44 85.41999888420105 +41.45 91.29999923706055 +41.62 41.619998931884766 +41.68 109.58000183105469 +41.71 83.06999969482422 +41.81 89.8600025177002 +41.85 76.21999740600586 +41.87 47.48999881744385 +41.89 41.88999938964844 +42.24 72.6500015258789 +42.31 52.91000175476074 +42.42 154.69999504089355 +42.48 71.12999927997589 +42.51 61.069997787475586 +42.55 87.71000003814697 +42.56 71.91000175476074 +42.67 71.45999908447266 +42.76 42.7599983215332 +42.85 85.33999824523926 +43.01 109.53999710083008 +43.02 46.84000039100647 +43.13 43.130001068115234 +43.16 58.079999923706055 +43.17 43.16999816894531 +43.19 156.05999875068665 +43.31 64.10000228881836 +43.37 56.84999895095825 +43.57 43.71999970078468 +43.71 108.69000053405762 +43.73 108.50999927520752 +43.92 48.319998145103455 +44.1 67.1299991607666 +44.22 103.33000373840332 +44.27 74.88000106811523 +44.43 106.65999984741211 +44.57 59.00999927520752 +45.06 45.060001373291016 +45.1 45.099998474121094 +45.19 45.189998626708984 +45.19 117.10000038146973 +45.24 109.34000396728516 +45.34 129.73999881744385 +45.35 109.28999698162079 +45.42 77.61999893188477 +45.45 45.45000076293945 +45.56 137.57999897003174 +45.59 82.54000091552734 +45.68 55.25 +45.92 90.97999954223633 +45.99 47.07000172138214 +46.02 
82.27999877929688 +46.09 46.09000015258789 +46.1 98.17999839782715 +46.15 72.7800008058548 +46.18 74.29000091552734 +46.21 52.75999927520752 +46.27 85.61000061035156 +46.43 106.26000118255615 +46.45 110.90000343322754 +46.62 78.0099983215332 +46.8 80.46999740600586 +46.86 62.08000087738037 +46.87 70.80999952554703 +46.88 106.08000183105469 +46.97 88.1100025177002 +47.08 148.28000259399414 +47.27 50.60000038146973 +47.32 118.12999922037125 +47.57 90.32999801635742 +47.59 104.17000007629395 +47.69 99.88999909162521 +47.88 47.880001068115234 +48.01 91.72999802231789 +48.08 79.69000244140625 +48.11 48.11000061035156 +48.15 65.8500018119812 +48.22 105.07000017166138 +48.23 139.52999877929688 +48.25 48.25 +48.28 98.669997215271 +48.37 185.9499979019165 +48.45 48.45000076293945 +48.45 94.54000091552734 +48.52 146.69999885559082 +48.59 89.30999946594238 +49.12 49.119998931884766 +49.28 123.56999969482422 +49.44 110.93999814987183 +49.68 73.63999938964844 +49.77 50.33000046014786 +49.78 66.46999931335449 +50.02 63.380000829696655 +50.08 156.16000366210938 +50.09 106.28000068664551 +50.26 72.05999755859375 +50.28 50.279998779296875 +50.31 117.44000053405762 +50.32 90.13999938964844 +50.4 96.11999988555908 +50.41 98.72999799251556 +50.66 55.37999963760376 +50.7 131.11999821662903 +50.83 98.69000148773193 +50.92 53.70999813079834 +50.96 103.71999835968018 +51.25 67.0 +51.29 87.35000276565552 +51.29 124.93000030517578 +51.72 97.17000198364258 +51.79 139.90000343322754 +51.84 168.94000053405762 +51.85 171.64999961853027 +52.17 206.86999320983887 +52.23 177.1599998474121 +52.44 88.48999881744385 +52.5 105.41000175476074 +52.53 64.98999881744385 +52.72 52.720001220703125 +52.73 74.04999923706055 +52.85 89.63999938964844 +52.87 130.87999725341797 +53.02 100.50999927520752 +53.06 259.9299945831299 +53.18 53.18000030517578 +53.27 53.27000045776367 +53.59 53.59000015258789 +53.78 139.38999938964844 +53.93 57.890000343322754 +53.94 63.529998779296875 +54.1 152.7699956893921 +54.31 77.38000106811523 +54.34 125.46999943256378 +54.43 132.04999923706055 +54.44 103.01999950408936 +54.47 186.52000045776367 +54.73 63.179999351501465 +54.75 112.82999992370605 +54.83 110.82000160217285 +54.99 160.40000343322754 +55.1 161.35999965667725 +55.18 215.58000373840332 +55.2 126.65999984741211 +55.39 137.6699981689453 +55.51 74.55999803543091 +55.63 96.43000030517578 +55.99 187.10999989509583 +56.04 150.5800018310547 +56.07 118.15000057220459 +56.1 135.79000091552734 +56.15 144.64000034332275 +56.33 61.77000188827515 +56.62 78.88999938964844 +56.68 154.13999938964844 +56.81 169.64000129699707 +57.08 69.98000144958496 +57.11 168.0100040435791 +57.12 100.28999710083008 +57.23 65.9399995803833 +57.25 133.46999740600586 +57.29 112.54000091552734 +57.35 110.89999866485596 +57.37 115.109998524189 +57.46 147.78999710083008 +57.64 112.19000101089478 +57.67 57.66999816894531 +57.89 111.15999984741211 +57.93 68.02000045776367 +58.0 123.9399995803833 +58.08 58.08000183105469 +58.09 206.37000274658203 +58.13 105.84999942779541 +58.43 165.0900001525879 +58.52 167.0299997329712 +58.66 136.04000091552734 +58.67 205.36999702453613 +58.75 90.41000083088875 +58.86 165.14000129699707 +59.07 87.86999893188477 +59.16 224.25 +59.21 90.35999870300293 +59.34 127.44999893009663 +59.43 106.50000202655792 +59.45 67.90000057220459 +59.45 197.11999893188477 +59.5 149.63999938964844 +59.55 61.459999203681946 +59.61 85.97000026702881 +59.62 113.3299970626831 +59.68 73.89000034332275 +59.68 94.40999984741211 +59.7 193.1699981689453 +59.71 60.22999906539917 
+59.83 145.17000007629395 +59.87 228.80999946594238 +59.99 134.04000282287598 +60.02 66.76000022888184 +60.06 60.060001373291016 +60.12 113.34999942779541 +60.13 214.27000045776367 +60.22 108.10000228881836 +60.26 105.94999847561121 +60.26 165.32999849319458 +60.53 66.37999868392944 +60.6 82.8499984741211 +60.71 72.04999899864197 +60.85 132.36999607086182 +61.21 160.86999797821045 +61.7 127.55000257492065 +61.86 248.9700005054474 +61.88 112.15999984741211 +61.92 125.29999899864197 +61.94 119.6099967956543 +62.14 110.59000015258789 +62.2 149.91000080108643 +62.23 111.3499984741211 +62.3 158.41999912261963 +62.39 110.95999908447266 +62.52 123.97999966144562 +62.72 123.78999900817871 +62.74 153.10000038146973 +62.85 167.01999855041504 +62.9 256.0699996948242 +62.92 129.3899974822998 +63.12 93.47999954223633 +63.33 135.38999938964844 +63.35 116.93999862670898 +63.42 172.76000213623047 +63.51 123.51999855041504 +63.9 135.70000076293945 +64.0 191.55000257492065 +64.22 86.97000122070312 +64.25 131.25 +64.3 122.3800048828125 +64.36 85.59000015258789 +64.46 134.44000053405762 +64.65 143.54000091552734 +64.67 121.15999794006348 +64.77 214.40999603271484 +64.87 194.61000156402588 +64.95 324.87999153137207 +65.02 175.6099967956543 +65.02 259.6299982070923 +65.38 168.7100009918213 +65.43 112.27000069618225 +65.43 289.6800003051758 +65.44 192.89000137150288 +65.55 66.16000306606293 +65.62 139.67000198364258 +65.7 65.69999694824219 +65.72 77.54000091552734 +66.17 177.10999631881714 +66.17 200.60999870300293 +66.36 131.73000144958496 +66.51 83.50000190734863 +66.61 66.61000061035156 +66.61 78.93000030517578 +66.67 129.84999752044678 +66.89 99.96000003814697 +67.12 67.12000274658203 +67.18 234.21000003814697 +67.26 77.9900016784668 +67.38 178.72999572753906 +67.45 197.29999446868896 +67.48 268.0900020599365 +67.59 272.95999336242676 +67.94 125.89000129699707 +67.98 123.36000299453735 +68.01 124.84000396728516 +68.04 166.76000022888184 +68.22 181.57000064849854 +68.25 113.52000045776367 +68.25 163.2900013923645 +68.32 247.04999542236328 +68.41 157.72000312805176 +68.5 156.36999893188477 +68.81 93.02999782562256 +68.85 160.57999649643898 +68.89 89.69999885559082 +68.95 140.99999594688416 +68.96 192.4799976348877 +69.32 156.29000091552734 +69.53 239.17000007629395 +69.74 246.84999418258667 +69.8 101.63000345230103 +69.88 220.45999908447266 +69.96 83.84999942779541 +69.97 169.86000031232834 +70.0 236.76000022888184 +70.04 196.70000076293945 +70.06 92.17999839782715 +70.24 213.77999877929688 +70.35 247.5099983215332 +70.38 179.95999908447266 +70.39 164.79999923706055 +70.52 181.33999824523926 +70.53 70.52999877929688 +70.56 110.53999710083008 +70.85 223.94999885559082 +70.89 149.81999969482422 +70.93 160.79000282287598 +71.01 92.20000267028809 +71.07 112.36000061035156 +71.13 135.149995803833 +71.19 136.1800012588501 +71.26 318.7700004577637 +71.31 231.88999405503273 +71.32 145.21000003814697 +71.35 145.909996509552 +71.5 217.409996509552 +71.54 71.54000091552734 +71.55 90.41000270843506 +71.68 72.9500002861023 +71.68 227.97000122070312 +71.78 189.71999979019165 +71.8 103.66000366210938 +71.89 180.57999992370605 +72.04 156.63000106811523 +72.18 181.46999728679657 +72.51 264.0600047111511 +72.53 265.42000015079975 +72.56 132.61999893188477 +72.62 205.2400016784668 +72.79 72.79000091552734 +72.98 286.76000213623047 +73.18 123.78000068664551 +73.32 175.96999979019165 +73.48 144.8300018310547 +73.63 320.47999143600464 +73.65 114.11000156402588 +73.68 161.0300030708313 +73.88 291.28999376296997 +73.93 
156.77999877929688 +74.0 179.8499994277954 +74.02 89.27999687194824 +74.15 74.1500015258789 +74.19 74.47000244259834 +74.19 122.44000244140625 +74.3 159.72000193595886 +74.42 219.62999820709229 +74.45 203.839994430542 +74.52 271.6399955749512 +74.53 342.6200008392334 +74.59 157.12999725341797 +74.62 163.89999961853027 +74.72 139.05000114440918 +74.78 230.83999752998352 +75.03 174.98999881744385 +75.1 214.62999725341797 +75.19 149.6600048840046 +75.29 93.4900016784668 +75.35 256.91999912261963 +75.42 153.46999764442444 +75.66 225.4800033569336 +75.73 236.60000133514404 +75.83 199.4000015258789 +75.88 243.89000129699707 +76.05 179.770001411438 +76.1 257.43999671936035 +76.28 168.48000144958496 +76.28 177.9100022315979 +76.33 363.09000396728516 +76.52 207.39999389648438 +76.69 212.87000370025635 +76.7 307.5399944782257 +76.71 132.17999839782715 +76.72 166.42000007629395 +76.72 258.1899985074997 +76.74 102.81999778747559 +76.92 401.7999897003174 +76.93 384.4699947834015 +77.02 296.6499948501587 +77.1 187.63999557495117 +77.36 281.19999504089355 +77.42 217.32000160217285 +77.57 189.66000270843506 +77.66 267.380003452301 +77.81 178.31999683380127 +77.84 350.7999897003174 +77.89 249.5399990081787 +77.97 178.2599983215332 +78.21 189.1099977493286 +78.26 284.6300048828125 +78.28 306.25 +78.3 168.71000388264656 +78.31 161.80999946594238 +78.62 275.3200035095215 +78.64 94.55999946594238 +78.73 310.6199974119663 +78.89 175.31999969482422 +78.98 293.25000381469727 +79.12 276.419997215271 +79.19 194.30000096559525 +79.21 237.62999820709229 +79.38 151.22999572753906 +79.42 124.6099967956543 +79.48 116.55000305175781 +79.48 200.64000129699707 +79.49 294.1199951171875 +79.54 145.23999786376953 +79.55 254.54000186920166 +79.75 215.13999938964844 +79.83 294.9700012207031 +79.96 79.95999908447266 +79.97 266.4900016784668 +79.99 219.6599998474121 +80.23 248.71000480651855 +80.3 133.4800033569336 +80.3 305.7800064086914 +80.46 249.17000296711922 +80.52 136.21999502182007 +80.58 261.16000175476074 +80.6 281.23999977111816 +80.71 275.0100000500679 +80.74 240.4599997997284 +80.84 142.60999822616577 +80.92 340.5499963760376 +80.96 372.2499928474426 +80.97 93.32000160217285 +80.99 317.74999809265137 +81.17 220.55999755859375 +81.32 158.86000061035156 +81.32 206.16000366210938 +81.47 198.91000175476074 +81.58 173.76000022888184 +81.64 207.5300006866455 +81.66 261.5100030899048 +82.24 243.59999752044678 +82.3 157.18000411987305 +82.34 214.0699977874756 +82.41 135.83000302314758 +82.52 240.23999977111816 +82.55 139.6900042295456 +82.56 175.8799991607666 +82.72 232.63000202178955 +82.97 101.9000015258789 +83.08 181.57000160217285 +83.27 83.2699966430664 +83.33 348.75000198185444 +83.4 196.729998588562 +83.54 299.12000465393066 +83.57 139.38999938964844 +83.58 163.54000091552734 +83.87 263.64000415802 +83.92 432.67000015079975 +83.93 358.9400003552437 +84.03 240.19000244140625 +84.23 209.53000235557556 +84.31 157.0999984741211 +84.38 220.1699981689453 +84.4 443.3400018811226 +84.69 249.4900016784668 +84.72 105.00000095367432 +84.83 157.4800033569336 +85.0 221.21999502182007 +85.03 283.9400005340576 +85.1 217.27999687194824 +85.14 266.7100009918213 +85.23 277.7100009918213 +85.49 261.4599976539612 +85.49 325.9499976634979 +85.51 165.59000205993652 +85.74 341.80999755859375 +85.76 160.57000350952148 +85.87 357.5099983215332 +85.9 202.45000457763672 +86.0 322.60000133514404 +86.22 152.8300018310547 +86.23 152.99000358581543 +86.63 381.5999984741211 +86.69 138.42000198364258 +86.92 147.14999723434448 +86.93 97.59000015258789 
+86.93 218.18000030517578 +87.14 219.50999546051025 +87.22 211.01000022888184 +87.4 137.73000198602676 +87.48 153.86000204086304 +87.57 307.0799951553345 +87.61 321.82000064849854 +87.67 248.4600009918213 +87.83 130.96000289916992 +87.94 134.9800043106079 +87.99 227.0399990081787 +88.02 406.7899971008301 +88.05 97.53000259399414 +88.07 210.45000457763672 +88.17 245.6500015258789 +88.22 369.4600009918213 +88.36 305.6399974822998 +88.47 213.9400006532669 +88.48 211.84000635147095 +88.55 199.71000289916992 +88.77 187.49999463558197 +88.78 337.23999977111816 +88.8 254.1300015449524 +88.91 276.40999829769135 +89.01 98.69000244140625 +89.03 431.6499996185303 +89.1 113.83999919891357 +89.15 234.32000160217285 +89.28 343.82000064849854 +89.38 496.16999435424805 +89.53 153.05999755859375 +89.55 89.55000305175781 +89.55 96.27000284194946 +89.55 172.62000274658203 +89.8 205.1400022506714 +89.81 225.64000058174133 +89.93 89.93000030517578 +90.05 90.05000305175781 +90.05 247.1500015258789 +90.07 137.82999801635742 +90.12 234.76000308990479 +90.2 208.32999616861343 +90.25 386.8999948501587 +90.28 334.17000007629395 +90.35 158.36999893188477 +90.38 148.65999841690063 +90.51 268.7700004577637 +90.56 298.8899937272072 +90.69 181.1000051498413 +90.69 523.360002592206 +90.73 260.59000366926193 +90.77 203.03999733924866 +91.05 263.67000579833984 +91.16 298.55999755859375 +91.42 196.41999912261963 +91.48 144.56000471115112 +91.53 301.9800033569336 +91.61 215.59000027179718 +91.63 432.17999362945557 +91.78 330.9499988555908 +91.88 277.8299951553345 +91.97 205.4900016784668 +92.05 317.69000363349915 +92.11 307.70000088214874 +92.33 92.33000183105469 +92.37 226.410005569458 +92.4 265.1600036621094 +92.55 295.59000039100647 +92.61 249.38999938964844 +92.82 182.87000274658203 +92.96 185.98999691009521 +92.98 494.779993057251 +93.03 165.80999958515167 +93.09 456.1800003051758 +93.11 427.2800006866455 +93.61 465.8599934577942 +93.64 314.1999969482422 +93.73 475.3300018310547 +94.08 187.56000137329102 +94.15 362.9200019836426 +94.25 308.1900006532669 +94.27 351.70999336242676 +94.31 216.75 +94.33 193.02000427246094 +94.34 320.75000190734863 +94.38 260.18999683856964 +94.43 174.89999771118164 +94.54 340.19000244140625 +94.66 192.19000625610352 +94.68 189.23999977111816 +94.68 344.1700019836426 +94.72 274.6800003051758 +95.07 417.67000102996826 +95.11 393.99999433755875 +95.28 140.37999725341797 +95.33 244.99000671505928 +95.34 192.92999649047852 +95.38 230.36000156402588 +95.48 406.1000007688999 +95.53 248.58999633789062 +95.53 252.71000289916992 +95.81 134.81999588012695 +95.81 551.9899978637695 +95.84 274.1599931716919 +96.09 191.52999687194824 +96.23 398.2100067138672 +96.25 261.8400020599365 +96.29 199.10999870300293 +96.38 323.4199962615967 +96.62 309.4900064468384 +96.68 336.87000274658203 +96.73 272.0500030517578 +96.78 190.27000045776367 +96.91 180.18000030517578 +96.94 216.54999923706055 +97.09 428.03999519348145 +97.24 346.41000083088875 +97.26 373.67999935150146 +97.39 257.9600028991699 +97.46 449.1699924468994 +97.51 132.09000396728516 +97.56 97.55999755859375 +97.57 592.3499927520752 +97.65 196.34000301361084 +97.68 258.7100033760071 +97.71 175.70000076293945 +97.81 297.20999908447266 +97.83 396.38999938964844 +97.87 254.50000381469727 +98.18 105.29000043869019 +98.22 438.4100036621094 +98.23 525.5100040435791 +98.31 335.93999576568604 +98.48 286.0400047302246 +98.51 132.86000061035156 +98.57 404.3500061035156 +98.72 263.8600025177002 +98.96 288.1999988555908 +99.13 243.6900019645691 +99.15 
210.11000061035156 +99.21 347.7999954223633 +99.24 537.6500015258789 +99.29 291.48000717163086 +99.36 106.41000080108643 +99.62 417.3700008392334 +99.65 185.62000179290771 +99.67 180.92999649047852 +99.68 230.6400032043457 +99.91 367.2900071144104 +99.92 376.32999646663666 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-7-20fdc99aa046b2c41d9b85ab338c749c b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-7-20fdc99aa046b2c41d9b85ab338c749c new file mode 100644 index 0000000000000..1a4528978b74f --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-7-20fdc99aa046b2c41d9b85ab338c749c @@ -0,0 +1,1049 @@ + 65560 3.1 + 65718 2.38 + 65740 3.04 +alice allen 65662 1.55 +alice allen 65720 1.79 +alice allen 65758 1.98 +alice brown 65696 0.13 +alice carson 65559 4.2 +alice davidson 65547 1.51 +alice falkner 65669 4.19 +alice garcia 65613 0.72 +alice hernandez 65737 0.92 +alice hernandez 65784 2.09 +alice johnson 65739 2.55 +alice king 65660 3.84 +alice king 65734 2.96 +alice king 65738 2.14 +alice laertes 65669 0.28 +alice laertes 65671 1.16 +alice miller 65590 4.95 +alice nixon 65586 2.98 +alice nixon 65595 2.36 +alice nixon 65604 2.45 +alice ovid 65737 3.2 +alice polk 65548 1.23 +alice quirinius 65636 0.23 +alice quirinius 65728 0.82 +alice robinson 65606 3.99 +alice robinson 65789 4.35 +alice steinbeck 65578 4.72 +alice steinbeck 65673 3.97 +alice steinbeck 65786 3.92 +alice underhill 65750 2.06 +alice van buren 65562 2.43 +alice xylophone 65578 2.22 +alice xylophone 65585 2.11 +alice xylophone 65599 2.92 +alice zipper 65553 3.78 +alice zipper 65662 2.61 +alice zipper 65766 3.12 +bob brown 65584 2.09 +bob brown 65777 1.62 +bob brown 65783 2.4 +bob carson 65713 3.87 +bob davidson 65664 4.25 +bob davidson 65693 3.54 +bob davidson 65768 2.91 +bob ellison 65591 2.23 +bob ellison 65624 1.69 +bob ellison 65721 1.69 +bob ellison 65760 1.69 +bob falkner 65789 0.21 +bob garcia 65585 3.91 +bob garcia 65598 2.64 +bob garcia 65673 3.26 +bob garcia 65754 3.29 +bob garcia 65782 2.86 +bob hernandez 65557 3.72 +bob ichabod 65549 1.39 +bob king 65715 2.76 +bob king 65757 1.71 +bob king 65783 2.24 +bob laertes 65602 4.98 +bob laertes 65663 3.56 +bob miller 65608 4.95 +bob ovid 65564 1.23 +bob ovid 65619 1.53 +bob ovid 65686 1.84 +bob ovid 65726 2.38 +bob polk 65594 0.28 +bob quirinius 65700 3.82 +bob steinbeck 65637 0.22 +bob van buren 65778 2.89 +bob white 65543 4.75 +bob white 65605 2.89 +bob xylophone 65574 1.7 +bob xylophone 65666 2.51 +bob young 65556 0.95 +bob zipper 65559 3.18 +bob zipper 65633 3.2 +bob zipper 65739 3.24 +calvin allen 65669 2.3 +calvin brown 65537 1.1 +calvin brown 65580 2.82 +calvin brown 65677 2.57 +calvin carson 65637 1.33 +calvin davidson 65541 1.98 +calvin davidson 65564 1.6 +calvin ellison 65667 1.85 +calvin falkner 65573 3.52 +calvin falkner 65596 3.31 +calvin falkner 65738 2.36 +calvin falkner 65762 2.26 +calvin falkner 65778 2.7 +calvin falkner 65784 2.98 +calvin garcia 65664 2.9 +calvin hernandez 65578 2.08 +calvin johnson 65731 4.69 +calvin laertes 65570 4.7 +calvin laertes 65684 3.68 +calvin nixon 65654 2.74 +calvin nixon 65724 3.27 +calvin nixon 65749 2.88 +calvin ovid 65554 3.34 +calvin ovid 65643 2.38 +calvin ovid 65663 2.74 +calvin ovid 65715 2.47 +calvin polk 65731 4.36 +calvin quirinius 65741 4.0 +calvin quirinius 65769 2.2 +calvin robinson 65581 3.32 +calvin steinbeck 65680 1.29 +calvin steinbeck 65762 1.3 +calvin steinbeck 65779 1.98 +calvin thompson 65560 4.08 +calvin 
thompson 65640 3.33 +calvin underhill 65732 2.41 +calvin van buren 65552 1.05 +calvin van buren 65771 1.33 +calvin white 65553 4.7 +calvin white 65561 2.68 +calvin xylophone 65575 4.6 +calvin xylophone 65596 4.77 +calvin xylophone 65713 3.23 +calvin young 65574 0.27 +calvin young 65746 0.9 +calvin zipper 65669 4.4 +calvin zipper 65739 2.29 +david allen 65588 3.86 +david allen 65617 3.18 +david brown 65637 1.17 +david brown 65760 1.01 +david davidson 65559 1.37 +david davidson 65756 1.57 +david davidson 65778 1.89 +david davidson 65779 2.43 +david ellison 65634 3.23 +david ellison 65724 2.95 +david ellison 65724 2.95 +david hernandez 65763 1.15 +david ichabod 65699 1.67 +david ichabod 65715 1.26 +david laertes 65762 1.38 +david nixon 65536 1.27 +david ovid 65623 0.23 +david ovid 65628 1.15 +david quirinius 65697 1.14 +david quirinius 65759 1.65 +david quirinius 65779 1.93 +david robinson 65762 3.51 +david robinson 65775 3.38 +david thompson 65550 3.3 +david underhill 65602 0.12 +david underhill 65662 2.29 +david underhill 65751 2.43 +david van buren 65625 1.55 +david van buren 65634 3.25 +david white 65678 0.17 +david xylophone 65537 1.07 +david xylophone 65670 0.72 +david xylophone 65764 0.94 +david young 65551 4.51 +david young 65694 2.74 +ethan allen 65747 3.61 +ethan brown 65539 2.9 +ethan brown 65617 1.59 +ethan brown 65685 2.17 +ethan brown 65685 2.17 +ethan brown 65722 2.64 +ethan brown 65733 2.75 +ethan carson 65742 2.84 +ethan ellison 65714 4.87 +ethan ellison 65732 3.9 +ethan falkner 65577 3.61 +ethan falkner 65614 1.95 +ethan garcia 65736 4.63 +ethan hernandez 65618 0.46 +ethan johnson 65536 1.76 +ethan king 65614 0.92 +ethan laertes 65562 2.89 +ethan laertes 65597 3.45 +ethan laertes 65628 3.15 +ethan laertes 65643 3.53 +ethan laertes 65680 3.18 +ethan laertes 65745 3.36 +ethan laertes 65760 3.01 +ethan miller 65712 1.97 +ethan nixon 65766 4.1 +ethan ovid 65697 3.81 +ethan polk 65589 0.7 +ethan polk 65615 1.92 +ethan polk 65622 2.0 +ethan polk 65622 2.0 +ethan quirinius 65542 4.64 +ethan quirinius 65591 3.97 +ethan quirinius 65706 2.88 +ethan robinson 65547 2.2 +ethan robinson 65659 2.17 +ethan underhill 65570 2.45 +ethan van buren 65572 1.11 +ethan white 65677 3.42 +ethan white 65788 4.09 +ethan xylophone 65595 4.66 +ethan zipper 65593 2.1 +ethan zipper 65680 2.53 +fred davidson 65552 0.37 +fred davidson 65595 2.31 +fred davidson 65721 2.65 +fred ellison 65548 4.32 +fred ellison 65691 3.44 +fred ellison 65771 3.1 +fred falkner 65637 4.59 +fred falkner 65648 2.79 +fred falkner 65783 2.35 +fred hernandez 65541 3.87 +fred ichabod 65572 1.45 +fred ichabod 65789 1.68 +fred johnson 65758 3.86 +fred king 65694 4.05 +fred king 65745 4.5 +fred laertes 65769 3.89 +fred miller 65536 2.59 +fred nixon 65560 2.52 +fred nixon 65612 2.0 +fred nixon 65703 1.35 +fred nixon 65705 1.16 +fred polk 65603 2.9 +fred polk 65656 1.65 +fred polk 65701 1.75 +fred polk 65706 1.4 +fred quirinius 65697 1.91 +fred quirinius 65782 3.24 +fred robinson 65623 2.6 +fred steinbeck 65544 2.0 +fred steinbeck 65651 3.32 +fred steinbeck 65755 3.51 +fred underhill 65629 2.56 +fred van buren 65537 4.97 +fred van buren 65561 3.28 +fred van buren 65745 3.24 +fred van buren 65789 3.27 +fred white 65589 3.16 +fred young 65594 0.34 +fred young 65773 1.56 +fred zipper 65553 0.37 +gabriella allen 65646 1.68 +gabriella allen 65677 1.6 +gabriella brown 65704 0.02 +gabriella brown 65753 1.86 +gabriella carson 65586 0.37 +gabriella davidson 65565 3.45 +gabriella ellison 65706 1.15 +gabriella ellison 65716 2.06 +gabriella falkner 
65623 2.09 +gabriella falkner 65711 2.48 +gabriella falkner 65767 1.82 +gabriella garcia 65571 3.17 +gabriella hernandez 65587 0.74 +gabriella hernandez 65717 0.96 +gabriella ichabod 65559 0.63 +gabriella ichabod 65633 2.42 +gabriella ichabod 65702 3.27 +gabriella ichabod 65712 3.6 +gabriella ichabod 65717 3.56 +gabriella king 65651 3.59 +gabriella king 65657 2.17 +gabriella laertes 65781 2.81 +gabriella miller 65646 3.47 +gabriella ovid 65556 1.23 +gabriella ovid 65583 1.95 +gabriella polk 65701 3.58 +gabriella polk 65790 2.08 +gabriella steinbeck 65582 3.6 +gabriella steinbeck 65653 2.7 +gabriella thompson 65682 1.78 +gabriella thompson 65755 3.21 +gabriella thompson 65766 2.71 +gabriella van buren 65581 3.36 +gabriella van buren 65644 2.6 +gabriella white 65638 4.55 +gabriella young 65699 4.13 +gabriella young 65774 3.58 +gabriella zipper 65540 0.96 +gabriella zipper 65754 2.13 +holly allen 65596 0.05 +holly brown 65599 3.23 +holly brown 65619 3.4 +holly falkner 65720 4.06 +holly hernandez 65602 3.67 +holly hernandez 65686 3.39 +holly hernandez 65750 3.2 +holly hernandez 65788 2.95 +holly ichabod 65711 4.73 +holly ichabod 65749 3.54 +holly ichabod 65752 3.27 +holly johnson 65655 4.19 +holly johnson 65662 3.84 +holly johnson 65755 2.65 +holly king 65549 3.61 +holly king 65648 2.37 +holly laertes 65664 4.14 +holly miller 65653 3.84 +holly nixon 65539 4.09 +holly nixon 65658 3.04 +holly polk 65743 2.1 +holly polk 65751 2.78 +holly robinson 65564 0.24 +holly thompson 65538 2.39 +holly thompson 65578 1.49 +holly thompson 65713 1.54 +holly underhill 65634 4.69 +holly underhill 65654 3.08 +holly underhill 65721 3.14 +holly underhill 65759 2.61 +holly van buren 65727 0.04 +holly white 65536 4.59 +holly white 65602 4.13 +holly xylophone 65544 1.49 +holly young 65606 4.39 +holly young 65765 3.81 +holly zipper 65607 4.12 +holly zipper 65755 3.3 +irene allen 65556 3.45 +irene brown 65633 4.8 +irene brown 65650 3.77 +irene brown 65765 3.53 +irene carson 65590 2.83 +irene ellison 65659 3.15 +irene ellison 65696 2.0 +irene falkner 65620 0.99 +irene falkner 65661 1.41 +irene garcia 65660 1.55 +irene garcia 65711 1.72 +irene garcia 65787 1.57 +irene ichabod 65645 0.95 +irene ichabod 65722 1.49 +irene johnson 65583 4.51 +irene laertes 65664 2.5 +irene laertes 65710 1.5 +irene laertes 65722 2.01 +irene miller 65730 4.33 +irene nixon 65631 2.36 +irene nixon 65643 3.43 +irene nixon 65653 2.43 +irene ovid 65691 3.24 +irene ovid 65734 3.17 +irene ovid 65753 3.18 +irene polk 65551 4.51 +irene polk 65575 2.97 +irene polk 65579 3.58 +irene polk 65595 2.82 +irene polk 65610 2.99 +irene quirinius 65724 3.5 +irene quirinius 65769 3.85 +irene quirinius 65773 4.21 +irene robinson 65554 2.67 +irene steinbeck 65683 1.48 +irene thompson 65688 0.06 +irene underhill 65591 3.61 +irene underhill 65787 4.01 +irene van buren 65579 4.26 +irene van buren 65589 4.37 +irene xylophone 65775 4.81 +jessica brown 65588 2.87 +jessica carson 65553 1.02 +jessica carson 65672 1.82 +jessica carson 65747 1.91 +jessica davidson 65549 4.48 +jessica davidson 65606 2.72 +jessica davidson 65675 2.23 +jessica davidson 65727 2.12 +jessica ellison 65567 3.0 +jessica ellison 65663 3.15 +jessica falkner 65584 2.11 +jessica garcia 65676 2.13 +jessica garcia 65789 3.54 +jessica ichabod 65704 3.48 +jessica johnson 65607 3.55 +jessica johnson 65720 3.0 +jessica miller 65733 2.9 +jessica nixon 65590 2.18 +jessica nixon 65774 3.1 +jessica ovid 65582 3.23 +jessica ovid 65751 3.14 +jessica polk 65637 1.12 +jessica quirinius 65562 3.06 +jessica quirinius 
65608 1.75 +jessica quirinius 65712 1.51 +jessica quirinius 65716 1.37 +jessica robinson 65576 1.11 +jessica thompson 65581 4.94 +jessica thompson 65675 3.56 +jessica underhill 65656 3.97 +jessica underhill 65702 3.01 +jessica underhill 65783 3.5 +jessica van buren 65615 2.15 +jessica white 65544 1.89 +jessica white 65570 1.92 +jessica white 65594 2.67 +jessica white 65673 2.1 +jessica white 65779 2.04 +jessica xylophone 65562 0.49 +jessica young 65623 0.5 +jessica young 65711 2.37 +jessica zipper 65600 1.71 +jessica zipper 65657 1.0 +jessica zipper 65778 0.9 +katie allen 65542 1.3 +katie brown 65590 3.06 +katie davidson 65619 1.89 +katie ellison 65675 1.92 +katie ellison 65699 2.55 +katie falkner 65728 2.42 +katie garcia 65625 4.18 +katie garcia 65747 4.33 +katie hernandez 65550 2.1 +katie ichabod 65658 1.84 +katie ichabod 65726 2.41 +katie ichabod 65757 3.2 +katie king 65629 0.86 +katie king 65647 2.09 +katie king 65776 2.74 +katie miller 65541 0.66 +katie miller 65661 1.39 +katie nixon 65669 2.06 +katie ovid 65681 1.61 +katie polk 65746 4.74 +katie polk 65784 2.57 +katie robinson 65697 4.74 +katie van buren 65643 0.61 +katie van buren 65730 2.79 +katie white 65620 0.42 +katie white 65719 1.38 +katie xylophone 65585 3.02 +katie young 65644 2.64 +katie young 65746 2.45 +katie young 65764 2.66 +katie zipper 65568 2.33 +katie zipper 65733 1.97 +luke allen 65547 2.04 +luke allen 65552 1.49 +luke allen 65576 2.14 +luke allen 65681 2.2 +luke allen 65776 1.92 +luke brown 65719 4.3 +luke davidson 65656 3.37 +luke davidson 65791 3.9 +luke ellison 65582 0.23 +luke ellison 65664 0.51 +luke ellison 65779 0.87 +luke falkner 65589 2.22 +luke falkner 65618 1.22 +luke garcia 65687 4.73 +luke garcia 65778 3.56 +luke ichabod 65629 3.07 +luke ichabod 65654 3.58 +luke johnson 65545 4.33 +luke johnson 65716 3.01 +luke johnson 65718 3.17 +luke laertes 65608 3.79 +luke laertes 65657 3.89 +luke laertes 65685 2.82 +luke laertes 65730 2.96 +luke laertes 65756 3.19 +luke miller 65752 4.7 +luke ovid 65569 4.4 +luke ovid 65693 2.58 +luke polk 65645 0.57 +luke polk 65658 2.73 +luke quirinius 65655 4.1 +luke robinson 65634 4.9 +luke robinson 65772 4.19 +luke thompson 65626 0.15 +luke underhill 65553 1.28 +luke underhill 65571 0.84 +luke underhill 65651 1.14 +luke van buren 65678 0.34 +luke white 65693 0.91 +luke xylophone 65597 2.27 +luke zipper 65641 4.63 +mike allen 65706 3.06 +mike brown 65654 3.57 +mike carson 65698 4.46 +mike carson 65700 3.89 +mike carson 65751 3.58 +mike davidson 65658 2.06 +mike davidson 65759 3.34 +mike ellison 65598 3.96 +mike ellison 65606 3.28 +mike ellison 65718 3.38 +mike ellison 65738 2.56 +mike ellison 65760 3.03 +mike falkner 65609 4.85 +mike garcia 65571 1.82 +mike garcia 65600 1.42 +mike garcia 65770 1.92 +mike hernandez 65548 1.42 +mike hernandez 65672 1.75 +mike ichabod 65621 3.73 +mike king 65563 4.34 +mike king 65586 3.75 +mike king 65591 3.09 +mike king 65642 2.69 +mike king 65769 2.36 +mike king 65776 2.55 +mike miller 65549 3.96 +mike nixon 65619 0.09 +mike nixon 65704 2.15 +mike polk 65619 4.13 +mike polk 65658 4.27 +mike polk 65704 3.77 +mike quirinius 65717 2.81 +mike steinbeck 65550 3.18 +mike steinbeck 65564 2.58 +mike steinbeck 65573 2.12 +mike steinbeck 65749 1.72 +mike van buren 65620 0.09 +mike van buren 65770 0.88 +mike white 65648 1.72 +mike white 65685 1.12 +mike white 65769 1.74 +mike white 65778 2.05 +mike young 65545 1.69 +mike young 65581 0.92 +mike young 65736 1.84 +mike zipper 65552 4.8 +mike zipper 65695 4.16 +mike zipper 65779 4.22 +nick allen 65641 3.78 
+nick allen 65786 3.74 +nick brown 65724 4.61 +nick davidson 65601 0.88 +nick ellison 65691 4.04 +nick ellison 65745 3.22 +nick falkner 65583 4.44 +nick falkner 65676 3.08 +nick garcia 65695 1.05 +nick garcia 65712 2.18 +nick garcia 65720 1.94 +nick ichabod 65572 2.62 +nick ichabod 65681 2.99 +nick ichabod 65737 3.55 +nick johnson 65585 0.56 +nick johnson 65784 0.42 +nick laertes 65624 0.16 +nick miller 65757 4.23 +nick nixon 65650 0.7 +nick ovid 65719 3.93 +nick polk 65716 3.66 +nick quirinius 65588 2.88 +nick quirinius 65723 2.42 +nick robinson 65547 0.21 +nick robinson 65675 0.57 +nick steinbeck 65689 4.11 +nick thompson 65610 2.32 +nick underhill 65619 2.73 +nick van buren 65603 1.84 +nick xylophone 65644 2.06 +nick young 65654 2.06 +nick young 65660 2.28 +nick zipper 65757 3.8 +nick zipper 65765 2.04 +oscar allen 65644 3.01 +oscar brown 65614 3.95 +oscar carson 65537 4.29 +oscar carson 65548 2.89 +oscar carson 65549 1.95 +oscar carson 65624 2.56 +oscar carson 65697 3.03 +oscar davidson 65556 0.6 +oscar ellison 65630 1.47 +oscar ellison 65630 1.47 +oscar falkner 65692 3.44 +oscar garcia 65751 3.71 +oscar hernandez 65683 3.32 +oscar hernandez 65707 2.25 +oscar ichabod 65536 1.8 +oscar ichabod 65562 1.18 +oscar ichabod 65637 1.91 +oscar ichabod 65763 1.96 +oscar johnson 65645 1.69 +oscar johnson 65778 1.59 +oscar king 65541 3.96 +oscar king 65550 4.31 +oscar king 65787 3.54 +oscar laertes 65625 2.26 +oscar laertes 65690 2.55 +oscar laertes 65756 2.85 +oscar laertes 65790 2.39 +oscar nixon 65596 3.12 +oscar ovid 65536 2.94 +oscar ovid 65615 2.95 +oscar ovid 65659 3.41 +oscar polk 65541 1.12 +oscar polk 65643 1.62 +oscar quirinius 65541 3.5 +oscar quirinius 65560 2.87 +oscar quirinius 65689 3.35 +oscar quirinius 65720 3.03 +oscar robinson 65537 0.29 +oscar robinson 65658 0.29 +oscar robinson 65687 1.5 +oscar robinson 65782 1.78 +oscar steinbeck 65709 4.96 +oscar thompson 65542 0.48 +oscar thompson 65698 2.07 +oscar thompson 65727 2.01 +oscar thompson 65738 1.8 +oscar underhill 65626 3.49 +oscar van buren 65581 2.33 +oscar van buren 65635 2.45 +oscar van buren 65705 2.68 +oscar white 65552 3.05 +oscar white 65564 2.58 +oscar white 65671 2.56 +oscar white 65735 2.47 +oscar xylophone 65773 1.51 +oscar xylophone 65773 1.51 +oscar xylophone 65775 1.82 +oscar zipper 65568 4.77 +oscar zipper 65740 3.81 +oscar zipper 65777 3.26 +priscilla brown 65670 2.91 +priscilla brown 65690 2.83 +priscilla brown 65749 2.07 +priscilla carson 65658 1.43 +priscilla carson 65687 2.97 +priscilla carson 65755 2.87 +priscilla ichabod 65627 4.95 +priscilla ichabod 65759 3.41 +priscilla johnson 65543 3.85 +priscilla johnson 65633 2.98 +priscilla johnson 65668 2.23 +priscilla johnson 65681 1.98 +priscilla johnson 65755 1.94 +priscilla king 65646 1.93 +priscilla nixon 65564 0.31 +priscilla nixon 65600 2.35 +priscilla ovid 65541 3.8 +priscilla ovid 65790 2.37 +priscilla polk 65747 3.1 +priscilla quirinius 65672 0.81 +priscilla thompson 65654 2.04 +priscilla underhill 65715 0.82 +priscilla underhill 65729 1.33 +priscilla van buren 65607 0.23 +priscilla van buren 65685 0.81 +priscilla van buren 65749 1.84 +priscilla white 65652 4.79 +priscilla xylophone 65538 3.56 +priscilla xylophone 65763 2.48 +priscilla xylophone 65774 1.84 +priscilla young 65585 2.92 +priscilla young 65658 3.77 +priscilla zipper 65622 4.62 +priscilla zipper 65726 2.67 +quinn allen 65657 3.02 +quinn allen 65708 3.35 +quinn brown 65691 4.37 +quinn brown 65700 3.28 +quinn brown 65733 3.27 +quinn davidson 65549 0.03 +quinn davidson 65714 1.02 +quinn 
davidson 65776 2.19 +quinn davidson 65779 2.66 +quinn ellison 65705 0.54 +quinn ellison 65778 2.74 +quinn garcia 65568 1.04 +quinn garcia 65604 0.79 +quinn garcia 65610 1.85 +quinn garcia 65773 1.67 +quinn ichabod 65564 0.65 +quinn king 65558 2.38 +quinn king 65649 1.53 +quinn laertes 65542 2.33 +quinn laertes 65560 2.03 +quinn laertes 65627 2.08 +quinn nixon 65659 0.72 +quinn ovid 65699 2.87 +quinn quirinius 65747 0.86 +quinn robinson 65627 4.14 +quinn steinbeck 65578 4.92 +quinn steinbeck 65763 3.18 +quinn thompson 65643 4.62 +quinn thompson 65774 2.43 +quinn underhill 65549 3.89 +quinn underhill 65694 2.39 +quinn underhill 65767 2.48 +quinn van buren 65725 0.53 +quinn young 65647 3.54 +quinn zipper 65579 1.83 +quinn zipper 65693 2.1 +rachel allen 65661 4.66 +rachel allen 65709 4.53 +rachel brown 65586 0.43 +rachel brown 65587 2.69 +rachel brown 65587 2.69 +rachel brown 65610 2.67 +rachel brown 65693 3.07 +rachel carson 65677 4.58 +rachel carson 65682 4.54 +rachel davidson 65755 2.28 +rachel ellison 65761 0.05 +rachel falkner 65616 1.2 +rachel falkner 65681 2.01 +rachel falkner 65693 2.56 +rachel falkner 65764 2.91 +rachel johnson 65658 3.02 +rachel king 65604 2.32 +rachel king 65643 2.4 +rachel laertes 65562 4.43 +rachel laertes 65624 2.8 +rachel ovid 65721 0.3 +rachel ovid 65736 0.92 +rachel polk 65686 2.56 +rachel quirinius 65787 2.95 +rachel robinson 65544 0.06 +rachel robinson 65717 1.55 +rachel robinson 65724 2.27 +rachel thompson 65648 2.49 +rachel thompson 65662 3.16 +rachel thompson 65733 2.51 +rachel underhill 65667 2.29 +rachel white 65615 1.99 +rachel white 65717 3.08 +rachel young 65727 1.75 +rachel zipper 65757 2.82 +rachel zipper 65785 3.62 +sarah carson 65679 1.04 +sarah carson 65693 0.85 +sarah carson 65694 2.06 +sarah ellison 65611 1.64 +sarah falkner 65606 0.77 +sarah falkner 65680 2.85 +sarah garcia 65563 4.89 +sarah garcia 65638 4.78 +sarah garcia 65661 3.63 +sarah ichabod 65667 3.94 +sarah ichabod 65671 2.33 +sarah johnson 65659 3.51 +sarah johnson 65716 4.21 +sarah johnson 65731 3.81 +sarah johnson 65751 3.37 +sarah king 65650 1.05 +sarah king 65699 0.99 +sarah miller 65557 0.2 +sarah ovid 65550 3.21 +sarah robinson 65677 4.9 +sarah robinson 65763 2.99 +sarah steinbeck 65721 2.82 +sarah white 65622 0.07 +sarah white 65747 2.29 +sarah xylophone 65678 0.15 +sarah young 65595 2.15 +sarah zipper 65550 2.22 +tom brown 65593 1.64 +tom brown 65675 2.83 +tom carson 65539 4.38 +tom carson 65624 4.28 +tom carson 65780 4.03 +tom davidson 65780 2.4 +tom ellison 65578 3.3 +tom ellison 65670 4.04 +tom ellison 65756 3.51 +tom falkner 65574 1.09 +tom falkner 65583 2.05 +tom hernandez 65575 2.35 +tom hernandez 65632 2.64 +tom ichabod 65588 1.48 +tom johnson 65536 4.68 +tom johnson 65789 4.6 +tom king 65576 2.87 +tom laertes 65617 1.51 +tom laertes 65701 1.93 +tom miller 65594 1.14 +tom miller 65603 1.25 +tom miller 65704 1.26 +tom nixon 65672 0.04 +tom ovid 65628 1.95 +tom polk 65652 2.54 +tom polk 65742 2.26 +tom quirinius 65563 4.51 +tom quirinius 65783 4.17 +tom robinson 65626 3.12 +tom robinson 65632 3.61 +tom robinson 65691 3.13 +tom robinson 65758 3.45 +tom steinbeck 65666 1.34 +tom van buren 65621 0.66 +tom van buren 65652 2.71 +tom van buren 65669 3.47 +tom white 65548 2.13 +tom young 65544 3.73 +tom young 65546 2.02 +tom zipper 65789 1.51 +ulysses brown 65735 2.92 +ulysses carson 65602 0.34 +ulysses carson 65643 2.11 +ulysses carson 65703 1.43 +ulysses carson 65716 1.41 +ulysses davidson 65750 3.04 +ulysses ellison 65575 4.39 +ulysses garcia 65666 4.2 +ulysses hernandez 
65651 1.75 +ulysses hernandez 65702 2.42 +ulysses hernandez 65786 2.53 +ulysses ichabod 65551 0.33 +ulysses ichabod 65566 2.19 +ulysses johnson 65776 4.79 +ulysses king 65649 4.46 +ulysses laertes 65691 4.55 +ulysses laertes 65711 3.54 +ulysses laertes 65781 3.66 +ulysses miller 65610 0.24 +ulysses miller 65637 1.08 +ulysses nixon 65603 1.85 +ulysses ovid 65656 3.17 +ulysses polk 65563 1.32 +ulysses polk 65580 3.05 +ulysses polk 65612 3.46 +ulysses polk 65777 3.75 +ulysses quirinius 65786 2.13 +ulysses robinson 65744 1.97 +ulysses steinbeck 65611 2.74 +ulysses steinbeck 65680 2.64 +ulysses thompson 65788 1.51 +ulysses underhill 65570 0.38 +ulysses underhill 65616 0.8 +ulysses underhill 65620 2.09 +ulysses underhill 65623 2.69 +ulysses underhill 65641 2.54 +ulysses underhill 65713 2.9 +ulysses underhill 65785 2.97 +ulysses van buren 65684 1.42 +ulysses white 65654 0.14 +ulysses white 65675 1.51 +ulysses xylophone 65623 2.3 +ulysses xylophone 65636 2.69 +ulysses xylophone 65781 3.22 +ulysses young 65675 1.34 +ulysses young 65736 2.01 +ulysses young 65748 2.24 +victor allen 65684 0.83 +victor allen 65707 2.31 +victor brown 65550 4.57 +victor brown 65555 3.54 +victor brown 65622 2.61 +victor brown 65673 2.34 +victor davidson 65579 0.61 +victor davidson 65628 1.52 +victor davidson 65783 2.25 +victor ellison 65641 1.32 +victor ellison 65782 2.59 +victor hernandez 65571 3.62 +victor hernandez 65659 3.68 +victor hernandez 65708 3.35 +victor hernandez 65735 2.88 +victor hernandez 65775 2.62 +victor johnson 65606 3.03 +victor johnson 65607 2.3 +victor johnson 65607 2.3 +victor king 65721 4.09 +victor king 65743 2.45 +victor laertes 65638 1.46 +victor laertes 65644 2.38 +victor miller 65570 0.1 +victor nixon 65709 0.74 +victor nixon 65791 1.73 +victor ovid 65649 4.93 +victor polk 65625 1.04 +victor quirinius 65620 1.32 +victor quirinius 65651 3.15 +victor robinson 65596 0.92 +victor robinson 65673 1.76 +victor steinbeck 65618 2.87 +victor steinbeck 65661 2.19 +victor steinbeck 65686 2.81 +victor thompson 65548 1.59 +victor van buren 65664 4.44 +victor van buren 65774 4.06 +victor white 65548 4.67 +victor white 65601 3.87 +victor xylophone 65549 3.8 +victor xylophone 65618 2.13 +victor xylophone 65644 1.59 +victor xylophone 65677 1.89 +victor xylophone 65755 2.27 +victor young 65628 3.16 +victor zipper 65743 3.98 +wendy allen 65628 3.8 +wendy allen 65711 3.44 +wendy allen 65782 2.4 +wendy brown 65580 4.67 +wendy brown 65657 4.68 +wendy ellison 65545 1.51 +wendy ellison 65603 1.6 +wendy falkner 65595 0.58 +wendy falkner 65604 0.82 +wendy falkner 65635 1.59 +wendy garcia 65659 3.47 +wendy garcia 65746 2.35 +wendy garcia 65747 1.67 +wendy garcia 65777 1.32 +wendy hernandez 65650 2.26 +wendy ichabod 65730 0.44 +wendy king 65586 4.46 +wendy king 65664 4.25 +wendy king 65670 2.94 +wendy laertes 65566 3.13 +wendy laertes 65683 3.99 +wendy laertes 65727 3.57 +wendy miller 65582 1.53 +wendy miller 65626 1.4 +wendy nixon 65611 0.26 +wendy nixon 65746 2.27 +wendy ovid 65589 4.75 +wendy ovid 65643 3.42 +wendy polk 65656 0.62 +wendy polk 65692 1.36 +wendy quirinius 65766 1.35 +wendy quirinius 65767 2.76 +wendy robinson 65622 0.85 +wendy robinson 65715 2.13 +wendy robinson 65774 1.8 +wendy steinbeck 65612 0.07 +wendy thompson 65650 2.27 +wendy thompson 65737 3.2 +wendy underhill 65662 4.55 +wendy underhill 65758 2.84 +wendy underhill 65775 2.54 +wendy van buren 65680 1.1 +wendy van buren 65699 1.0 +wendy white 65705 0.5 +wendy xylophone 65687 0.46 +wendy xylophone 65773 1.39 +wendy young 65674 0.48 +wendy young 
65685 1.31 +xavier allen 65611 1.53 +xavier allen 65618 2.07 +xavier allen 65771 2.33 +xavier brown 65600 0.89 +xavier brown 65704 0.58 +xavier brown 65723 1.55 +xavier carson 65731 4.42 +xavier carson 65758 3.91 +xavier davidson 65644 1.84 +xavier davidson 65664 3.4 +xavier davidson 65755 2.67 +xavier ellison 65541 1.47 +xavier ellison 65654 2.49 +xavier garcia 65672 2.76 +xavier hernandez 65541 0.96 +xavier hernandez 65544 1.47 +xavier hernandez 65766 1.28 +xavier ichabod 65597 4.76 +xavier ichabod 65663 2.99 +xavier johnson 65654 1.34 +xavier johnson 65744 3.06 +xavier king 65590 2.68 +xavier king 65601 1.4 +xavier laertes 65743 0.75 +xavier ovid 65788 2.06 +xavier polk 65587 0.99 +xavier polk 65653 1.15 +xavier polk 65675 1.9 +xavier polk 65696 1.93 +xavier quirinius 65599 1.66 +xavier quirinius 65650 1.94 +xavier quirinius 65656 2.46 +xavier quirinius 65737 1.92 +xavier thompson 65608 1.65 +xavier underhill 65710 0.13 +xavier white 65703 0.98 +xavier white 65732 2.22 +xavier xylophone 65572 1.0 +xavier zipper 65561 0.94 +yuri allen 65565 2.03 +yuri allen 65682 1.14 +yuri brown 65538 2.73 +yuri brown 65688 2.02 +yuri carson 65670 3.06 +yuri carson 65769 3.43 +yuri ellison 65570 1.05 +yuri ellison 65581 1.68 +yuri falkner 65658 2.85 +yuri falkner 65681 2.14 +yuri garcia 65639 3.41 +yuri hernandez 65706 1.64 +yuri johnson 65587 1.27 +yuri johnson 65697 1.44 +yuri johnson 65712 2.29 +yuri king 65721 0.33 +yuri laertes 65637 4.3 +yuri laertes 65773 2.15 +yuri nixon 65635 4.02 +yuri nixon 65740 4.18 +yuri polk 65607 0.08 +yuri polk 65713 0.37 +yuri polk 65742 1.25 +yuri quirinius 65544 2.58 +yuri quirinius 65617 2.1 +yuri quirinius 65695 1.91 +yuri steinbeck 65592 4.89 +yuri steinbeck 65679 3.24 +yuri thompson 65676 2.67 +yuri underhill 65718 2.86 +yuri underhill 65750 2.51 +yuri white 65659 4.59 +yuri xylophone 65714 2.53 +zach allen 65667 0.88 +zach brown 65559 4.88 +zach brown 65588 4.53 +zach brown 65691 3.49 +zach brown 65759 3.4 +zach brown 65762 3.55 +zach carson 65572 2.03 +zach ellison 65748 1.76 +zach falkner 65620 0.34 +zach falkner 65627 0.25 +zach garcia 65544 0.99 +zach garcia 65623 2.84 +zach garcia 65629 3.01 +zach garcia 65786 2.55 +zach ichabod 65599 3.36 +zach ichabod 65612 1.92 +zach king 65556 2.36 +zach king 65702 1.52 +zach king 65773 2.58 +zach miller 65584 1.6 +zach miller 65665 0.99 +zach miller 65719 1.55 +zach ovid 65578 1.51 +zach ovid 65703 1.92 +zach ovid 65750 2.63 +zach ovid 65784 2.72 +zach quirinius 65691 2.95 +zach robinson 65599 2.87 +zach steinbeck 65602 2.45 +zach steinbeck 65695 1.86 +zach thompson 65636 0.25 +zach thompson 65696 0.51 +zach underhill 65573 3.97 +zach white 65733 2.31 +zach xylophone 65542 1.69 +zach xylophone 65780 0.88 +zach young 65576 1.82 +zach zipper 65579 4.5 +zach zipper 65649 4.02 +zach zipper 65676 3.12 diff --git a/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-8-45a1d7c2aba45d761e19ff4dfdf5463e b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-8-45a1d7c2aba45d761e19ff4dfdf5463e new file mode 100644 index 0000000000000..84b934fad85b1 --- /dev/null +++ b/sql/hive/src/test/resources/golden/windowing_windowspec.q (deterministic)-8-45a1d7c2aba45d761e19ff4dfdf5463e @@ -0,0 +1,1049 @@ + 65560 20.0 + 65718 20.0 + 65740 20.0 +alice allen 65662 20.0 +alice allen 65720 20.0 +alice allen 65758 20.0 +alice brown 65696 20.0 +alice carson 65559 20.0 +alice davidson 65547 20.0 +alice falkner 65669 20.0 +alice garcia 65613 20.0 +alice hernandez 65737 20.0 +alice hernandez 65784 20.0 
+alice johnson 65739 20.0 +alice king 65660 20.0 +alice king 65734 20.0 +alice king 65738 20.0 +alice laertes 65669 20.0 +alice laertes 65671 20.0 +alice miller 65590 20.0 +alice nixon 65586 20.0 +alice nixon 65595 20.0 +alice nixon 65604 20.0 +alice ovid 65737 20.0 +alice polk 65548 20.0 +alice quirinius 65636 20.0 +alice quirinius 65728 20.0 +alice robinson 65606 20.0 +alice robinson 65789 20.0 +alice steinbeck 65578 20.0 +alice steinbeck 65673 20.0 +alice steinbeck 65786 20.0 +alice underhill 65750 20.0 +alice van buren 65562 20.0 +alice xylophone 65578 20.0 +alice xylophone 65585 20.0 +alice xylophone 65599 20.0 +alice zipper 65553 20.0 +alice zipper 65662 20.0 +alice zipper 65766 20.0 +bob brown 65584 20.0 +bob brown 65777 20.0 +bob brown 65783 20.0 +bob carson 65713 20.0 +bob davidson 65664 20.0 +bob davidson 65693 20.0 +bob davidson 65768 20.0 +bob ellison 65591 20.0 +bob ellison 65624 20.0 +bob ellison 65721 20.0 +bob ellison 65760 20.0 +bob falkner 65789 20.0 +bob garcia 65585 20.0 +bob garcia 65598 20.0 +bob garcia 65673 20.0 +bob garcia 65754 20.0 +bob garcia 65782 20.0 +bob hernandez 65557 20.0 +bob ichabod 65549 20.0 +bob king 65715 20.0 +bob king 65757 20.0 +bob king 65783 20.0 +bob laertes 65602 20.0 +bob laertes 65663 20.0 +bob miller 65608 20.0 +bob ovid 65564 20.0 +bob ovid 65619 20.0 +bob ovid 65686 20.0 +bob ovid 65726 20.0 +bob polk 65594 20.0 +bob quirinius 65700 20.0 +bob steinbeck 65637 20.0 +bob van buren 65778 20.0 +bob white 65543 20.0 +bob white 65605 20.0 +bob xylophone 65574 20.0 +bob xylophone 65666 20.0 +bob young 65556 20.0 +bob zipper 65559 20.0 +bob zipper 65633 20.0 +bob zipper 65739 20.0 +calvin allen 65669 20.0 +calvin brown 65537 20.0 +calvin brown 65580 20.0 +calvin brown 65677 20.0 +calvin carson 65637 20.0 +calvin davidson 65541 20.0 +calvin davidson 65564 20.0 +calvin ellison 65667 20.0 +calvin falkner 65573 20.0 +calvin falkner 65596 20.0 +calvin falkner 65738 20.0 +calvin falkner 65762 20.0 +calvin falkner 65778 20.0 +calvin falkner 65784 20.0 +calvin garcia 65664 20.0 +calvin hernandez 65578 20.0 +calvin johnson 65731 20.0 +calvin laertes 65570 20.0 +calvin laertes 65684 20.0 +calvin nixon 65654 20.0 +calvin nixon 65724 20.0 +calvin nixon 65749 20.0 +calvin ovid 65554 20.0 +calvin ovid 65643 20.0 +calvin ovid 65663 20.0 +calvin ovid 65715 20.0 +calvin polk 65731 20.0 +calvin quirinius 65741 20.0 +calvin quirinius 65769 20.0 +calvin robinson 65581 20.0 +calvin steinbeck 65680 20.0 +calvin steinbeck 65762 20.0 +calvin steinbeck 65779 20.0 +calvin thompson 65560 20.0 +calvin thompson 65640 20.0 +calvin underhill 65732 20.0 +calvin van buren 65552 20.0 +calvin van buren 65771 20.0 +calvin white 65553 20.0 +calvin white 65561 20.0 +calvin xylophone 65575 20.0 +calvin xylophone 65596 20.0 +calvin xylophone 65713 20.0 +calvin young 65574 20.0 +calvin young 65746 20.0 +calvin zipper 65669 20.0 +calvin zipper 65739 20.0 +david allen 65588 20.0 +david allen 65617 20.0 +david brown 65637 20.0 +david brown 65760 20.0 +david davidson 65559 20.0 +david davidson 65756 20.0 +david davidson 65778 20.0 +david davidson 65779 20.0 +david ellison 65634 20.0 +david ellison 65724 20.0 +david ellison 65724 20.0 +david hernandez 65763 20.0 +david ichabod 65699 20.0 +david ichabod 65715 20.0 +david laertes 65762 20.0 +david nixon 65536 20.0 +david ovid 65623 20.0 +david ovid 65628 20.0 +david quirinius 65697 20.0 +david quirinius 65759 20.0 +david quirinius 65779 20.0 +david robinson 65762 20.0 +david robinson 65775 20.0 +david thompson 65550 20.0 +david underhill 65602 
20.0 +david underhill 65662 20.0 +david underhill 65751 20.0 +david van buren 65625 20.0 +david van buren 65634 20.0 +david white 65678 20.0 +david xylophone 65537 20.0 +david xylophone 65670 20.0 +david xylophone 65764 20.0 +david young 65551 20.0 +david young 65694 20.0 +ethan allen 65747 20.0 +ethan brown 65539 20.0 +ethan brown 65617 20.0 +ethan brown 65685 20.0 +ethan brown 65685 20.0 +ethan brown 65722 20.0 +ethan brown 65733 20.0 +ethan carson 65742 20.0 +ethan ellison 65714 20.0 +ethan ellison 65732 20.0 +ethan falkner 65577 20.0 +ethan falkner 65614 20.0 +ethan garcia 65736 20.0 +ethan hernandez 65618 20.0 +ethan johnson 65536 20.0 +ethan king 65614 20.0 +ethan laertes 65562 20.0 +ethan laertes 65597 20.0 +ethan laertes 65628 20.0 +ethan laertes 65643 20.0 +ethan laertes 65680 20.0 +ethan laertes 65745 20.0 +ethan laertes 65760 20.0 +ethan miller 65712 20.0 +ethan nixon 65766 20.0 +ethan ovid 65697 20.0 +ethan polk 65589 20.0 +ethan polk 65615 20.0 +ethan polk 65622 20.0 +ethan polk 65622 20.0 +ethan quirinius 65542 20.0 +ethan quirinius 65591 20.0 +ethan quirinius 65706 20.0 +ethan robinson 65547 20.0 +ethan robinson 65659 20.0 +ethan underhill 65570 20.0 +ethan van buren 65572 20.0 +ethan white 65677 20.0 +ethan white 65788 20.0 +ethan xylophone 65595 20.0 +ethan zipper 65593 20.0 +ethan zipper 65680 20.0 +fred davidson 65552 20.0 +fred davidson 65595 20.0 +fred davidson 65721 20.0 +fred ellison 65548 20.0 +fred ellison 65691 20.0 +fred ellison 65771 20.0 +fred falkner 65637 20.0 +fred falkner 65648 20.0 +fred falkner 65783 20.0 +fred hernandez 65541 20.0 +fred ichabod 65572 20.0 +fred ichabod 65789 20.0 +fred johnson 65758 20.0 +fred king 65694 20.0 +fred king 65745 20.0 +fred laertes 65769 20.0 +fred miller 65536 20.0 +fred nixon 65560 20.0 +fred nixon 65612 20.0 +fred nixon 65703 20.0 +fred nixon 65705 20.0 +fred polk 65603 20.0 +fred polk 65656 20.0 +fred polk 65701 20.0 +fred polk 65706 20.0 +fred quirinius 65697 20.0 +fred quirinius 65782 20.0 +fred robinson 65623 20.0 +fred steinbeck 65544 20.0 +fred steinbeck 65651 20.0 +fred steinbeck 65755 20.0 +fred underhill 65629 20.0 +fred van buren 65537 20.0 +fred van buren 65561 20.0 +fred van buren 65745 20.0 +fred van buren 65789 20.0 +fred white 65589 20.0 +fred young 65594 20.0 +fred young 65773 20.0 +fred zipper 65553 20.0 +gabriella allen 65646 20.0 +gabriella allen 65677 20.0 +gabriella brown 65704 20.0 +gabriella brown 65753 20.0 +gabriella carson 65586 20.0 +gabriella davidson 65565 20.0 +gabriella ellison 65706 20.0 +gabriella ellison 65716 20.0 +gabriella falkner 65623 20.0 +gabriella falkner 65711 20.0 +gabriella falkner 65767 20.0 +gabriella garcia 65571 20.0 +gabriella hernandez 65587 20.0 +gabriella hernandez 65717 20.0 +gabriella ichabod 65559 20.0 +gabriella ichabod 65633 20.0 +gabriella ichabod 65702 20.0 +gabriella ichabod 65712 20.0 +gabriella ichabod 65717 20.0 +gabriella king 65651 20.0 +gabriella king 65657 20.0 +gabriella laertes 65781 20.0 +gabriella miller 65646 20.0 +gabriella ovid 65556 20.0 +gabriella ovid 65583 20.0 +gabriella polk 65701 20.0 +gabriella polk 65790 20.0 +gabriella steinbeck 65582 20.0 +gabriella steinbeck 65653 20.0 +gabriella thompson 65682 20.0 +gabriella thompson 65755 20.0 +gabriella thompson 65766 20.0 +gabriella van buren 65581 20.0 +gabriella van buren 65644 20.0 +gabriella white 65638 20.0 +gabriella young 65699 20.0 +gabriella young 65774 20.0 +gabriella zipper 65540 20.0 +gabriella zipper 65754 20.0 +holly allen 65596 20.0 +holly brown 65599 20.0 +holly brown 65619 20.0 
+holly falkner 65720 20.0 +holly hernandez 65602 20.0 +holly hernandez 65686 20.0 +holly hernandez 65750 20.0 +holly hernandez 65788 20.0 +holly ichabod 65711 20.0 +holly ichabod 65749 20.0 +holly ichabod 65752 20.0 +holly johnson 65655 20.0 +holly johnson 65662 20.0 +holly johnson 65755 20.0 +holly king 65549 20.0 +holly king 65648 20.0 +holly laertes 65664 20.0 +holly miller 65653 20.0 +holly nixon 65539 20.0 +holly nixon 65658 20.0 +holly polk 65743 20.0 +holly polk 65751 20.0 +holly robinson 65564 20.0 +holly thompson 65538 20.0 +holly thompson 65578 20.0 +holly thompson 65713 20.0 +holly underhill 65634 20.0 +holly underhill 65654 20.0 +holly underhill 65721 20.0 +holly underhill 65759 20.0 +holly van buren 65727 20.0 +holly white 65536 20.0 +holly white 65602 20.0 +holly xylophone 65544 20.0 +holly young 65606 20.0 +holly young 65765 20.0 +holly zipper 65607 20.0 +holly zipper 65755 20.0 +irene allen 65556 20.0 +irene brown 65633 20.0 +irene brown 65650 20.0 +irene brown 65765 20.0 +irene carson 65590 20.0 +irene ellison 65659 20.0 +irene ellison 65696 20.0 +irene falkner 65620 20.0 +irene falkner 65661 20.0 +irene garcia 65660 20.0 +irene garcia 65711 20.0 +irene garcia 65787 20.0 +irene ichabod 65645 20.0 +irene ichabod 65722 20.0 +irene johnson 65583 20.0 +irene laertes 65664 20.0 +irene laertes 65710 20.0 +irene laertes 65722 20.0 +irene miller 65730 20.0 +irene nixon 65631 20.0 +irene nixon 65643 20.0 +irene nixon 65653 20.0 +irene ovid 65691 20.0 +irene ovid 65734 20.0 +irene ovid 65753 20.0 +irene polk 65551 20.0 +irene polk 65575 20.0 +irene polk 65579 20.0 +irene polk 65595 20.0 +irene polk 65610 20.0 +irene quirinius 65724 20.0 +irene quirinius 65769 20.0 +irene quirinius 65773 20.0 +irene robinson 65554 20.0 +irene steinbeck 65683 20.0 +irene thompson 65688 20.0 +irene underhill 65591 20.0 +irene underhill 65787 20.0 +irene van buren 65579 20.0 +irene van buren 65589 20.0 +irene xylophone 65775 20.0 +jessica brown 65588 20.0 +jessica carson 65553 20.0 +jessica carson 65672 20.0 +jessica carson 65747 20.0 +jessica davidson 65549 20.0 +jessica davidson 65606 20.0 +jessica davidson 65675 20.0 +jessica davidson 65727 20.0 +jessica ellison 65567 20.0 +jessica ellison 65663 20.0 +jessica falkner 65584 20.0 +jessica garcia 65676 20.0 +jessica garcia 65789 20.0 +jessica ichabod 65704 20.0 +jessica johnson 65607 20.0 +jessica johnson 65720 20.0 +jessica miller 65733 20.0 +jessica nixon 65590 20.0 +jessica nixon 65774 20.0 +jessica ovid 65582 20.0 +jessica ovid 65751 20.0 +jessica polk 65637 20.0 +jessica quirinius 65562 20.0 +jessica quirinius 65608 20.0 +jessica quirinius 65712 20.0 +jessica quirinius 65716 20.0 +jessica robinson 65576 20.0 +jessica thompson 65581 20.0 +jessica thompson 65675 20.0 +jessica underhill 65656 20.0 +jessica underhill 65702 20.0 +jessica underhill 65783 20.0 +jessica van buren 65615 20.0 +jessica white 65544 20.0 +jessica white 65570 20.0 +jessica white 65594 20.0 +jessica white 65673 20.0 +jessica white 65779 20.0 +jessica xylophone 65562 20.0 +jessica young 65623 20.0 +jessica young 65711 20.0 +jessica zipper 65600 20.0 +jessica zipper 65657 20.0 +jessica zipper 65778 20.0 +katie allen 65542 20.0 +katie brown 65590 20.0 +katie davidson 65619 20.0 +katie ellison 65675 20.0 +katie ellison 65699 20.0 +katie falkner 65728 20.0 +katie garcia 65625 20.0 +katie garcia 65747 20.0 +katie hernandez 65550 20.0 +katie ichabod 65658 20.0 +katie ichabod 65726 20.0 +katie ichabod 65757 20.0 +katie king 65629 20.0 +katie king 65647 20.0 +katie king 65776 20.0 +katie 
miller 65541 20.0 +katie miller 65661 20.0 +katie nixon 65669 20.0 +katie ovid 65681 20.0 +katie polk 65746 20.0 +katie polk 65784 20.0 +katie robinson 65697 20.0 +katie van buren 65643 20.0 +katie van buren 65730 20.0 +katie white 65620 20.0 +katie white 65719 20.0 +katie xylophone 65585 20.0 +katie young 65644 20.0 +katie young 65746 20.0 +katie young 65764 20.0 +katie zipper 65568 20.0 +katie zipper 65733 20.0 +luke allen 65547 20.0 +luke allen 65552 20.0 +luke allen 65576 20.0 +luke allen 65681 20.0 +luke allen 65776 20.0 +luke brown 65719 20.0 +luke davidson 65656 20.0 +luke davidson 65791 20.0 +luke ellison 65582 20.0 +luke ellison 65664 20.0 +luke ellison 65779 20.0 +luke falkner 65589 20.0 +luke falkner 65618 20.0 +luke garcia 65687 20.0 +luke garcia 65778 20.0 +luke ichabod 65629 20.0 +luke ichabod 65654 20.0 +luke johnson 65545 20.0 +luke johnson 65716 20.0 +luke johnson 65718 20.0 +luke laertes 65608 20.0 +luke laertes 65657 20.0 +luke laertes 65685 20.0 +luke laertes 65730 20.0 +luke laertes 65756 20.0 +luke miller 65752 20.0 +luke ovid 65569 20.0 +luke ovid 65693 20.0 +luke polk 65645 20.0 +luke polk 65658 20.0 +luke quirinius 65655 20.0 +luke robinson 65634 20.0 +luke robinson 65772 20.0 +luke thompson 65626 20.0 +luke underhill 65553 20.0 +luke underhill 65571 20.0 +luke underhill 65651 20.0 +luke van buren 65678 20.0 +luke white 65693 20.0 +luke xylophone 65597 20.0 +luke zipper 65641 20.0 +mike allen 65706 20.0 +mike brown 65654 20.0 +mike carson 65698 20.0 +mike carson 65700 20.0 +mike carson 65751 20.0 +mike davidson 65658 20.0 +mike davidson 65759 20.0 +mike ellison 65598 20.0 +mike ellison 65606 20.0 +mike ellison 65718 20.0 +mike ellison 65738 20.0 +mike ellison 65760 20.0 +mike falkner 65609 20.0 +mike garcia 65571 20.0 +mike garcia 65600 20.0 +mike garcia 65770 20.0 +mike hernandez 65548 20.0 +mike hernandez 65672 20.0 +mike ichabod 65621 20.0 +mike king 65563 20.0 +mike king 65586 20.0 +mike king 65591 20.0 +mike king 65642 20.0 +mike king 65769 20.0 +mike king 65776 20.0 +mike miller 65549 20.0 +mike nixon 65619 20.0 +mike nixon 65704 20.0 +mike polk 65619 20.0 +mike polk 65658 20.0 +mike polk 65704 20.0 +mike quirinius 65717 20.0 +mike steinbeck 65550 20.0 +mike steinbeck 65564 20.0 +mike steinbeck 65573 20.0 +mike steinbeck 65749 20.0 +mike van buren 65620 20.0 +mike van buren 65770 20.0 +mike white 65648 20.0 +mike white 65685 20.0 +mike white 65769 20.0 +mike white 65778 20.0 +mike young 65545 20.0 +mike young 65581 20.0 +mike young 65736 20.0 +mike zipper 65552 20.0 +mike zipper 65695 20.0 +mike zipper 65779 20.0 +nick allen 65641 20.0 +nick allen 65786 20.0 +nick brown 65724 20.0 +nick davidson 65601 20.0 +nick ellison 65691 20.0 +nick ellison 65745 20.0 +nick falkner 65583 20.0 +nick falkner 65676 20.0 +nick garcia 65695 20.0 +nick garcia 65712 20.0 +nick garcia 65720 20.0 +nick ichabod 65572 20.0 +nick ichabod 65681 20.0 +nick ichabod 65737 20.0 +nick johnson 65585 20.0 +nick johnson 65784 20.0 +nick laertes 65624 20.0 +nick miller 65757 20.0 +nick nixon 65650 20.0 +nick ovid 65719 20.0 +nick polk 65716 20.0 +nick quirinius 65588 20.0 +nick quirinius 65723 20.0 +nick robinson 65547 20.0 +nick robinson 65675 20.0 +nick steinbeck 65689 20.0 +nick thompson 65610 20.0 +nick underhill 65619 20.0 +nick van buren 65603 20.0 +nick xylophone 65644 20.0 +nick young 65654 20.0 +nick young 65660 20.0 +nick zipper 65757 20.0 +nick zipper 65765 20.0 +oscar allen 65644 20.0 +oscar brown 65614 20.0 +oscar carson 65537 20.0 +oscar carson 65548 20.0 +oscar carson 65549 20.0 
+oscar carson 65624 20.0 +oscar carson 65697 20.0 +oscar davidson 65556 20.0 +oscar ellison 65630 20.0 +oscar ellison 65630 20.0 +oscar falkner 65692 20.0 +oscar garcia 65751 20.0 +oscar hernandez 65683 20.0 +oscar hernandez 65707 20.0 +oscar ichabod 65536 20.0 +oscar ichabod 65562 20.0 +oscar ichabod 65637 20.0 +oscar ichabod 65763 20.0 +oscar johnson 65645 20.0 +oscar johnson 65778 20.0 +oscar king 65541 20.0 +oscar king 65550 20.0 +oscar king 65787 20.0 +oscar laertes 65625 20.0 +oscar laertes 65690 20.0 +oscar laertes 65756 20.0 +oscar laertes 65790 20.0 +oscar nixon 65596 20.0 +oscar ovid 65536 20.0 +oscar ovid 65615 20.0 +oscar ovid 65659 20.0 +oscar polk 65541 20.0 +oscar polk 65643 20.0 +oscar quirinius 65541 20.0 +oscar quirinius 65560 20.0 +oscar quirinius 65689 20.0 +oscar quirinius 65720 20.0 +oscar robinson 65537 20.0 +oscar robinson 65658 20.0 +oscar robinson 65687 20.0 +oscar robinson 65782 20.0 +oscar steinbeck 65709 20.0 +oscar thompson 65542 20.0 +oscar thompson 65698 20.0 +oscar thompson 65727 20.0 +oscar thompson 65738 20.0 +oscar underhill 65626 20.0 +oscar van buren 65581 20.0 +oscar van buren 65635 20.0 +oscar van buren 65705 20.0 +oscar white 65552 20.0 +oscar white 65564 20.0 +oscar white 65671 20.0 +oscar white 65735 20.0 +oscar xylophone 65773 20.0 +oscar xylophone 65773 20.0 +oscar xylophone 65775 20.0 +oscar zipper 65568 20.0 +oscar zipper 65740 20.0 +oscar zipper 65777 20.0 +priscilla brown 65670 20.0 +priscilla brown 65690 20.0 +priscilla brown 65749 20.0 +priscilla carson 65658 20.0 +priscilla carson 65687 20.0 +priscilla carson 65755 20.0 +priscilla ichabod 65627 20.0 +priscilla ichabod 65759 20.0 +priscilla johnson 65543 20.0 +priscilla johnson 65633 20.0 +priscilla johnson 65668 20.0 +priscilla johnson 65681 20.0 +priscilla johnson 65755 20.0 +priscilla king 65646 20.0 +priscilla nixon 65564 20.0 +priscilla nixon 65600 20.0 +priscilla ovid 65541 20.0 +priscilla ovid 65790 20.0 +priscilla polk 65747 20.0 +priscilla quirinius 65672 20.0 +priscilla thompson 65654 20.0 +priscilla underhill 65715 20.0 +priscilla underhill 65729 20.0 +priscilla van buren 65607 20.0 +priscilla van buren 65685 20.0 +priscilla van buren 65749 20.0 +priscilla white 65652 20.0 +priscilla xylophone 65538 20.0 +priscilla xylophone 65763 20.0 +priscilla xylophone 65774 20.0 +priscilla young 65585 20.0 +priscilla young 65658 20.0 +priscilla zipper 65622 20.0 +priscilla zipper 65726 20.0 +quinn allen 65657 20.0 +quinn allen 65708 20.0 +quinn brown 65691 20.0 +quinn brown 65700 20.0 +quinn brown 65733 20.0 +quinn davidson 65549 20.0 +quinn davidson 65714 20.0 +quinn davidson 65776 20.0 +quinn davidson 65779 20.0 +quinn ellison 65705 20.0 +quinn ellison 65778 20.0 +quinn garcia 65568 20.0 +quinn garcia 65604 20.0 +quinn garcia 65610 20.0 +quinn garcia 65773 20.0 +quinn ichabod 65564 20.0 +quinn king 65558 20.0 +quinn king 65649 20.0 +quinn laertes 65542 20.0 +quinn laertes 65560 20.0 +quinn laertes 65627 20.0 +quinn nixon 65659 20.0 +quinn ovid 65699 20.0 +quinn quirinius 65747 20.0 +quinn robinson 65627 20.0 +quinn steinbeck 65578 20.0 +quinn steinbeck 65763 20.0 +quinn thompson 65643 20.0 +quinn thompson 65774 20.0 +quinn underhill 65549 20.0 +quinn underhill 65694 20.0 +quinn underhill 65767 20.0 +quinn van buren 65725 20.0 +quinn young 65647 20.0 +quinn zipper 65579 20.0 +quinn zipper 65693 20.0 +rachel allen 65661 20.0 +rachel allen 65709 20.0 +rachel brown 65586 20.0 +rachel brown 65587 20.0 +rachel brown 65587 20.0 +rachel brown 65610 20.0 +rachel brown 65693 20.0 +rachel carson 
65677 20.0 +rachel carson 65682 20.0 +rachel davidson 65755 20.0 +rachel ellison 65761 20.0 +rachel falkner 65616 20.0 +rachel falkner 65681 20.0 +rachel falkner 65693 20.0 +rachel falkner 65764 20.0 +rachel johnson 65658 20.0 +rachel king 65604 20.0 +rachel king 65643 20.0 +rachel laertes 65562 20.0 +rachel laertes 65624 20.0 +rachel ovid 65721 20.0 +rachel ovid 65736 20.0 +rachel polk 65686 20.0 +rachel quirinius 65787 20.0 +rachel robinson 65544 20.0 +rachel robinson 65717 20.0 +rachel robinson 65724 20.0 +rachel thompson 65648 20.0 +rachel thompson 65662 20.0 +rachel thompson 65733 20.0 +rachel underhill 65667 20.0 +rachel white 65615 20.0 +rachel white 65717 20.0 +rachel young 65727 20.0 +rachel zipper 65757 20.0 +rachel zipper 65785 20.0 +sarah carson 65679 20.0 +sarah carson 65693 20.0 +sarah carson 65694 20.0 +sarah ellison 65611 20.0 +sarah falkner 65606 20.0 +sarah falkner 65680 20.0 +sarah garcia 65563 20.0 +sarah garcia 65638 20.0 +sarah garcia 65661 20.0 +sarah ichabod 65667 20.0 +sarah ichabod 65671 20.0 +sarah johnson 65659 20.0 +sarah johnson 65716 20.0 +sarah johnson 65731 20.0 +sarah johnson 65751 20.0 +sarah king 65650 20.0 +sarah king 65699 20.0 +sarah miller 65557 20.0 +sarah ovid 65550 20.0 +sarah robinson 65677 20.0 +sarah robinson 65763 20.0 +sarah steinbeck 65721 20.0 +sarah white 65622 20.0 +sarah white 65747 20.0 +sarah xylophone 65678 20.0 +sarah young 65595 20.0 +sarah zipper 65550 20.0 +tom brown 65593 20.0 +tom brown 65675 20.0 +tom carson 65539 20.0 +tom carson 65624 20.0 +tom carson 65780 20.0 +tom davidson 65780 20.0 +tom ellison 65578 20.0 +tom ellison 65670 20.0 +tom ellison 65756 20.0 +tom falkner 65574 20.0 +tom falkner 65583 20.0 +tom hernandez 65575 20.0 +tom hernandez 65632 20.0 +tom ichabod 65588 20.0 +tom johnson 65536 20.0 +tom johnson 65789 20.0 +tom king 65576 20.0 +tom laertes 65617 20.0 +tom laertes 65701 20.0 +tom miller 65594 20.0 +tom miller 65603 20.0 +tom miller 65704 20.0 +tom nixon 65672 20.0 +tom ovid 65628 20.0 +tom polk 65652 20.0 +tom polk 65742 20.0 +tom quirinius 65563 20.0 +tom quirinius 65783 20.0 +tom robinson 65626 20.0 +tom robinson 65632 20.0 +tom robinson 65691 20.0 +tom robinson 65758 20.0 +tom steinbeck 65666 20.0 +tom van buren 65621 20.0 +tom van buren 65652 20.0 +tom van buren 65669 20.0 +tom white 65548 20.0 +tom young 65544 20.0 +tom young 65546 20.0 +tom zipper 65789 20.0 +ulysses brown 65735 20.0 +ulysses carson 65602 20.0 +ulysses carson 65643 20.0 +ulysses carson 65703 20.0 +ulysses carson 65716 20.0 +ulysses davidson 65750 20.0 +ulysses ellison 65575 20.0 +ulysses garcia 65666 20.0 +ulysses hernandez 65651 20.0 +ulysses hernandez 65702 20.0 +ulysses hernandez 65786 20.0 +ulysses ichabod 65551 20.0 +ulysses ichabod 65566 20.0 +ulysses johnson 65776 20.0 +ulysses king 65649 20.0 +ulysses laertes 65691 20.0 +ulysses laertes 65711 20.0 +ulysses laertes 65781 20.0 +ulysses miller 65610 20.0 +ulysses miller 65637 20.0 +ulysses nixon 65603 20.0 +ulysses ovid 65656 20.0 +ulysses polk 65563 20.0 +ulysses polk 65580 20.0 +ulysses polk 65612 20.0 +ulysses polk 65777 20.0 +ulysses quirinius 65786 20.0 +ulysses robinson 65744 20.0 +ulysses steinbeck 65611 20.0 +ulysses steinbeck 65680 20.0 +ulysses thompson 65788 20.0 +ulysses underhill 65570 20.0 +ulysses underhill 65616 20.0 +ulysses underhill 65620 20.0 +ulysses underhill 65623 20.0 +ulysses underhill 65641 20.0 +ulysses underhill 65713 20.0 +ulysses underhill 65785 20.0 +ulysses van buren 65684 20.0 +ulysses white 65654 20.0 +ulysses white 65675 20.0 +ulysses xylophone 
65623 20.0 +ulysses xylophone 65636 20.0 +ulysses xylophone 65781 20.0 +ulysses young 65675 20.0 +ulysses young 65736 20.0 +ulysses young 65748 20.0 +victor allen 65684 20.0 +victor allen 65707 20.0 +victor brown 65550 20.0 +victor brown 65555 20.0 +victor brown 65622 20.0 +victor brown 65673 20.0 +victor davidson 65579 20.0 +victor davidson 65628 20.0 +victor davidson 65783 20.0 +victor ellison 65641 20.0 +victor ellison 65782 20.0 +victor hernandez 65571 20.0 +victor hernandez 65659 20.0 +victor hernandez 65708 20.0 +victor hernandez 65735 20.0 +victor hernandez 65775 20.0 +victor johnson 65606 20.0 +victor johnson 65607 20.0 +victor johnson 65607 20.0 +victor king 65721 20.0 +victor king 65743 20.0 +victor laertes 65638 20.0 +victor laertes 65644 20.0 +victor miller 65570 20.0 +victor nixon 65709 20.0 +victor nixon 65791 20.0 +victor ovid 65649 20.0 +victor polk 65625 20.0 +victor quirinius 65620 20.0 +victor quirinius 65651 20.0 +victor robinson 65596 20.0 +victor robinson 65673 20.0 +victor steinbeck 65618 20.0 +victor steinbeck 65661 20.0 +victor steinbeck 65686 20.0 +victor thompson 65548 20.0 +victor van buren 65664 20.0 +victor van buren 65774 20.0 +victor white 65548 20.0 +victor white 65601 20.0 +victor xylophone 65549 20.0 +victor xylophone 65618 20.0 +victor xylophone 65644 20.0 +victor xylophone 65677 20.0 +victor xylophone 65755 20.0 +victor young 65628 20.0 +victor zipper 65743 20.0 +wendy allen 65628 20.0 +wendy allen 65711 20.0 +wendy allen 65782 20.0 +wendy brown 65580 20.0 +wendy brown 65657 20.0 +wendy ellison 65545 20.0 +wendy ellison 65603 20.0 +wendy falkner 65595 20.0 +wendy falkner 65604 20.0 +wendy falkner 65635 20.0 +wendy garcia 65659 20.0 +wendy garcia 65746 20.0 +wendy garcia 65747 20.0 +wendy garcia 65777 20.0 +wendy hernandez 65650 20.0 +wendy ichabod 65730 20.0 +wendy king 65586 20.0 +wendy king 65664 20.0 +wendy king 65670 20.0 +wendy laertes 65566 20.0 +wendy laertes 65683 20.0 +wendy laertes 65727 20.0 +wendy miller 65582 20.0 +wendy miller 65626 20.0 +wendy nixon 65611 20.0 +wendy nixon 65746 20.0 +wendy ovid 65589 20.0 +wendy ovid 65643 20.0 +wendy polk 65656 20.0 +wendy polk 65692 20.0 +wendy quirinius 65766 20.0 +wendy quirinius 65767 20.0 +wendy robinson 65622 20.0 +wendy robinson 65715 20.0 +wendy robinson 65774 20.0 +wendy steinbeck 65612 20.0 +wendy thompson 65650 20.0 +wendy thompson 65737 20.0 +wendy underhill 65662 20.0 +wendy underhill 65758 20.0 +wendy underhill 65775 20.0 +wendy van buren 65680 20.0 +wendy van buren 65699 20.0 +wendy white 65705 20.0 +wendy xylophone 65687 20.0 +wendy xylophone 65773 20.0 +wendy young 65674 20.0 +wendy young 65685 20.0 +xavier allen 65611 20.0 +xavier allen 65618 20.0 +xavier allen 65771 20.0 +xavier brown 65600 20.0 +xavier brown 65704 20.0 +xavier brown 65723 20.0 +xavier carson 65731 20.0 +xavier carson 65758 20.0 +xavier davidson 65644 20.0 +xavier davidson 65664 20.0 +xavier davidson 65755 20.0 +xavier ellison 65541 20.0 +xavier ellison 65654 20.0 +xavier garcia 65672 20.0 +xavier hernandez 65541 20.0 +xavier hernandez 65544 20.0 +xavier hernandez 65766 20.0 +xavier ichabod 65597 20.0 +xavier ichabod 65663 20.0 +xavier johnson 65654 20.0 +xavier johnson 65744 20.0 +xavier king 65590 20.0 +xavier king 65601 20.0 +xavier laertes 65743 20.0 +xavier ovid 65788 20.0 +xavier polk 65587 20.0 +xavier polk 65653 20.0 +xavier polk 65675 20.0 +xavier polk 65696 20.0 +xavier quirinius 65599 20.0 +xavier quirinius 65650 20.0 +xavier quirinius 65656 20.0 +xavier quirinius 65737 20.0 +xavier thompson 65608 20.0 
+xavier underhill 65710 20.0 +xavier white 65703 20.0 +xavier white 65732 20.0 +xavier xylophone 65572 20.0 +xavier zipper 65561 20.0 +yuri allen 65565 20.0 +yuri allen 65682 20.0 +yuri brown 65538 20.0 +yuri brown 65688 20.0 +yuri carson 65670 20.0 +yuri carson 65769 20.0 +yuri ellison 65570 20.0 +yuri ellison 65581 20.0 +yuri falkner 65658 20.0 +yuri falkner 65681 20.0 +yuri garcia 65639 20.0 +yuri hernandez 65706 20.0 +yuri johnson 65587 20.0 +yuri johnson 65697 20.0 +yuri johnson 65712 20.0 +yuri king 65721 20.0 +yuri laertes 65637 20.0 +yuri laertes 65773 20.0 +yuri nixon 65635 20.0 +yuri nixon 65740 20.0 +yuri polk 65607 20.0 +yuri polk 65713 20.0 +yuri polk 65742 20.0 +yuri quirinius 65544 20.0 +yuri quirinius 65617 20.0 +yuri quirinius 65695 20.0 +yuri steinbeck 65592 20.0 +yuri steinbeck 65679 20.0 +yuri thompson 65676 20.0 +yuri underhill 65718 20.0 +yuri underhill 65750 20.0 +yuri white 65659 20.0 +yuri xylophone 65714 20.0 +zach allen 65667 20.0 +zach brown 65559 20.0 +zach brown 65588 20.0 +zach brown 65691 20.0 +zach brown 65759 20.0 +zach brown 65762 20.0 +zach carson 65572 20.0 +zach ellison 65748 20.0 +zach falkner 65620 20.0 +zach falkner 65627 20.0 +zach garcia 65544 20.0 +zach garcia 65623 20.0 +zach garcia 65629 20.0 +zach garcia 65786 20.0 +zach ichabod 65599 20.0 +zach ichabod 65612 20.0 +zach king 65556 20.0 +zach king 65702 20.0 +zach king 65773 20.0 +zach miller 65584 20.0 +zach miller 65665 20.0 +zach miller 65719 20.0 +zach ovid 65578 20.0 +zach ovid 65703 20.0 +zach ovid 65750 20.0 +zach ovid 65784 20.0 +zach quirinius 65691 20.0 +zach robinson 65599 20.0 +zach steinbeck 65602 20.0 +zach steinbeck 65695 20.0 +zach thompson 65636 20.0 +zach thompson 65696 20.0 +zach underhill 65573 20.0 +zach white 65733 20.0 +zach xylophone 65542 20.0 +zach xylophone 65780 20.0 +zach young 65576 20.0 +zach zipper 65579 20.0 +zach zipper 65649 20.0 +zach zipper 65676 20.0 diff --git a/sql/hive/src/test/resources/log4j.properties b/sql/hive/src/test/resources/log4j.properties index 5bc08062d30eb..92eaf1f2795ba 100644 --- a/sql/hive/src/test/resources/log4j.properties +++ b/sql/hive/src/test/resources/log4j.properties @@ -33,7 +33,7 @@ log4j.appender.FA.layout=org.apache.log4j.PatternLayout log4j.appender.FA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %t %p %c{1}: %m%n # Set the logger level of File Appender to WARN -log4j.appender.FA.Threshold = INFO +log4j.appender.FA.Threshold = DEBUG # Some packages are noisy for no good reason. 
log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index fc6c3c35037b0..39d315aaeab57 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -57,7 +57,7 @@ class CachedTableSuite extends QueryTest { checkAnswer( sql("SELECT * FROM src s"), preCacheResults) - + uncacheTable("src") assertCached(sql("SELECT * FROM src"), 0) } @@ -162,7 +162,7 @@ class CachedTableSuite extends QueryTest { test("REFRESH TABLE also needs to recache the data (data source tables)") { val tempPath: File = Utils.createTempDir() tempPath.delete() - table("src").save(tempPath.toString, "parquet", SaveMode.Overwrite) + table("src").write.mode(SaveMode.Overwrite).parquet(tempPath.toString) sql("DROP TABLE IF EXISTS refreshTable") createExternalTable("refreshTable", tempPath.toString, "parquet") checkAnswer( @@ -172,7 +172,7 @@ class CachedTableSuite extends QueryTest { sql("CACHE TABLE refreshTable") assertCached(table("refreshTable")) // Append new data. - table("src").save(tempPath.toString, "parquet", SaveMode.Append) + table("src").write.mode(SaveMode.Append).parquet(tempPath.toString) // We are still using the old data. assertCached(table("refreshTable")) checkAnswer( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala index d960a30e00738..30f5313d2b812 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala @@ -17,12 +17,11 @@ package org.apache.spark.sql.hive -import java.io.{OutputStream, PrintStream} - import scala.util.Try import org.scalatest.BeforeAndAfter +import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.{AnalysisException, QueryTest} @@ -109,25 +108,6 @@ class ErrorPositionSuite extends QueryTest with BeforeAndAfter { "SELECT 1 + array(1)", "1 + array") } - /** Hive can be very noisy, messing up the output of our tests. */ - private def quietly[A](f: => A): A = { - val origErr = System.err - val origOut = System.out - try { - System.setErr(new PrintStream(new OutputStream { - def write(b: Int) = {} - })) - System.setOut(new PrintStream(new OutputStream { - def write(b: Int) = {} - })) - - f - } finally { - System.setErr(origErr) - System.setOut(origOut) - } - } - /** * Creates a test that checks to see if the error thrown when analyzing a given query includes * the location of the given token in the query string. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala new file mode 100644 index 0000000000000..fb10f8583da99 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameAnalyticsSuite.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
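The CachedTableSuite hunk above replaces the deprecated DataFrame.save(path, "parquet", mode) calls with the DataFrameWriter API introduced in 1.4 (the matching reader-side change appears in the HiveParquetSuite hunk further down). A minimal sketch of the new round trip, assuming a local SQLContext and a hypothetical output path rather than anything taken from the test suites:

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.{SQLContext, SaveMode}

    val sc = new SparkContext(new SparkConf().setAppName("writer-sketch").setMaster("local[*]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val df = Seq((1, "a"), (2, "b")).toDF("key", "value")

    // Writer side: pick the save mode, then the format-specific sink.
    df.write.mode(SaveMode.Overwrite).parquet("/tmp/writer-sketch")

    // Reader side: read.parquet replaces the older sqlContext.parquetFile(...).
    val restored = sqlContext.read.parquet("/tmp/writer-sketch")
    restored.registerTempTable("p")
    sqlContext.sql("SELECT COUNT(*) FROM p").show()

The older save(...) and parquetFile(...) forms still compile in 1.4 but are deprecated, which is why these test-only call sites are migrated here.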
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.scalatest.BeforeAndAfterAll + +// TODO ideally we should put the test suite into the package `sql`, as +// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't +// support the `cube` or `rollup` yet. +class HiveDataFrameAnalyticsSuite extends QueryTest with BeforeAndAfterAll { + private var testData: DataFrame = _ + + override def beforeAll() { + testData = Seq((1, 2), (2, 4)).toDF("a", "b") + TestHive.registerDataFrameAsTable(testData, "mytable") + } + + override def afterAll(): Unit = { + TestHive.dropTempTable("mytable") + } + + test("rollup") { + checkAnswer( + testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")), + sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect() + ) + + checkAnswer( + testData.rollup("a", "b").agg(sum("b")), + sql("select a, b, sum(b) from mytable group by a, b with rollup").collect() + ) + } + + test("cube") { + checkAnswer( + testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")), + sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect() + ) + + checkAnswer( + testData.cube("a", "b").agg(sum("b")), + sql("select a, b, sum(b) from mytable group by a, b with cube").collect() + ) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameJoinSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameJoinSuite.scala new file mode 100644 index 0000000000000..52e782768cb75 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameJoinSuite.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
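HiveDataFrameAnalyticsSuite above pins the new DataFrame rollup and cube operators to their HiveQL WITH ROLLUP / WITH CUBE equivalents (as the suite's TODO notes, plain SQLContext.sql does not parse those clauses yet, hence the Hive-side test). A rough usage sketch with made-up column names and data, reusing the sqlContext setup from the previous sketch:

    import org.apache.spark.sql.functions._

    val sales = Seq(("us", "web", 10), ("us", "store", 5), ("de", "web", 7))
      .toDF("country", "channel", "amount")

    // rollup: groupings (country, channel), (country), and the grand total
    sales.rollup($"country", $"channel").agg(sum($"amount")).show()

    // cube: all of the above plus the (channel)-only grouping
    sales.cube("country", "channel").agg(sum("amount")).show()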
+ */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.{Row, QueryTest} +import org.apache.spark.sql.hive.test.TestHive.implicits._ + + +class HiveDataFrameJoinSuite extends QueryTest { + + // We should move this into SQL package if we make case sensitivity configurable in SQL. + test("join - self join auto resolve ambiguity with case insensitivity") { + val df = Seq((1, "1"), (2, "2")).toDF("key", "value") + checkAnswer( + df.join(df, df("key") === df("Key")), + Row(1, "1", 1, "1") :: Row(2, "2", 2, "2") :: Nil) + + checkAnswer( + df.join(df.filter($"value" === "2"), df("key") === df("Key")), + Row(2, "2", 2, "2") :: Nil) + } + +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala new file mode 100644 index 0000000000000..efb3f2545db84 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameWindowSuite.scala @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.{Row, QueryTest} +import org.apache.spark.sql.expressions.Window +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ + +class HiveDataFrameWindowSuite extends QueryTest { + + test("reuse window partitionBy") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + val w = Window.partitionBy("key").orderBy("value") + + checkAnswer( + df.select( + lead("key", 1).over(w), + lead("value", 1).over(w)), + Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil) + } + + test("reuse window orderBy") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + val w = Window.orderBy("value").partitionBy("key") + + checkAnswer( + df.select( + lead("key", 1).over(w), + lead("value", 1).over(w)), + Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil) + } + + test("lead") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + + checkAnswer( + df.select( + lead("value", 1).over(Window.partitionBy($"key").orderBy($"value"))), + sql( + """SELECT + | lead(value) OVER (PARTITION BY key ORDER BY value) + | FROM window_table""".stripMargin).collect()) + } + + test("lag") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + + checkAnswer( + df.select( + lag("value", 1).over(Window.partitionBy($"key").orderBy($"value"))), + sql( + """SELECT + | lag(value) OVER (PARTITION BY key ORDER BY value) + | FROM window_table""".stripMargin).collect()) + } + + test("lead with default value") { + val df = Seq((1, "1"), (1, "1"), (2, "2"), (1, "1"), + (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + lead("value", 2, "n/a").over(Window.partitionBy("key").orderBy("value"))), + sql( + """SELECT + | lead(value, 2, "n/a") OVER (PARTITION BY key ORDER BY value) + | FROM window_table""".stripMargin).collect()) + } + + test("lag with default value") { + val df = Seq((1, "1"), (1, "1"), (2, "2"), (1, "1"), + (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + lag("value", 2, "n/a").over(Window.partitionBy($"key").orderBy($"value"))), + sql( + """SELECT + | lag(value, 2, "n/a") OVER (PARTITION BY key ORDER BY value) + | FROM window_table""".stripMargin).collect()) + } + + test("rank functions in unspecific window") { + val df = Seq((1, "1"), (2, "2"), (1, "2"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + $"key", + max("key").over(Window.partitionBy("value").orderBy("key")), + min("key").over(Window.partitionBy("value").orderBy("key")), + mean("key").over(Window.partitionBy("value").orderBy("key")), + count("key").over(Window.partitionBy("value").orderBy("key")), + sum("key").over(Window.partitionBy("value").orderBy("key")), + ntile(2).over(Window.partitionBy("value").orderBy("key")), + rowNumber().over(Window.partitionBy("value").orderBy("key")), + denseRank().over(Window.partitionBy("value").orderBy("key")), + rank().over(Window.partitionBy("value").orderBy("key")), + cumeDist().over(Window.partitionBy("value").orderBy("key")), + percentRank().over(Window.partitionBy("value").orderBy("key"))), + sql( + s"""SELECT + |key, + |max(key) over (partition by value order by 
key), + |min(key) over (partition by value order by key), + |avg(key) over (partition by value order by key), + |count(key) over (partition by value order by key), + |sum(key) over (partition by value order by key), + |ntile(2) over (partition by value order by key), + |row_number() over (partition by value order by key), + |dense_rank() over (partition by value order by key), + |rank() over (partition by value order by key), + |cume_dist() over (partition by value order by key), + |percent_rank() over (partition by value order by key) + |FROM window_table""".stripMargin).collect()) + } + + test("aggregation and rows between") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + avg("key").over(Window.partitionBy($"value").orderBy($"key").rowsBetween(-1, 2))), + sql( + """SELECT + | avg(key) OVER + | (PARTITION BY value ORDER BY key ROWS BETWEEN 1 preceding and 2 following) + | FROM window_table""".stripMargin).collect()) + } + + test("aggregation and range betweens") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + avg("key").over(Window.partitionBy($"value").orderBy($"key").rangeBetween(-1, 1))), + sql( + """SELECT + | avg(key) OVER + | (PARTITION BY value ORDER BY key RANGE BETWEEN 1 preceding and 1 following) + | FROM window_table""".stripMargin).collect()) + } + + test("aggregation and rows betweens with unbounded") { + val df = Seq((1, "1"), (2, "2"), (2, "3"), (1, "3"), (3, "2"), (4, "3")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + $"key", + last("value").over( + Window.partitionBy($"value").orderBy($"key").rowsBetween(0, Long.MaxValue)), + last("value").over( + Window.partitionBy($"value").orderBy($"key").rowsBetween(Long.MinValue, 0)), + last("value").over(Window.partitionBy($"value").orderBy($"key").rowsBetween(-1, 3))), + sql( + """SELECT + | key, + | last_value(value) OVER + | (PARTITION BY value ORDER BY key ROWS between current row and unbounded following), + | last_value(value) OVER + | (PARTITION BY value ORDER BY key ROWS between unbounded preceding and current row), + | last_value(value) OVER + | (PARTITION BY value ORDER BY key ROWS between 1 preceding and 3 following) + | FROM window_table""".stripMargin).collect()) + } + + test("aggregation and range betweens with unbounded") { + val df = Seq((1, "1"), (2, "2"), (2, "2"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + df.registerTempTable("window_table") + checkAnswer( + df.select( + $"key", + last("value").over( + Window.partitionBy($"value").orderBy($"key").rangeBetween(1, Long.MaxValue)) + .equalTo("2") + .as("last_v"), + avg("key").over(Window.partitionBy("value").orderBy("key").rangeBetween(Long.MinValue, 1)) + .as("avg_key1"), + avg("key").over(Window.partitionBy("value").orderBy("key").rangeBetween(0, Long.MaxValue)) + .as("avg_key2"), + avg("key").over(Window.partitionBy("value").orderBy("key").rangeBetween(-1, 0)) + .as("avg_key3") + ), + sql( + """SELECT + | key, + | last_value(value) OVER + | (PARTITION BY value ORDER BY key RANGE 1 preceding) == "2", + | avg(key) OVER + | (PARTITION BY value ORDER BY key RANGE BETWEEN unbounded preceding and 1 following), + | avg(key) OVER + | (PARTITION BY value ORDER BY key RANGE BETWEEN current row and unbounded following), + | avg(key) OVER + | (PARTITION BY value ORDER BY key RANGE BETWEEN 1 preceding and current row) + | FROM 
window_table""".stripMargin).collect()) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala index 2a7374cc172b7..80c2d32bf70d7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveInspectorSuite.scala @@ -78,10 +78,10 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors { Literal(java.sql.Date.valueOf("2014-09-23")) :: Literal(Decimal(BigDecimal(123.123))) :: Literal(new java.sql.Timestamp(123123)) :: - Literal(Array[Byte](1,2,3)) :: - Literal.create(Seq[Int](1,2,3), ArrayType(IntegerType)) :: - Literal.create(Map[Int, Int](1->2, 2->1), MapType(IntegerType, IntegerType)) :: - Literal.create(Row(1,2.0d,3.0f), + Literal(Array[Byte](1, 2, 3)) :: + Literal.create(Seq[Int](1, 2, 3), ArrayType(IntegerType)) :: + Literal.create(Map[Int, Int](1 -> 2, 2 -> 1), MapType(IntegerType, IntegerType)) :: + Literal.create(Row(1, 2.0d, 3.0f), StructType(StructField("c1", IntegerType) :: StructField("c2", DoubleType) :: StructField("c3", FloatType) :: Nil)) :: @@ -111,8 +111,8 @@ class HiveInspectorSuite extends FunSuite with HiveInspectors { case DecimalType() => PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector case StructType(fields) => ObjectInspectorFactory.getStandardStructObjectInspector( - java.util.Arrays.asList(fields.map(f => f.name) :_*), - java.util.Arrays.asList(fields.map(f => toWritableInspector(f.dataType)) :_*)) + java.util.Arrays.asList(fields.map(f => f.name) : _*), + java.util.Arrays.asList(fields.map(f => toWritableInspector(f.dataType)) : _*)) } def checkDataType(dt1: Seq[DataType], dt2: Seq[DataType]): Unit = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 7ff5719adb3ab..5a5ea10e3c82e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -55,8 +55,8 @@ class HiveParquetSuite extends QueryTest with ParquetTest { test(s"$prefix: Converting Hive to Parquet Table via saveAsParquetFile") { withTempPath { dir => - sql("SELECT * FROM src").saveAsParquetFile(dir.getCanonicalPath) - parquetFile(dir.getCanonicalPath).registerTempTable("p") + sql("SELECT * FROM src").write.parquet(dir.getCanonicalPath) + read.parquet(dir.getCanonicalPath).registerTempTable("p") withTempTable("p") { checkAnswer( sql("SELECT * FROM src ORDER BY key"), @@ -68,8 +68,8 @@ class HiveParquetSuite extends QueryTest with ParquetTest { test(s"$prefix: INSERT OVERWRITE TABLE Parquet table") { withParquetTable((1 to 10).map(i => (i, s"val_$i")), "t") { withTempPath { file => - sql("SELECT * FROM t LIMIT 1").saveAsParquetFile(file.getCanonicalPath) - parquetFile(file.getCanonicalPath).registerTempTable("p") + sql("SELECT * FROM t LIMIT 1").write.parquet(file.getCanonicalPath) + read.parquet(file.getCanonicalPath).registerTempTable("p") withTempTable("p") { // let's do three overwrites for good measure sql("INSERT OVERWRITE TABLE p SELECT * FROM t") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala new file mode 100644 index 0000000000000..941a2941649b8 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala @@ -0,0 +1,177 @@ 
+/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.hadoop.hive.serde.serdeConstants +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.hive.client.{ManagedTable, HiveColumn, ExternalTable, HiveTable} +import org.scalatest.{BeforeAndAfterAll, FunSuite} + + +class HiveQlSuite extends FunSuite with BeforeAndAfterAll { + override def beforeAll() { + if (SessionState.get() == null) { + SessionState.start(new HiveConf()) + } + } + + private def extractTableDesc(sql: String): (HiveTable, Boolean) = { + HiveQl.createPlan(sql).collect { + case CreateTableAsSelect(desc, child, allowExisting) => (desc, allowExisting) + }.head + } + + test("Test CTAS #1") { + val s1 = + """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |(viewTime INT, + |userid BIGINT, + |page_url STRING, + |referrer_url STRING, + |ip STRING COMMENT 'IP Address of the User', + |country STRING COMMENT 'country of origination') + |COMMENT 'This is the staging page view table' + |PARTITIONED BY (dt STRING COMMENT 'date type', hour STRING COMMENT 'hour of the day') + |ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' STORED AS RCFILE + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src""".stripMargin + + val (desc, exists) = extractTableDesc(s1) + assert(exists == true) + assert(desc.specifiedDatabase == Some("mydb")) + assert(desc.name == "page_view") + assert(desc.tableType == ExternalTable) + assert(desc.location == Some("/user/external/page_view")) + assert(desc.schema == + HiveColumn("viewtime", "int", null) :: + HiveColumn("userid", "bigint", null) :: + HiveColumn("page_url", "string", null) :: + HiveColumn("referrer_url", "string", null) :: + HiveColumn("ip", "string", "IP Address of the User") :: + HiveColumn("country", "string", "country of origination") :: Nil) + // TODO will be SQLText + assert(desc.viewText == Option("This is the staging page view table")) + assert(desc.partitionColumns == + HiveColumn("dt", "string", "date type") :: + HiveColumn("hour", "string", "hour of the day") :: Nil) + assert(desc.serdeProperties == + Map((serdeConstants.SERIALIZATION_FORMAT, "\054"), (serdeConstants.FIELD_DELIM, "\054"))) + assert(desc.inputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.serde == Option("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + assert(desc.properties == Map(("p1", "v1"), ("p2", "v2"))) + } + + test("Test CTAS #2") { + val s2 = + """CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + 
|(viewTime INT, + |userid BIGINT, + |page_url STRING, + |referrer_url STRING, + |ip STRING COMMENT 'IP Address of the User', + |country STRING COMMENT 'country of origination') + |COMMENT 'This is the staging page view table' + |PARTITIONED BY (dt STRING COMMENT 'date type', hour STRING COMMENT 'hour of the day') + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src""".stripMargin + + val (desc, exists) = extractTableDesc(s2) + assert(exists == true) + assert(desc.specifiedDatabase == Some("mydb")) + assert(desc.name == "page_view") + assert(desc.tableType == ExternalTable) + assert(desc.location == Some("/user/external/page_view")) + assert(desc.schema == + HiveColumn("viewtime", "int", null) :: + HiveColumn("userid", "bigint", null) :: + HiveColumn("page_url", "string", null) :: + HiveColumn("referrer_url", "string", null) :: + HiveColumn("ip", "string", "IP Address of the User") :: + HiveColumn("country", "string", "country of origination") :: Nil) + // TODO will be SQLText + assert(desc.viewText == Option("This is the staging page view table")) + assert(desc.partitionColumns == + HiveColumn("dt", "string", "date type") :: + HiveColumn("hour", "string", "hour of the day") :: Nil) + assert(desc.serdeProperties == Map()) + assert(desc.inputFormat == Option("parquet.hive.DeprecatedParquetInputFormat")) + assert(desc.outputFormat == Option("parquet.hive.DeprecatedParquetOutputFormat")) + assert(desc.serde == Option("parquet.hive.serde.ParquetHiveSerDe")) + assert(desc.properties == Map(("p1", "v1"), ("p2", "v2"))) + } + + test("Test CTAS #3") { + val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val (desc, exists) = extractTableDesc(s3) + assert(exists == false) + assert(desc.specifiedDatabase == None) + assert(desc.name == "page_view") + assert(desc.tableType == ManagedTable) + assert(desc.location == None) + assert(desc.schema == Seq.empty[HiveColumn]) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.serdeProperties == Map()) + assert(desc.inputFormat == Option("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.outputFormat == Option("org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat")) + assert(desc.serde.isEmpty) + assert(desc.properties == Map()) + } + + test("Test CTAS #4") { + val s4 = + """CREATE TABLE page_view + |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin + intercept[AnalysisException] { + extractTableDesc(s4) + } + } + + test("Test CTAS #5") { + val s5 = """CREATE TABLE ctas2 + | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" + | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") + | STORED AS RCFile + | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") + | AS + | SELECT key, value + | FROM src + | ORDER BY key, value""".stripMargin + val (desc, exists) = extractTableDesc(s5) + assert(exists == false) + assert(desc.specifiedDatabase == None) + assert(desc.name == "ctas2") + assert(desc.tableType == ManagedTable) + assert(desc.location == None) + assert(desc.schema == Seq.empty[HiveColumn]) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.serdeProperties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) + assert(desc.inputFormat == Option("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.outputFormat == 
Option("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.serde == Option("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) + assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala index ecb990e8aac91..aa5dbe2db6903 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertIntoHiveTableSuite.scala @@ -53,7 +53,7 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter { sql("CREATE TABLE createAndInsertTest (key int, value string)") // Add some data. - testData.insertInto("createAndInsertTest") + testData.write.mode(SaveMode.Append).insertInto("createAndInsertTest") // Make sure the table has also been updated. checkAnswer( @@ -62,7 +62,7 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter { ) // Add more data. - testData.insertInto("createAndInsertTest") + testData.write.mode(SaveMode.Append).insertInto("createAndInsertTest") // Make sure the table has been updated. checkAnswer( @@ -71,7 +71,7 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter { ) // Now overwrite. - testData.insertInto("createAndInsertTest", overwrite = true) + testData.write.mode(SaveMode.Overwrite).insertInto("createAndInsertTest") // Make sure the registered table has also been updated. checkAnswer( @@ -160,7 +160,7 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter { "p1=a"::"p2=b"::"p3=c"::"p4=c"::"p5=1"::Nil , "p1=a"::"p2=b"::"p3=c"::"p4=c"::"p5=4"::Nil ) - assert(listFolders(tmpDir,List()).sortBy(_.toString()) == expected.sortBy(_.toString)) + assert(listFolders(tmpDir, List()).sortBy(_.toString()) == expected.sortBy(_.toString)) sql("DROP TABLE table_with_partition") sql("DROP TABLE tmp_table") } @@ -240,7 +240,7 @@ class InsertIntoHiveTableSuite extends QueryTest with BeforeAndAfter { checkAnswer(sql("select key,value from table_with_partition where ds='1' "), testData.collect().toSeq ) - + // test difference type of field sql("ALTER TABLE table_with_partition CHANGE COLUMN key key BIGINT") checkAnswer(sql("select key,value from table_with_partition where ds='1' "), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala index e12a6c21ccac4..1c15997ea8e6d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ListTablesSuite.scala @@ -29,7 +29,7 @@ class ListTablesSuite extends QueryTest with BeforeAndAfterAll { import org.apache.spark.sql.hive.test.TestHive.implicits._ val df = - sparkContext.parallelize((1 to 10).map(i => (i,s"str$i"))).toDF("key", "value") + sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { // The catalog in HiveContext is a case insensitive one. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 0538aa203c5a0..58e2d1fbfa73e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -21,738 +21,818 @@ import java.io.File import scala.collection.mutable.ArrayBuffer -import org.scalatest.BeforeAndAfterEach +import org.scalatest.BeforeAndAfterAll import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.metastore.TableType -import org.apache.hadoop.hive.ql.metadata.Table import org.apache.hadoop.mapred.InvalidInputException import org.apache.spark.sql._ -import org.apache.spark.util.Utils -import org.apache.spark.sql.types._ +import org.apache.spark.sql.hive.client.{HiveTable, ManagedTable} +import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.LogicalRelation +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Tests for persisting tables created though the data sources API into the metastore. */ -class MetastoreDataSourcesSuite extends QueryTest with BeforeAndAfterEach { +class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with BeforeAndAfterAll { + override val sqlContext = TestHive - override def afterEach(): Unit = { - reset() - Utils.deleteRecursively(tempPath) + var jsonFilePath: String = _ + + override def beforeAll(): Unit = { + jsonFilePath = Utils.getSparkClassLoader.getResource("sample.json").getFile } - val filePath = Utils.getSparkClassLoader.getResource("sample.json").getFile - var tempPath: File = Utils.createTempDir() - tempPath.delete() - - test ("persistent JSON table") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - checkAnswer( - sql("SELECT * FROM jsonTable"), - jsonFile(filePath).collect().toSeq) + test("persistent JSON table") { + withTable("jsonTable") { + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + checkAnswer( + sql("SELECT * FROM jsonTable"), + read.json(jsonFilePath).collect().toSeq) + } } - test ("persistent JSON table with a user specified schema") { - sql( - s""" - |CREATE TABLE jsonTable ( - |a string, - |b String, - |`c_!@(3)` int, - |`` Struct<`d!`:array, `=`:array>>) - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - jsonFile(filePath).registerTempTable("expectedJsonTable") - - checkAnswer( - sql("SELECT a, b, `c_!@(3)`, ``.`d!`, ``.`=` FROM jsonTable"), - sql("SELECT a, b, `c_!@(3)`, ``.`d!`, ``.`=` FROM expectedJsonTable").collect().toSeq) + test("persistent JSON table with a user specified schema") { + withTable("jsonTable") { + sql( + s"""CREATE TABLE jsonTable ( + |a string, + |b String, + |`c_!@(3)` int, + |`` Struct<`d!`:array, `=`:array>>) + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + withTempTable("expectedJsonTable") { + read.json(jsonFilePath).registerTempTable("expectedJsonTable") + checkAnswer( + sql("SELECT a, b, 
`c_!@(3)`, ``.`d!`, ``.`=` FROM jsonTable"), + sql("SELECT a, b, `c_!@(3)`, ``.`d!`, ``.`=` FROM expectedJsonTable")) + } + } } - test ("persistent JSON table with a user specified schema with a subset of fields") { - // This works because JSON objects are self-describing and JSONRelation can get needed - // field values based on field names. - sql( - s""" - |CREATE TABLE jsonTable (`` Struct<`=`:array>>, b String) - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - val innerStruct = StructType( - StructField("=", ArrayType(StructType(StructField("Dd2", BooleanType, true) :: Nil))) :: Nil) - val expectedSchema = StructType( - StructField("", innerStruct, true) :: - StructField("b", StringType, true) :: Nil) - - assert(expectedSchema === table("jsonTable").schema) - - jsonFile(filePath).registerTempTable("expectedJsonTable") - - checkAnswer( - sql("SELECT b, ``.`=` FROM jsonTable"), - sql("SELECT b, ``.`=` FROM expectedJsonTable").collect().toSeq) + test("persistent JSON table with a user specified schema with a subset of fields") { + withTable("jsonTable") { + // This works because JSON objects are self-describing and JSONRelation can get needed + // field values based on field names. + sql( + s"""CREATE TABLE jsonTable (`` Struct<`=`:array>>, b String) + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + val innerStruct = StructType(Seq( + StructField("=", ArrayType(StructType(StructField("Dd2", BooleanType, true) :: Nil))))) + + val expectedSchema = StructType(Seq( + StructField("", innerStruct, true), + StructField("b", StringType, true))) + + assert(expectedSchema === table("jsonTable").schema) + + withTempTable("expectedJsonTable") { + read.json(jsonFilePath).registerTempTable("expectedJsonTable") + checkAnswer( + sql("SELECT b, ``.`=` FROM jsonTable"), + sql("SELECT b, ``.`=` FROM expectedJsonTable")) + } + } } test("resolve shortened provider names") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - checkAnswer( - sql("SELECT * FROM jsonTable"), - jsonFile(filePath).collect().toSeq) + withTable("jsonTable") { + sql( + s""" + |CREATE TABLE jsonTable + |USING org.apache.spark.sql.json + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + checkAnswer( + sql("SELECT * FROM jsonTable"), + read.json(jsonFilePath).collect().toSeq) + } } test("drop table") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - checkAnswer( - sql("SELECT * FROM jsonTable"), - jsonFile(filePath).collect().toSeq) - - sql("DROP TABLE jsonTable") - - intercept[Exception] { - sql("SELECT * FROM jsonTable").collect() - } + withTable("jsonTable") { + sql( + s""" + |CREATE TABLE jsonTable + |USING org.apache.spark.sql.json + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) - assert( - (new File(filePath)).exists(), - "The table with specified path is considered as an external table, " + - "its data should not deleted after DROP TABLE.") + checkAnswer( + sql("SELECT * FROM jsonTable"), + read.json(jsonFilePath)) + + sql("DROP TABLE jsonTable") + + intercept[Exception] { + sql("SELECT * FROM jsonTable").collect() + } + + assert( + new File(jsonFilePath).exists(), + "The table with specified path is considered as an external table, " + + "its data should not deleted after DROP TABLE.") + } } 
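Besides the reader/writer calls, the rewritten MetastoreDataSourcesSuite now mixes in SQLTestUtils and wraps each test in withTable, withTempTable, withTempPath and withSQLConf, replacing the old afterEach()/reset() cleanup so that every test drops its own tables and temporary paths even if an assertion fails. A simplified sketch of the cleanup pattern these helpers provide (not the actual SQLTestUtils implementation), again assuming a SQLContext bound to sqlContext and an illustrative JSON path:

    // Illustrative only: run the test body, then always drop the named tables.
    def withTable(tableNames: String*)(body: => Unit): Unit = {
      try body finally {
        tableNames.foreach(name => sqlContext.sql(s"DROP TABLE IF EXISTS $name"))
      }
    }

    // Usage mirrors the tests in this suite:
    withTable("jsonTable") {
      sqlContext.sql(
        s"""CREATE TABLE jsonTable
           |USING org.apache.spark.sql.json
           |OPTIONS (path '/tmp/sample.json')
         """.stripMargin)
      // ... run queries and assertions against jsonTable ...
    }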
test("check change without refresh") { - val tempDir = File.createTempFile("sparksql", "json", Utils.createTempDir()) - tempDir.delete() - sparkContext.parallelize(("a", "b") :: Nil).toDF() - .toJSON.saveAsTextFile(tempDir.getCanonicalPath) - - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json - |OPTIONS ( - | path '${tempDir.getCanonicalPath}' - |) - """.stripMargin) - - checkAnswer( - sql("SELECT * FROM jsonTable"), - Row("a", "b")) - - Utils.deleteRecursively(tempDir) - sparkContext.parallelize(("a1", "b1", "c1") :: Nil).toDF() - .toJSON.saveAsTextFile(tempDir.getCanonicalPath) - - // Schema is cached so the new column does not show. The updated values in existing columns - // will show. - checkAnswer( - sql("SELECT * FROM jsonTable"), - Row("a1", "b1")) - - sql("REFRESH TABLE jsonTable") - - // Check that the refresh worked - checkAnswer( - sql("SELECT * FROM jsonTable"), - Row("a1", "b1", "c1")) - Utils.deleteRecursively(tempDir) + withTempPath { tempDir => + withTable("jsonTable") { + (("a", "b") :: Nil).toDF().toJSON.saveAsTextFile(tempDir.getCanonicalPath) + + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json + |OPTIONS ( + | path '${tempDir.getCanonicalPath}' + |) + """.stripMargin) + + checkAnswer( + sql("SELECT * FROM jsonTable"), + Row("a", "b")) + + Utils.deleteRecursively(tempDir) + (("a1", "b1", "c1") :: Nil).toDF().toJSON.saveAsTextFile(tempDir.getCanonicalPath) + + // Schema is cached so the new column does not show. The updated values in existing columns + // will show. + checkAnswer( + sql("SELECT * FROM jsonTable"), + Row("a1", "b1")) + + sql("REFRESH TABLE jsonTable") + + // Check that the refresh worked + checkAnswer( + sql("SELECT * FROM jsonTable"), + Row("a1", "b1", "c1")) + } + } } test("drop, change, recreate") { - val tempDir = File.createTempFile("sparksql", "json", Utils.createTempDir()) - tempDir.delete() - sparkContext.parallelize(("a", "b") :: Nil).toDF() - .toJSON.saveAsTextFile(tempDir.getCanonicalPath) - - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json - |OPTIONS ( - | path '${tempDir.getCanonicalPath}' - |) - """.stripMargin) - - checkAnswer( - sql("SELECT * FROM jsonTable"), - Row("a", "b")) - - Utils.deleteRecursively(tempDir) - sparkContext.parallelize(("a", "b", "c") :: Nil).toDF() - .toJSON.saveAsTextFile(tempDir.getCanonicalPath) - - sql("DROP TABLE jsonTable") - - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json - |OPTIONS ( - | path '${tempDir.getCanonicalPath}' - |) - """.stripMargin) - - // New table should reflect new schema. - checkAnswer( - sql("SELECT * FROM jsonTable"), - Row("a", "b", "c")) - Utils.deleteRecursively(tempDir) + withTempPath { tempDir => + (("a", "b") :: Nil).toDF().toJSON.saveAsTextFile(tempDir.getCanonicalPath) + + withTable("jsonTable") { + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json + |OPTIONS ( + | path '${tempDir.getCanonicalPath}' + |) + """.stripMargin) + + checkAnswer( + sql("SELECT * FROM jsonTable"), + Row("a", "b")) + + Utils.deleteRecursively(tempDir) + (("a", "b", "c") :: Nil).toDF().toJSON.saveAsTextFile(tempDir.getCanonicalPath) + + sql("DROP TABLE jsonTable") + + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json + |OPTIONS ( + | path '${tempDir.getCanonicalPath}' + |) + """.stripMargin) + + // New table should reflect new schema. 
+ checkAnswer( + sql("SELECT * FROM jsonTable"), + Row("a", "b", "c")) + } + } } test("invalidate cache and reload") { - sql( - s""" - |CREATE TABLE jsonTable (`c_!@(3)` int) - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) + withTable("jsonTable") { + sql( + s"""CREATE TABLE jsonTable (`c_!@(3)` int) + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) - jsonFile(filePath).registerTempTable("expectedJsonTable") + withTempTable("expectedJsonTable") { + read.json(jsonFilePath).registerTempTable("expectedJsonTable") - checkAnswer( - sql("SELECT * FROM jsonTable"), - sql("SELECT `c_!@(3)` FROM expectedJsonTable").collect().toSeq) + checkAnswer( + sql("SELECT * FROM jsonTable"), + sql("SELECT `c_!@(3)` FROM expectedJsonTable").collect().toSeq) - // Discard the cached relation. - invalidateTable("jsonTable") + // Discard the cached relation. + invalidateTable("jsonTable") - checkAnswer( - sql("SELECT * FROM jsonTable"), - sql("SELECT `c_!@(3)` FROM expectedJsonTable").collect().toSeq) + checkAnswer( + sql("SELECT * FROM jsonTable"), + sql("SELECT `c_!@(3)` FROM expectedJsonTable").collect().toSeq) - invalidateTable("jsonTable") - val expectedSchema = StructType(StructField("c_!@(3)", IntegerType, true) :: Nil) + invalidateTable("jsonTable") + val expectedSchema = StructType(StructField("c_!@(3)", IntegerType, true) :: Nil) - assert(expectedSchema === table("jsonTable").schema) + assert(expectedSchema === table("jsonTable").schema) + } + } } test("CTAS") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - sql( - s""" - |CREATE TABLE ctasJsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${tempPath}' - |) AS - |SELECT * FROM jsonTable - """.stripMargin) - - assert(table("ctasJsonTable").schema === table("jsonTable").schema) - - checkAnswer( - sql("SELECT * FROM ctasJsonTable"), - sql("SELECT * FROM jsonTable").collect()) + withTempPath { tempPath => + withTable("jsonTable", "ctasJsonTable") { + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + sql( + s"""CREATE TABLE ctasJsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$tempPath' + |) AS + |SELECT * FROM jsonTable + """.stripMargin) + + assert(table("ctasJsonTable").schema === table("jsonTable").schema) + + checkAnswer( + sql("SELECT * FROM ctasJsonTable"), + sql("SELECT * FROM jsonTable").collect()) + } + } } test("CTAS with IF NOT EXISTS") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - sql( - s""" - |CREATE TABLE ctasJsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${tempPath}' - |) AS - |SELECT * FROM jsonTable - """.stripMargin) - - // Create the table again should trigger a AnalysisException. - val message = intercept[AnalysisException] { - sql( - s""" - |CREATE TABLE ctasJsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${tempPath}' - |) AS - |SELECT * FROM jsonTable - """.stripMargin) - }.getMessage - assert(message.contains("Table ctasJsonTable already exists."), - "We should complain that ctasJsonTable already exists") - - // The following statement should be fine if it has IF NOT EXISTS. 
- // It tries to create a table ctasJsonTable with a new schema. - // The actual table's schema and data should not be changed. - sql( - s""" - |CREATE TABLE IF NOT EXISTS ctasJsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${tempPath}' - |) AS - |SELECT a FROM jsonTable - """.stripMargin) - - // Discard the cached relation. - invalidateTable("ctasJsonTable") - - // Schema should not be changed. - assert(table("ctasJsonTable").schema === table("jsonTable").schema) - // Table data should not be changed. - checkAnswer( - sql("SELECT * FROM ctasJsonTable"), - sql("SELECT * FROM jsonTable").collect()) + withTempPath { path => + val tempPath = path.getCanonicalPath + + withTable("jsonTable", "ctasJsonTable") { + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + sql( + s"""CREATE TABLE ctasJsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$tempPath' + |) AS + |SELECT * FROM jsonTable + """.stripMargin) + + // Create the table again should trigger a AnalysisException. + val message = intercept[AnalysisException] { + sql( + s"""CREATE TABLE ctasJsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$tempPath' + |) AS + |SELECT * FROM jsonTable + """.stripMargin) + }.getMessage + + assert( + message.contains("Table ctasJsonTable already exists."), + "We should complain that ctasJsonTable already exists") + + // The following statement should be fine if it has IF NOT EXISTS. + // It tries to create a table ctasJsonTable with a new schema. + // The actual table's schema and data should not be changed. + sql( + s"""CREATE TABLE IF NOT EXISTS ctasJsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$tempPath' + |) AS + |SELECT a FROM jsonTable + """.stripMargin) + + // Discard the cached relation. + invalidateTable("ctasJsonTable") + + // Schema should not be changed. + assert(table("ctasJsonTable").schema === table("jsonTable").schema) + // Table data should not be changed. + checkAnswer( + sql("SELECT * FROM ctasJsonTable"), + sql("SELECT * FROM jsonTable").collect()) + } + } } test("CTAS a managed table") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${filePath}' - |) - """.stripMargin) - - val expectedPath = catalog.hiveDefaultTableFilePath("ctasJsonTable") - val filesystemPath = new Path(expectedPath) - val fs = filesystemPath.getFileSystem(sparkContext.hadoopConfiguration) - if (fs.exists(filesystemPath)) fs.delete(filesystemPath, true) - - // It is a managed table when we do not specify the location. 
- sql( - s""" - |CREATE TABLE ctasJsonTable - |USING org.apache.spark.sql.json.DefaultSource - |AS - |SELECT * FROM jsonTable - """.stripMargin) - - assert(fs.exists(filesystemPath), s"$expectedPath should exist after we create the table.") - - sql( - s""" - |CREATE TABLE loadedTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path '${expectedPath}' - |) - """.stripMargin) - - assert(table("ctasJsonTable").schema === table("loadedTable").schema) - - checkAnswer( - sql("SELECT * FROM ctasJsonTable"), - sql("SELECT * FROM loadedTable").collect() - ) - - sql("DROP TABLE ctasJsonTable") - assert(!fs.exists(filesystemPath), s"$expectedPath should not exist after we drop the table.") - } - - test("SPARK-5286 Fail to drop an invalid table when using the data source API") { - sql( - s""" - |CREATE TABLE jsonTable - |USING org.apache.spark.sql.json.DefaultSource - |OPTIONS ( - | path 'it is not a path at all!' - |) - """.stripMargin) - - sql("DROP TABLE jsonTable").collect().foreach(println) - } - - test("SPARK-5839 HiveMetastoreCatalog does not recognize table aliases of data source tables.") { - val originalDefaultSource = conf.defaultDataSourceName - - val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - val df = jsonRDD(rdd) - - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - // Save the df as a managed table (by not specifiying the path). - df.saveAsTable("savedJsonTable") - - checkAnswer( - sql("SELECT * FROM savedJsonTable where savedJsonTable.a < 5"), - (1 to 4).map(i => Row(i, s"str${i}"))) + withTable("jsonTable", "ctasJsonTable", "loadedTable") { + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$jsonFilePath' + |) + """.stripMargin) + + val expectedPath = catalog.hiveDefaultTableFilePath("ctasJsonTable") + val filesystemPath = new Path(expectedPath) + val fs = filesystemPath.getFileSystem(sparkContext.hadoopConfiguration) + if (fs.exists(filesystemPath)) fs.delete(filesystemPath, true) + + // It is a managed table when we do not specify the location. + sql( + s"""CREATE TABLE ctasJsonTable + |USING org.apache.spark.sql.json.DefaultSource + |AS + |SELECT * FROM jsonTable + """.stripMargin) - checkAnswer( - sql("SELECT * FROM savedJsonTable tmp where tmp.a > 5"), - (6 to 10).map(i => Row(i, s"str${i}"))) + assert(fs.exists(filesystemPath), s"$expectedPath should exist after we create the table.") - invalidateTable("savedJsonTable") + sql( + s"""CREATE TABLE loadedTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path '$expectedPath' + |) + """.stripMargin) - checkAnswer( - sql("SELECT * FROM savedJsonTable where savedJsonTable.a < 5"), - (1 to 4).map(i => Row(i, s"str${i}"))) + assert(table("ctasJsonTable").schema === table("loadedTable").schema) - checkAnswer( - sql("SELECT * FROM savedJsonTable tmp where tmp.a > 5"), - (6 to 10).map(i => Row(i, s"str${i}"))) + checkAnswer( + sql("SELECT * FROM ctasJsonTable"), + sql("SELECT * FROM loadedTable")) - // Drop table will also delete the data. 
- sql("DROP TABLE savedJsonTable") + sql("DROP TABLE ctasJsonTable") + assert(!fs.exists(filesystemPath), s"$expectedPath should not exist after we drop the table.") + } + } - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, originalDefaultSource) + test("SPARK-5286 Fail to drop an invalid table when using the data source API") { + withTable("jsonTable") { + sql( + s"""CREATE TABLE jsonTable + |USING org.apache.spark.sql.json.DefaultSource + |OPTIONS ( + | path 'it is not a path at all!' + |) + """.stripMargin) + + sql("DROP TABLE jsonTable").collect().foreach(println) + } } - test("save table") { - val originalDefaultSource = conf.defaultDataSourceName + test("SPARK-5839 HiveMetastoreCatalog does not recognize table aliases of data source tables.") { + withTable("savedJsonTable") { + // Save the df as a managed table (by not specifying the path). + (1 to 10) + .map(i => i -> s"str$i") + .toDF("a", "b") + .write + .format("json") + .saveAsTable("savedJsonTable") - val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - val df = jsonRDD(rdd) + checkAnswer( + sql("SELECT * FROM savedJsonTable where savedJsonTable.a < 5"), + (1 to 4).map(i => Row(i, s"str$i"))) - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - // Save the df as a managed table (by not specifiying the path). - df.saveAsTable("savedJsonTable") + checkAnswer( + sql("SELECT * FROM savedJsonTable tmp where tmp.a > 5"), + (6 to 10).map(i => Row(i, s"str$i"))) - checkAnswer( - sql("SELECT * FROM savedJsonTable"), - df.collect()) + invalidateTable("savedJsonTable") - // Right now, we cannot append to an existing JSON table. - intercept[RuntimeException] { - df.saveAsTable("savedJsonTable", SaveMode.Append) - } + checkAnswer( + sql("SELECT * FROM savedJsonTable where savedJsonTable.a < 5"), + (1 to 4).map(i => Row(i, s"str$i"))) - // We can overwrite it. - df.saveAsTable("savedJsonTable", SaveMode.Overwrite) - checkAnswer( - sql("SELECT * FROM savedJsonTable"), - df.collect()) - - // When the save mode is Ignore, we will do nothing when the table already exists. - df.select("b").saveAsTable("savedJsonTable", SaveMode.Ignore) - assert(df.schema === table("savedJsonTable").schema) - checkAnswer( - sql("SELECT * FROM savedJsonTable"), - df.collect()) - - // Drop table will also delete the data. - sql("DROP TABLE savedJsonTable") - intercept[InvalidInputException] { - jsonFile(catalog.hiveDefaultTableFilePath("savedJsonTable")) + checkAnswer( + sql("SELECT * FROM savedJsonTable tmp where tmp.a > 5"), + (6 to 10).map(i => Row(i, s"str$i"))) } + } - // Create an external table by specifying the path. - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.saveAsTable( - "savedJsonTable", - "org.apache.spark.sql.json", - SaveMode.Append, - Map("path" -> tempPath.toString)) - checkAnswer( - sql("SELECT * FROM savedJsonTable"), - df.collect()) - - // Data should not be deleted after we drop the table. - sql("DROP TABLE savedJsonTable") - checkAnswer( - jsonFile(tempPath.toString), - df.collect()) - - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, originalDefaultSource) + test("save table") { + withTempPath { path => + val tempPath = path.getCanonicalPath + + withTable("savedJsonTable") { + val df = (1 to 10).map(i => i -> s"str$i").toDF("a", "b") + + withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME -> "json") { + // Save the df as a managed table (by not specifying the path). 
+ df.write.saveAsTable("savedJsonTable") + + checkAnswer(sql("SELECT * FROM savedJsonTable"), df) + + // Right now, we cannot append to an existing JSON table. + intercept[RuntimeException] { + df.write.mode(SaveMode.Append).saveAsTable("savedJsonTable") + } + + // We can overwrite it. + df.write.mode(SaveMode.Overwrite).saveAsTable("savedJsonTable") + checkAnswer(sql("SELECT * FROM savedJsonTable"), df) + + // When the save mode is Ignore, we will do nothing when the table already exists. + df.select("b").write.mode(SaveMode.Ignore).saveAsTable("savedJsonTable") + assert(df.schema === table("savedJsonTable").schema) + checkAnswer(sql("SELECT * FROM savedJsonTable"), df) + + // Drop table will also delete the data. + sql("DROP TABLE savedJsonTable") + intercept[InvalidInputException] { + read.json(catalog.hiveDefaultTableFilePath("savedJsonTable")) + } + } + + // Create an external table by specifying the path. + withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME -> "not a source name") { + df.write + .format("org.apache.spark.sql.json") + .mode(SaveMode.Append) + .option("path", tempPath.toString) + .saveAsTable("savedJsonTable") + + checkAnswer(sql("SELECT * FROM savedJsonTable"), df) + } + + // Data should not be deleted after we drop the table. + sql("DROP TABLE savedJsonTable") + checkAnswer(read.json(tempPath.toString), df) + } + } } test("create external table") { - val originalDefaultSource = conf.defaultDataSourceName - - val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - val df = jsonRDD(rdd) - - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - df.saveAsTable( - "savedJsonTable", - "org.apache.spark.sql.json", - SaveMode.Append, - Map("path" -> tempPath.toString)) - - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "org.apache.spark.sql.json") - createExternalTable("createdJsonTable", tempPath.toString) - assert(table("createdJsonTable").schema === df.schema) - checkAnswer( - sql("SELECT * FROM createdJsonTable"), - df.collect()) - - var message = intercept[AnalysisException] { - createExternalTable("createdJsonTable", filePath.toString) - }.getMessage - assert(message.contains("Table createdJsonTable already exists."), - "We should complain that ctasJsonTable already exists") - - // Data should not be deleted. - sql("DROP TABLE createdJsonTable") - checkAnswer( - jsonFile(tempPath.toString), - df.collect()) - - // Try to specify the schema. 
- conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, "not a source name") - val schema = StructType(StructField("b", StringType, true) :: Nil) - createExternalTable( - "createdJsonTable", - "org.apache.spark.sql.json", - schema, - Map("path" -> tempPath.toString)) - checkAnswer( - sql("SELECT * FROM createdJsonTable"), - sql("SELECT b FROM savedJsonTable").collect()) - - sql("DROP TABLE createdJsonTable") - - message = intercept[RuntimeException] { - createExternalTable( - "createdJsonTable", - "org.apache.spark.sql.json", - schema, - Map.empty[String, String]) - }.getMessage - assert( - message.contains("'path' must be specified for json data."), - "We should complain that path is not specified.") - - sql("DROP TABLE savedJsonTable") - conf.setConf(SQLConf.DEFAULT_DATA_SOURCE_NAME, originalDefaultSource) + withTempPath { tempPath => + withTable("savedJsonTable", "createdJsonTable") { + val df = read.json(sparkContext.parallelize((1 to 10).map { i => + s"""{ "a": $i, "b": "str$i" }""" + })) + + withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME -> "not a source name") { + df.write + .format("json") + .mode(SaveMode.Append) + .option("path", tempPath.toString) + .saveAsTable("savedJsonTable") + } + + withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME -> "json") { + createExternalTable("createdJsonTable", tempPath.toString) + assert(table("createdJsonTable").schema === df.schema) + checkAnswer(sql("SELECT * FROM createdJsonTable"), df) + + assert( + intercept[AnalysisException] { + createExternalTable("createdJsonTable", jsonFilePath.toString) + }.getMessage.contains("Table createdJsonTable already exists."), + "We should complain that createdJsonTable already exists") + } + + // Data should not be deleted. + sql("DROP TABLE createdJsonTable") + checkAnswer(read.json(tempPath.toString), df) + + // Try to specify the schema. 
+ withSQLConf(SQLConf.DEFAULT_DATA_SOURCE_NAME -> "not a source name") { + val schema = StructType(StructField("b", StringType, true) :: Nil) + createExternalTable( + "createdJsonTable", + "org.apache.spark.sql.json", + schema, + Map("path" -> tempPath.toString)) + + checkAnswer( + sql("SELECT * FROM createdJsonTable"), + sql("SELECT b FROM savedJsonTable")) + + sql("DROP TABLE createdJsonTable") + + assert( + intercept[RuntimeException] { + createExternalTable( + "createdJsonTable", + "org.apache.spark.sql.json", + schema, + Map.empty[String, String]) + }.getMessage.contains("'path' must be specified for json data."), + "We should complain that path is not specified.") + } + } + } } if (HiveShim.version == "0.13.1") { test("scan a parquet table created through a CTAS statement") { - val originalConvertMetastore = getConf("spark.sql.hive.convertMetastoreParquet", "true") - val originalUseDataSource = getConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true") - setConf("spark.sql.hive.convertMetastoreParquet", "true") - setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, "true") - - val rdd = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str${i}"}""")) - jsonRDD(rdd).registerTempTable("jt") - sql( - """ - |create table test_parquet_ctas STORED AS parquET - |AS select tmp.a from jt tmp where tmp.a < 5 - """.stripMargin) - - checkAnswer( - sql(s"SELECT a FROM test_parquet_ctas WHERE a > 2 "), - Row(3) :: Row(4) :: Nil - ) - - table("test_parquet_ctas").queryExecution.optimizedPlan match { - case LogicalRelation(p: ParquetRelation2) => // OK - case _ => - fail( - "test_parquet_ctas should be converted to " + - s"${classOf[ParquetRelation2].getCanonicalName}") + withSQLConf( + "spark.sql.hive.convertMetastoreParquet" -> "true", + SQLConf.PARQUET_USE_DATA_SOURCE_API -> "true") { + + withTempTable("jt") { + (1 to 10).map(i => i -> s"str$i").toDF("a", "b").registerTempTable("jt") + + withTable("test_parquet_ctas") { + sql( + """CREATE TABLE test_parquet_ctas STORED AS PARQUET + |AS SELECT tmp.a FROM jt tmp WHERE tmp.a < 5 + """.stripMargin) + + checkAnswer( + sql(s"SELECT a FROM test_parquet_ctas WHERE a > 2 "), + Row(3) :: Row(4) :: Nil) + + table("test_parquet_ctas").queryExecution.optimizedPlan match { + case LogicalRelation(p: ParquetRelation2) => // OK + case _ => + fail(s"test_parquet_ctas should have be converted to ${classOf[ParquetRelation2]}") + } + } + } } - - // Clenup and reset confs. - sql("DROP TABLE IF EXISTS jt") - sql("DROP TABLE IF EXISTS test_parquet_ctas") - setConf("spark.sql.hive.convertMetastoreParquet", originalConvertMetastore) - setConf(SQLConf.PARQUET_USE_DATA_SOURCE_API, originalUseDataSource) } } test("Pre insert nullability check (ArrayType)") { - val df1 = - createDataFrame(Tuple1(Seq(Int.box(1), null.asInstanceOf[Integer])) :: Nil).toDF("a") - val expectedSchema1 = - StructType( - StructField("a", ArrayType(IntegerType, containsNull = true), nullable = true) :: Nil) - assert(df1.schema === expectedSchema1) - df1.saveAsTable("arrayInParquet", "parquet", SaveMode.Overwrite) - - val df2 = - createDataFrame(Tuple1(Seq(2, 3)) :: Nil).toDF("a") - val expectedSchema2 = - StructType( - StructField("a", ArrayType(IntegerType, containsNull = false), nullable = true) :: Nil) - assert(df2.schema === expectedSchema2) - df2.insertInto("arrayInParquet", overwrite = false) - createDataFrame(Tuple1(Seq(4, 5)) :: Nil).toDF("a") - .saveAsTable("arrayInParquet", SaveMode.Append) // This one internally calls df2.insertInto. 
- createDataFrame(Tuple1(Seq(Int.box(6), null.asInstanceOf[Integer])) :: Nil).toDF("a") - .saveAsTable("arrayInParquet", "parquet", SaveMode.Append) - refreshTable("arrayInParquet") - - checkAnswer( - sql("SELECT a FROM arrayInParquet"), - Row(ArrayBuffer(1, null)) :: - Row(ArrayBuffer(2, 3)) :: - Row(ArrayBuffer(4, 5)) :: - Row(ArrayBuffer(6, null)) :: Nil) - - sql("DROP TABLE arrayInParquet") + withTable("arrayInParquet") { + { + val df = (Tuple1(Seq(Int.box(1), null: Integer)) :: Nil).toDF("a") + val expectedSchema = + StructType( + StructField( + "a", + ArrayType(IntegerType, containsNull = true), + nullable = true) :: Nil) + + assert(df.schema === expectedSchema) + + df.write + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable("arrayInParquet") + } + + { + val df = (Tuple1(Seq(2, 3)) :: Nil).toDF("a") + val expectedSchema = + StructType( + StructField( + "a", + ArrayType(IntegerType, containsNull = false), + nullable = true) :: Nil) + + assert(df.schema === expectedSchema) + + df.write + .format("parquet") + .mode(SaveMode.Append) + .insertInto("arrayInParquet") + } + + (Tuple1(Seq(4, 5)) :: Nil).toDF("a") + .write + .mode(SaveMode.Append) + .saveAsTable("arrayInParquet") // This one internally calls df2.insertInto. + + (Tuple1(Seq(Int.box(6), null: Integer)) :: Nil).toDF("a") + .write + .mode(SaveMode.Append) + .saveAsTable("arrayInParquet") + + refreshTable("arrayInParquet") + + checkAnswer( + sql("SELECT a FROM arrayInParquet"), + Row(ArrayBuffer(1, null)) :: + Row(ArrayBuffer(2, 3)) :: + Row(ArrayBuffer(4, 5)) :: + Row(ArrayBuffer(6, null)) :: Nil) + } } test("Pre insert nullability check (MapType)") { - val df1 = - createDataFrame(Tuple1(Map(1 -> null.asInstanceOf[Integer])) :: Nil).toDF("a") - val mapType1 = MapType(IntegerType, IntegerType, valueContainsNull = true) - val expectedSchema1 = - StructType( - StructField("a", mapType1, nullable = true) :: Nil) - assert(df1.schema === expectedSchema1) - df1.saveAsTable("mapInParquet", "parquet", SaveMode.Overwrite) - - val df2 = - createDataFrame(Tuple1(Map(2 -> 3)) :: Nil).toDF("a") - val mapType2 = MapType(IntegerType, IntegerType, valueContainsNull = false) - val expectedSchema2 = - StructType( - StructField("a", mapType2, nullable = true) :: Nil) - assert(df2.schema === expectedSchema2) - df2.insertInto("mapInParquet", overwrite = false) - createDataFrame(Tuple1(Map(4 -> 5)) :: Nil).toDF("a") - .saveAsTable("mapInParquet", SaveMode.Append) // This one internally calls df2.insertInto. 
- createDataFrame(Tuple1(Map(6 -> null.asInstanceOf[Integer])) :: Nil).toDF("a") - .saveAsTable("mapInParquet", "parquet", SaveMode.Append) - refreshTable("mapInParquet") - - checkAnswer( - sql("SELECT a FROM mapInParquet"), - Row(Map(1 -> null)) :: - Row(Map(2 -> 3)) :: - Row(Map(4 -> 5)) :: - Row(Map(6 -> null)) :: Nil) - - sql("DROP TABLE mapInParquet") + withTable("mapInParquet") { + { + val df = (Tuple1(Map(1 -> (null: Integer))) :: Nil).toDF("a") + val expectedSchema = + StructType( + StructField( + "a", + MapType(IntegerType, IntegerType, valueContainsNull = true), + nullable = true) :: Nil) + + assert(df.schema === expectedSchema) + + df.write + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable("mapInParquet") + } + + { + val df = (Tuple1(Map(2 -> 3)) :: Nil).toDF("a") + val expectedSchema = + StructType( + StructField( + "a", + MapType(IntegerType, IntegerType, valueContainsNull = false), + nullable = true) :: Nil) + + assert(df.schema === expectedSchema) + + df.write + .format("parquet") + .mode(SaveMode.Append) + .insertInto("mapInParquet") + } + + (Tuple1(Map(4 -> 5)) :: Nil).toDF("a") + .write + .format("parquet") + .mode(SaveMode.Append) + .saveAsTable("mapInParquet") // This one internally calls df2.insertInto. + + (Tuple1(Map(6 -> null.asInstanceOf[Integer])) :: Nil).toDF("a") + .write + .format("parquet") + .mode(SaveMode.Append) + .saveAsTable("mapInParquet") + + refreshTable("mapInParquet") + + checkAnswer( + sql("SELECT a FROM mapInParquet"), + Row(Map(1 -> null)) :: + Row(Map(2 -> 3)) :: + Row(Map(4 -> 5)) :: + Row(Map(6 -> null)) :: Nil) + } } test("SPARK-6024 wide schema support") { - // We will need 80 splits for this schema if the threshold is 4000. - val schema = StructType((1 to 5000).map(i => StructField(s"c_${i}", StringType, true))) - assert( - schema.json.size > conf.schemaStringLengthThreshold, - "To correctly test the fix of SPARK-6024, the value of " + - s"spark.sql.sources.schemaStringLengthThreshold needs to be less than ${schema.json.size}") - // Manually create a metastore data source table. - catalog.createDataSourceTable( - tableName = "wide_schema", - userSpecifiedSchema = Some(schema), - provider = "json", - options = Map("path" -> "just a dummy path"), - isExternal = false) - - invalidateTable("wide_schema") - - val actualSchema = table("wide_schema").schema - assert(schema === actualSchema) + withSQLConf(SQLConf.SCHEMA_STRING_LENGTH_THRESHOLD -> "4000") { + withTable("wide_schema") { + // We will need 80 splits for this schema if the threshold is 4000. + val schema = StructType((1 to 5000).map(i => StructField(s"c_$i", StringType, true))) + + // Manually create a metastore data source table. + catalog.createDataSourceTable( + tableName = "wide_schema", + userSpecifiedSchema = Some(schema), + partitionColumns = Array.empty[String], + provider = "json", + options = Map("path" -> "just a dummy path"), + isExternal = false) + + invalidateTable("wide_schema") + + val actualSchema = table("wide_schema").schema + assert(schema === actualSchema) + } + } } test("SPARK-6655 still support a schema stored in spark.sql.sources.schema") { val tableName = "spark6655" - val schema = StructType(StructField("int", IntegerType, true) :: Nil) - // Manually create the metadata in metastore. 
- val tbl = new Table("default", tableName) - tbl.setProperty("spark.sql.sources.provider", "json") - tbl.setProperty("spark.sql.sources.schema", schema.json) - tbl.setProperty("EXTERNAL", "FALSE") - tbl.setTableType(TableType.MANAGED_TABLE) - tbl.setSerdeParam("path", catalog.hiveDefaultTableFilePath(tableName)) - catalog.synchronized { - catalog.client.createTable(tbl) + withTable(tableName) { + val schema = StructType(StructField("int", IntegerType, true) :: Nil) + val hiveTable = HiveTable( + specifiedDatabase = Some("default"), + name = tableName, + schema = Seq.empty, + partitionColumns = Seq.empty, + properties = Map( + "spark.sql.sources.provider" -> "json", + "spark.sql.sources.schema" -> schema.json, + "EXTERNAL" -> "FALSE"), + tableType = ManagedTable, + serdeProperties = Map( + "path" -> catalog.hiveDefaultTableFilePath(tableName))) + + catalog.client.createTable(hiveTable) + + invalidateTable(tableName) + val actualSchema = table(tableName).schema + assert(schema === actualSchema) } - - invalidateTable(tableName) - val actualSchema = table(tableName).schema - assert(schema === actualSchema) - sql(s"drop table $tableName") } + test("Saving partition columns information") { + val df = (1 to 10).map(i => (i, i + 1, s"str$i", s"str${i + 1}")).toDF("a", "b", "c", "d") + val tableName = s"partitionInfo_${System.currentTimeMillis()}" + + withTable(tableName) { + df.write.format("parquet").partitionBy("d", "b").saveAsTable(tableName) + invalidateTable(tableName) + val metastoreTable = catalog.client.getTable("default", tableName) + val expectedPartitionColumns = StructType(df.schema("d") :: df.schema("b") :: Nil) + val actualPartitionColumns = + StructType( + metastoreTable.partitionColumns.map(c => + StructField(c.name, HiveMetastoreTypes.toDataType(c.hiveType)))) + // Make sure partition columns are correctly stored in metastore. + assert( + expectedPartitionColumns.sameType(actualPartitionColumns), + s"Partitions columns stored in metastore $actualPartitionColumns is not the " + + s"partition columns defined by the saveAsTable operation $expectedPartitionColumns.") + + // Check the content of the saved table. 
+ checkAnswer( + table(tableName).select("c", "b", "d", "a"), + df.select("c", "b", "d", "a")) + } + } test("insert into a table") { - def createDF(from: Int, to: Int): DataFrame = - createDataFrame((from to to).map(i => Tuple2(i, s"str$i"))).toDF("c1", "c2") + def createDF(from: Int, to: Int): DataFrame = { + (from to to).map(i => i -> s"str$i").toDF("c1", "c2") + } - createDF(0, 9).saveAsTable("insertParquet", "parquet") - checkAnswer( - sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"), - (6 to 9).map(i => Row(i, s"str$i"))) + withTable("insertParquet") { + createDF(0, 9).write.format("parquet").saveAsTable("insertParquet") + checkAnswer( + sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"), + (6 to 9).map(i => Row(i, s"str$i"))) - intercept[AnalysisException] { - createDF(10, 19).saveAsTable("insertParquet", "parquet") - } + intercept[AnalysisException] { + createDF(10, 19).write.format("parquet").saveAsTable("insertParquet") + } - createDF(10, 19).saveAsTable("insertParquet", "parquet", SaveMode.Append) - checkAnswer( - sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"), - (6 to 19).map(i => Row(i, s"str$i"))) + createDF(10, 19).write.mode(SaveMode.Append).format("parquet").saveAsTable("insertParquet") + checkAnswer( + sql("SELECT p.c1, p.c2 FROM insertParquet p WHERE p.c1 > 5"), + (6 to 19).map(i => Row(i, s"str$i"))) - createDF(20, 29).saveAsTable("insertParquet", "parquet", SaveMode.Append) - checkAnswer( - sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 25"), - (6 to 24).map(i => Row(i, s"str$i"))) + createDF(20, 29).write.mode(SaveMode.Append).format("parquet").saveAsTable("insertParquet") + checkAnswer( + sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 25"), + (6 to 24).map(i => Row(i, s"str$i"))) - intercept[AnalysisException] { - createDF(30, 39).saveAsTable("insertParquet") - } + intercept[AnalysisException] { + createDF(30, 39).write.saveAsTable("insertParquet") + } + + createDF(30, 39).write.mode(SaveMode.Append).saveAsTable("insertParquet") + checkAnswer( + sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 35"), + (6 to 34).map(i => Row(i, s"str$i"))) - createDF(30, 39).saveAsTable("insertParquet", SaveMode.Append) - checkAnswer( - sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 35"), - (6 to 34).map(i => Row(i, s"str$i"))) - - createDF(40, 49).insertInto("insertParquet") - checkAnswer( - sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 45"), - (6 to 44).map(i => Row(i, s"str$i"))) - - createDF(50, 59).saveAsTable("insertParquet", SaveMode.Overwrite) - checkAnswer( - sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 51 AND p.c1 < 55"), - (52 to 54).map(i => Row(i, s"str$i"))) - createDF(60, 69).saveAsTable("insertParquet", SaveMode.Ignore) - checkAnswer( - sql("SELECT p.c1, c2 FROM insertParquet p"), - (50 to 59).map(i => Row(i, s"str$i"))) - - createDF(70, 79).insertInto("insertParquet", overwrite = true) - checkAnswer( - sql("SELECT p.c1, c2 FROM insertParquet p"), - (70 to 79).map(i => Row(i, s"str$i"))) + createDF(40, 49).write.mode(SaveMode.Append).insertInto("insertParquet") + checkAnswer( + sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 5 AND p.c1 < 45"), + (6 to 44).map(i => Row(i, s"str$i"))) + + createDF(50, 59).write.mode(SaveMode.Overwrite).saveAsTable("insertParquet") + checkAnswer( + sql("SELECT p.c1, c2 FROM insertParquet p WHERE p.c1 > 51 AND p.c1 < 55"), + (52 to 54).map(i => Row(i, s"str$i"))) + createDF(60, 
69).write.mode(SaveMode.Ignore).saveAsTable("insertParquet") + checkAnswer( + sql("SELECT p.c1, c2 FROM insertParquet p"), + (50 to 59).map(i => Row(i, s"str$i"))) + + createDF(70, 79).write.mode(SaveMode.Overwrite).insertInto("insertParquet") + checkAnswer( + sql("SELECT p.c1, c2 FROM insertParquet p"), + (70 to 79).map(i => Row(i, s"str$i"))) + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala index d6ddd539d159d..8afe5459d4f1b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/SerializationSuite.scala @@ -26,8 +26,10 @@ import org.apache.spark.sql.hive.test.TestHive class SerializationSuite extends FunSuite { test("[SPARK-5840] HiveContext should be serializable") { - val hiveContext = new HiveContext(TestHive.sparkContext) + val hiveContext = TestHive hiveContext.hiveconf - new JavaSerializer(new SparkConf()).newInstance().serialize(hiveContext) + val serializer = new JavaSerializer(new SparkConf()).newInstance() + val bytes = serializer.serialize(hiveContext) + val deSer = serializer.deserialize[AnyRef](bytes) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala index 85b6bc93d7122..8245047626d57 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala @@ -26,9 +26,9 @@ case class FunctionResult(f1: String, f2: String) class UDFSuite extends QueryTest { test("UDF case insensitive") { - udf.register("random0", () => { Math.random()}) - udf.register("RANDOM1", () => { Math.random()}) - udf.register("strlenScala", (_: String).length + (_:Int)) + udf.register("random0", () => { Math.random() }) + udf.register("RANDOM1", () => { Math.random() }) + udf.register("strlenScala", (_: String).length + (_: Int)) assert(sql("SELECT RANDOM0() FROM src LIMIT 1").head().getDouble(0) >= 0.0) assert(sql("SELECT RANDOm1() FROM src LIMIT 1").head().getDouble(0) >= 0.0) assert(sql("SELECT strlenscala('test', 1) FROM src LIMIT 1").head().getInt(0) === 5) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 81e77ba257bf1..1b7296145f338 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -22,9 +22,13 @@ import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.util.Utils import org.scalatest.FunSuite +/** + * A simple set of tests that call the methods of a hive ClientInterface, loading different version + * of hive from maven central. These tests are simple in that they are mostly just testing to make + * sure that reflective calls are not throwing NoSuchMethod error, but the actually functionality + * is not fully tested. 
+ */ class VersionsSuite extends FunSuite with Logging { - val testType = "derby" - private def buildConf() = { lazy val warehousePath = Utils.createTempDir() lazy val metastorePath = Utils.createTempDir() @@ -50,6 +54,14 @@ class VersionsSuite extends FunSuite with Logging { causes } + private val emptyDir = Utils.createTempDir().getCanonicalPath + + private def partSpec = { + val hashMap = new java.util.LinkedHashMap[String, String] + hashMap.put("key", "1") + hashMap + } + // Its actually pretty easy to mess things up and have all of your tests "pass" by accidentally // connecting to an auto-populated, in-process metastore. Let's make sure we are getting the // versions right by forcing a known compatibility failure. @@ -66,10 +78,9 @@ class VersionsSuite extends FunSuite with Logging { private var client: ClientInterface = null versions.foreach { version => - test(s"$version: listTables") { + test(s"$version: create client") { client = null client = IsolatedClientLoader.forVersion(version, buildConf()).client - client.listTables("default") } test(s"$version: createDatabase") { @@ -101,5 +112,64 @@ class VersionsSuite extends FunSuite with Logging { test(s"$version: getTable") { client.getTable("default", "src") } + + test(s"$version: listTables") { + assert(client.listTables("default") === Seq("src")) + } + + test(s"$version: currentDatabase") { + assert(client.currentDatabase === "default") + } + + test(s"$version: getDatabase") { + client.getDatabase("default") + } + + test(s"$version: alterTable") { + client.alterTable(client.getTable("default", "src")) + } + + test(s"$version: set command") { + client.runSqlHive("SET spark.sql.test.key=1") + } + + test(s"$version: create partitioned table DDL") { + client.runSqlHive("CREATE TABLE src_part (value INT) PARTITIONED BY (key INT)") + client.runSqlHive("ALTER TABLE src_part ADD PARTITION (key = '1')") + } + + test(s"$version: getPartitions") { + client.getAllPartitions(client.getTable("default", "src_part")) + } + + test(s"$version: loadPartition") { + client.loadPartition( + emptyDir, + "default.src_part", + partSpec, + false, + false, + false, + false) + } + + test(s"$version: loadTable") { + client.loadTable( + emptyDir, + "src", + false, + false) + } + + test(s"$version: loadDynamicPartitions") { + client.loadDynamicPartitions( + emptyDir, + "default.src_part", + partSpec, + false, + 1, + false, + false) + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 027056d4b865f..55e5551b63818 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -129,7 +129,7 @@ abstract class HiveComparisonTest } protected def prepareAnswer( - hiveQuery: TestHive.type#HiveQLQueryExecution, + hiveQuery: TestHive.type#QueryExecution, answer: Seq[String]): Seq[String] = { def isSorted(plan: LogicalPlan): Boolean = plan match { @@ -273,7 +273,7 @@ abstract class HiveComparisonTest } val hiveCacheFiles = queryList.zipWithIndex.map { - case (queryString, i) => + case (queryString, i) => val cachedAnswerName = s"$testCaseName-$i-${getMd5(queryString)}" new File(answerCache, cachedAnswerName) } @@ -298,11 +298,13 @@ abstract class HiveComparisonTest hiveCachedResults } else { - val hiveQueries = queryList.map(new TestHive.HiveQLQueryExecution(_)) + val hiveQueries = queryList.map(new 
TestHive.QueryExecution(_)) // Make sure we can at least parse everything before attempting hive execution. - hiveQueries.foreach(_.analyzed) + // Note this must only look at the logical plan as we might not be able to analyze if + // other DDL has not been executed yet. + hiveQueries.foreach(_.logical) val computedResults = (queryList.zipWithIndex, hiveQueries, hiveCacheFiles).zipped.map { - case ((queryString, i), hiveQuery, cachedAnswerFile)=> + case ((queryString, i), hiveQuery, cachedAnswerFile) => try { // Hooks often break the harness and don't really affect our test anyway, don't // even try running them. @@ -346,7 +348,7 @@ abstract class HiveComparisonTest // Run w/ catalyst val catalystResults = queryList.zip(hiveResults).map { case (queryString, hive) => - val query = new TestHive.HiveQLQueryExecution(queryString) + val query = new TestHive.QueryExecution(queryString) try { (query, prepareAnswer(query, query.stringResult())) } catch { case e: Throwable => val errorMessage = @@ -402,7 +404,7 @@ abstract class HiveComparisonTest // okay by running a simple query. If this fails then we halt testing since // something must have gone seriously wrong. try { - new TestHive.HiveQLQueryExecution("SELECT key FROM src").stringResult() + new TestHive.QueryExecution("SELECT key FROM src").stringResult() TestHive.runSqlHive("SELECT key FROM src") } catch { case e: Exception => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index ac10b173307d8..440b7c87b0da2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -20,16 +20,16 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.util.{Locale, TimeZone} -import org.scalatest.BeforeAndAfter - import scala.util.Try +import org.scalatest.BeforeAndAfter + import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkFiles, SparkException} import org.apache.spark.sql.{AnalysisException, DataFrame, Row} +import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.plans.logical.Project -import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ @@ -52,14 +52,32 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) // Add Locale setting Locale.setDefault(Locale.US) + sql(s"ADD JAR ${TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath()}") + // The function source code can be found at: + // https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF + sql( + """ + |CREATE TEMPORARY FUNCTION udtf_count2 + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + """.stripMargin) } override def afterAll() { TestHive.cacheTables = false TimeZone.setDefault(originalTimeZone) Locale.setDefault(originalLocale) + sql("DROP TEMPORARY FUNCTION udtf_count2") } + createQueryTest("Test UDTF.close in Lateral Views", + """ + |SELECT key, cc + |FROM src LATERAL VIEW udtf_count2(value) dd AS cc + """.stripMargin, false) // false mean we have to keep the temp function in registry + + createQueryTest("Test UDTF.close in SELECT", + "SELECT udtf_count2(a) FROM (SELECT 1 AS a FROM src LIMIT 3) table", false) + test("SPARK-4908: 
concurrent hive native commands") { (1 to 100).par.map { _ => sql("USE default") @@ -91,13 +109,13 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { | SELECT key FROM gen_tmp ORDER BY key ASC; """.stripMargin) - test("multiple generator in projection") { + test("multiple generators in projection") { intercept[AnalysisException] { - sql("SELECT explode(map(key, value)), key FROM src").collect() + sql("SELECT explode(array(key, key)), explode(array(key, key)) FROM src").collect() } intercept[AnalysisException] { - sql("SELECT explode(map(key, value)) as k1, k2, key FROM src").collect() + sql("SELECT explode(array(key, key)) as k1, explode(array(key, key)) FROM src").collect() } } @@ -398,6 +416,25 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { |SELECT * FROM createdtable; """.stripMargin) + test("SPARK-7270: consider dynamic partition when comparing table output") { + sql(s"CREATE TABLE test_partition (a STRING) PARTITIONED BY (b BIGINT, c STRING)") + sql(s"CREATE TABLE ptest (a STRING, b BIGINT, c STRING)") + + val analyzedPlan = sql( + """ + |INSERT OVERWRITE table test_partition PARTITION (b=1, c) + |SELECT 'a', 'c' from ptest + """.stripMargin).queryExecution.analyzed + + assertResult(false, "Incorrect cast detected\n" + analyzedPlan) { + var hasCast = false + analyzedPlan.collect { + case p: Project => p.transformExpressionsUp { case c: Cast => hasCast = true; c } + } + hasCast + } + } + createQueryTest("transform", "SELECT TRANSFORM (key) USING 'cat' AS (tKey) FROM src") @@ -900,7 +937,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { |DROP TABLE IF EXISTS dynamic_part_table; """.stripMargin) - test("Dynamic partition folder layout") { + ignore("Dynamic partition folder layout") { sql("DROP TABLE IF EXISTS dynamic_part_table") sql("CREATE TABLE dynamic_part_table(intcol INT) PARTITIONED BY (partcol1 INT, partcol2 INT)") sql("SET hive.exec.dynamic.partition.mode=nonstrict") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index 8ad3627504229..b08db6de2d2f6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.hive.test.TestHive.{sparkContext, jsonRDD, sql} +import org.apache.spark.sql.hive.test.TestHive.{read, sparkContext, jsonRDD, sql} import org.apache.spark.sql.hive.test.TestHive.implicits._ case class Nested(a: Int, B: Int) @@ -31,14 +31,14 @@ case class Data(a: Int, B: Int, n: Nested, nestedArray: Seq[Nested]) class HiveResolutionSuite extends HiveComparisonTest { test("SPARK-3698: case insensitive test for nested data") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": [{"a": {"a": 1}}]}""" :: Nil)).registerTempTable("nested") // This should be successfully analyzed sql("SELECT a[0].A.A from nested").queryExecution.analyzed } test("SPARK-5278: check ambiguous reference to fields") { - jsonRDD(sparkContext.makeRDD( + read.json(sparkContext.makeRDD( """{"a": [{"b": 1, "B": 2}]}""" :: Nil)).registerTempTable("nested") // there are 2 filed matching field name "b", we should report Ambiguous reference error @@ -77,7 +77,7 @@ class HiveResolutionSuite extends HiveComparisonTest { 
test("case insensitivity with scala reflection") { // Test resolution with Scala Reflection - sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil) + sparkContext.parallelize(Data(1, 2, Nested(1, 2), Seq(Nested(1, 2))) :: Nil) .toDF().registerTempTable("caseSensitivityTest") val query = sql("SELECT a, b, A, B, n.a, n.b, n.A, n.B FROM caseSensitivityTest") @@ -88,14 +88,14 @@ class HiveResolutionSuite extends HiveComparisonTest { ignore("case insensitivity with scala reflection joins") { // Test resolution with Scala Reflection - sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil) + sparkContext.parallelize(Data(1, 2, Nested(1, 2), Seq(Nested(1, 2))) :: Nil) .toDF().registerTempTable("caseSensitivityTest") sql("SELECT * FROM casesensitivitytest a JOIN casesensitivitytest b ON a.a = b.a").collect() } test("nested repeated resolution") { - sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil) + sparkContext.parallelize(Data(1, 2, Nested(1, 2), Seq(Nested(1, 2))) :: Nil) .toDF().registerTempTable("nestedRepeatedTest") assert(sql("SELECT nestedArray[0].a FROM nestedRepeatedTest").collect().head(0) === 1) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index ab53c6309e089..2209fc2f30a3c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -61,7 +61,7 @@ class HiveTableScanSuite extends HiveComparisonTest { TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect() TestHive.sql("drop table tb") } - + test("Spark-4077: timestamp query for null value") { TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null") TestHive.sql( @@ -71,12 +71,12 @@ class HiveTableScanSuite extends HiveComparisonTest { FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' """.stripMargin) - val location = + val location = Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile() - + TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null") - assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect() - === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")),Row(null))) + assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect() + === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")), Row(null))) TestHive.sql("DROP TABLE timestamp_query_null") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala index 7f49eac490572..ce5985888f540 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUdfSuite.scala @@ -101,7 +101,7 @@ class HiveUdfSuite extends QueryTest { sql("DROP TEMPORARY FUNCTION IF EXISTS test_avg") TestHive.reset() } - + test("SPARK-2693 udaf aggregates test") { checkAnswer(sql("SELECT percentile(key, 1) FROM src LIMIT 1"), sql("SELECT max(key) FROM src").collect().toSeq) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala index 067b577f1560e..de6a41ce5bfcb 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruningSuite.scala @@ -145,25 +145,26 @@ class PruningSuite extends HiveComparisonTest with BeforeAndAfter { expectedScannedColumns: Seq[String], expectedPartValues: Seq[Seq[String]]): Unit = { test(s"$testCaseName - pruning test") { - val plan = new TestHive.HiveQLQueryExecution(sql).executedPlan + val plan = new TestHive.QueryExecution(sql).executedPlan val actualOutputColumns = plan.output.map(_.name) val (actualScannedColumns, actualPartValues) = plan.collect { case p @ HiveTableScan(columns, relation, _) => val columnNames = columns.map(_.name) - val partValues = p.prunePartitions(relation.hiveQlPartitions).map(_.getValues) + val partValues = if (relation.table.isPartitioned) { + p.prunePartitions(relation.hiveQlPartitions).map(_.getValues) + } else { + Seq.empty + } (columnNames, partValues) }.head assert(actualOutputColumns === expectedOutputColumns, "Output columns mismatch") assert(actualScannedColumns === expectedScannedColumns, "Scanned columns mismatch") - assert( - actualPartValues.length === expectedPartValues.length, - "Partition value count mismatches") + val actualPartitions = actualPartValues.map(_.toSeq.mkString(",")).sorted + val expectedPartitions = expectedPartValues.map(_.mkString(",")).sorted - for ((actual, expected) <- actualPartValues.zip(expectedPartValues)) { - assert(actual sameElements expected, "Partition values mismatch") - } + assert(actualPartitions === expectedPartitions, "Partitions selected do not match") } // Creates a query test to compare query results generated by Hive and Catalyst. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 630dec8fa05a0..aba3becb1bce2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -17,15 +17,14 @@ package org.apache.spark.sql.hive.execution +import org.apache.spark.sql.catalyst.DefaultParserDialect import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries import org.apache.spark.sql.catalyst.errors.DialectException -import org.apache.spark.sql.DefaultDialect -import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SQLConf} -import org.apache.spark.sql.hive.MetastoreRelation +import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.hive.{HiveQLDialect, HiveShim} +import org.apache.spark.sql.hive.{HiveQLDialect, HiveShim, MetastoreRelation} import org.apache.spark.sql.parquet.ParquetRelation2 import org.apache.spark.sql.sources.LogicalRelation import org.apache.spark.sql.types._ @@ -48,8 +47,12 @@ case class Order( state: String, month: Int) +case class WindowData( + month: Int, + area: String, + product: Int) /** A SQL Dialect for testing purpose, and it can not be nested type */ -class MyDialect extends DefaultDialect +class MyDialect extends DefaultParserDialect /** * A collection of hive query tests where we generate the answers ourselves instead of depending on @@ -197,7 +200,7 @@ class SQLQuerySuite extends QueryTest { var message = intercept[AnalysisException] { sql("CREATE TABLE ctas1 AS SELECT key k, value FROM src ORDER BY k, value") 
}.getMessage - assert(message.contains("Table ctas1 already exists")) + assert(message.contains("ctas1 already exists")) checkRelation("ctas1", true) sql("DROP TABLE ctas1") @@ -243,7 +246,7 @@ class SQLQuerySuite extends QueryTest { // set the dialect back to the DefaultSQLDialect sql("SET spark.sql.dialect=sql") - assert(getSQLDialect().getClass === classOf[DefaultDialect]) + assert(getSQLDialect().getClass === classOf[DefaultParserDialect]) sql("SET spark.sql.dialect=hiveql") assert(getSQLDialect().getClass === classOf[HiveQLDialect]) @@ -310,7 +313,7 @@ class SQLQuerySuite extends QueryTest { SELECT key, value FROM src ORDER BY key, value""").collect().toSeq) - intercept[org.apache.hadoop.hive.metastore.api.AlreadyExistsException] { + intercept[AnalysisException] { sql( """CREATE TABLE ctas4 AS | SELECT key, value FROM src ORDER BY key, value""".stripMargin).collect() @@ -324,7 +327,7 @@ class SQLQuerySuite extends QueryTest { "org.apache.hadoop.hive.ql.io.RCFileInputFormat", "org.apache.hadoop.hive.ql.io.RCFileOutputFormat", "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe", - "serde_p1=p1", "serde_p2=p2", "tbl_p1=p11", "tbl_p2=p22","MANAGED_TABLE" + "serde_p1=p1", "serde_p2=p2", "tbl_p1=p11", "tbl_p2=p22", "MANAGED_TABLE" ) if (HiveShim.version =="0.13.1") { @@ -422,10 +425,10 @@ class SQLQuerySuite extends QueryTest { test("SPARK-4825 save join to table") { val testData = sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString)).toDF() sql("CREATE TABLE test1 (key INT, value STRING)") - testData.insertInto("test1") + testData.write.mode(SaveMode.Append).insertInto("test1") sql("CREATE TABLE test2 (key INT, value STRING)") - testData.insertInto("test2") - testData.insertInto("test2") + testData.write.mode(SaveMode.Append).insertInto("test2") + testData.write.mode(SaveMode.Append).insertInto("test2") sql("CREATE TABLE test AS SELECT COUNT(a.value) FROM test1 a JOIN test2 b ON a.key = b.key") checkAnswer( table("test"), @@ -489,6 +492,12 @@ class SQLQuerySuite extends QueryTest { } } + test("SPARK-4699 HiveContext should be case insensitive by default") { + checkAnswer( + sql("SELECT KEY FROM Src ORDER BY value"), + sql("SELECT key FROM src ORDER BY value").collect().toSeq) + } + test("SPARK-5284 Insert into Hive throws NPE when a inner complex type field has a null value") { val schema = StructType( StructField("s", @@ -526,26 +535,49 @@ class SQLQuerySuite extends QueryTest { test("SPARK-4296 Grouping field with Hive UDF as sub expression") { val rdd = sparkContext.makeRDD( """{"a": "str", "b":"1", "c":"1970-01-01 00:00:00"}""" :: Nil) - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") checkAnswer( sql("SELECT concat(a, '-', b), year(c) FROM data GROUP BY concat(a, '-', b), year(c)"), Row("str-1", 1970)) dropTempTable("data") - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") checkAnswer(sql("SELECT year(c) + 1 FROM data GROUP BY year(c) + 1"), Row(1971)) dropTempTable("data") } - test("resolve udtf with single alias") { + test("resolve udtf in projection #1") { val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") val df = sql("SELECT explode(a) AS val FROM data") val col = df("val") } + test("resolve udtf in projection #2") { + val rdd = sparkContext.makeRDD((1 to 2).map(i => s"""{"a":[$i, ${i + 1}]}""")) + jsonRDD(rdd).registerTempTable("data") + checkAnswer(sql("SELECT explode(map(1, 1)) FROM 
data LIMIT 1"), Row(1, 1) :: Nil) + checkAnswer(sql("SELECT explode(map(1, 1)) as (k1, k2) FROM data LIMIT 1"), Row(1, 1) :: Nil) + intercept[AnalysisException] { + sql("SELECT explode(map(1, 1)) as k1 FROM data LIMIT 1") + } + + intercept[AnalysisException] { + sql("SELECT explode(map(1, 1)) as (k1, k2, k3) FROM data LIMIT 1") + } + } + + // TGF with non-TGF in project is allowed in Spark SQL, but not in Hive + test("TGF with non-TGF in projection") { + val rdd = sparkContext.makeRDD( """{"a": "1", "b":"1"}""" :: Nil) + jsonRDD(rdd).registerTempTable("data") + checkAnswer( + sql("SELECT explode(map(a, b)) as (k1, k2), a, b FROM data"), + Row("1", "1", "1", "1") :: Nil) + } + test("logical.Project should not be resolved if it contains aggregates or generators") { // This test is used to test the fix of SPARK-5875. // The original issue was that Project's resolved will be true when it contains @@ -554,7 +586,7 @@ class SQLQuerySuite extends QueryTest { // PreInsertionCasts will actually start to work before ImplicitGenerate and then // generates an invalid query plan. val rdd = sparkContext.makeRDD((1 to 5).map(i => s"""{"a":[$i, ${i + 1}]}""")) - jsonRDD(rdd).registerTempTable("data") + read.json(rdd).registerTempTable("data") val originalConf = getConf("spark.sql.hive.convertCTAS", "false") setConf("spark.sql.hive.convertCTAS", "false") @@ -586,7 +618,7 @@ class SQLQuerySuite extends QueryTest { sql(s"DROP TABLE $tableName") } } - + test("SPARK-5203 union with different decimal precision") { Seq.empty[(Decimal, Decimal)] .toDF("d1", "d2") @@ -604,4 +636,274 @@ class SQLQuerySuite extends QueryTest { sql("SELECT TRANSFORM (d1, d2, d3) USING 'cat' AS (a,b,c) FROM script_trans") .queryExecution.toRdd.count()) } + + test("window function: udaf with aggregate expressin") { + val data = Seq( + WindowData(1, "a", 5), + WindowData(2, "a", 6), + WindowData(3, "b", 7), + WindowData(4, "b", 8), + WindowData(5, "c", 9), + WindowData(6, "c", 10) + ) + sparkContext.parallelize(data).toDF().registerTempTable("windowData") + + checkAnswer( + sql( + """ + |select area, sum(product), sum(sum(product)) over (partition by area) + |from windowData group by month, area + """.stripMargin), + Seq( + ("a", 5, 11), + ("a", 6, 11), + ("b", 7, 15), + ("b", 8, 15), + ("c", 9, 19), + ("c", 10, 19) + ).map(i => Row(i._1, i._2, i._3))) + + checkAnswer( + sql( + """ + |select area, sum(product) - 1, sum(sum(product)) over (partition by area) + |from windowData group by month, area + """.stripMargin), + Seq( + ("a", 4, 11), + ("a", 5, 11), + ("b", 6, 15), + ("b", 7, 15), + ("c", 8, 19), + ("c", 9, 19) + ).map(i => Row(i._1, i._2, i._3))) + + checkAnswer( + sql( + """ + |select area, sum(product), sum(product) / sum(sum(product)) over (partition by area) + |from windowData group by month, area + """.stripMargin), + Seq( + ("a", 5, 5d/11), + ("a", 6, 6d/11), + ("b", 7, 7d/15), + ("b", 8, 8d/15), + ("c", 10, 10d/19), + ("c", 9, 9d/19) + ).map(i => Row(i._1, i._2, i._3))) + + checkAnswer( + sql( + """ + |select area, sum(product), sum(product) / sum(sum(product) - 1) over (partition by area) + |from windowData group by month, area + """.stripMargin), + Seq( + ("a", 5, 5d/9), + ("a", 6, 6d/9), + ("b", 7, 7d/13), + ("b", 8, 8d/13), + ("c", 10, 10d/17), + ("c", 9, 9d/17) + ).map(i => Row(i._1, i._2, i._3))) + } + + test("window function: partition and order expressions") { + val data = Seq( + WindowData(1, "a", 5), + WindowData(2, "a", 6), + WindowData(3, "b", 7), + WindowData(4, "b", 8), + WindowData(5, "c", 9), + WindowData(6, 
"c", 10) + ) + sparkContext.parallelize(data).toDF().registerTempTable("windowData") + + checkAnswer( + sql( + """ + |select month, area, product, sum(product + 1) over (partition by 1 order by 2) + |from windowData + """.stripMargin), + Seq( + (1, "a", 5, 51), + (2, "a", 6, 51), + (3, "b", 7, 51), + (4, "b", 8, 51), + (5, "c", 9, 51), + (6, "c", 10, 51) + ).map(i => Row(i._1, i._2, i._3, i._4))) + + checkAnswer( + sql( + """ + |select month, area, product, sum(product) + |over (partition by month % 2 order by 10 - product) + |from windowData + """.stripMargin), + Seq( + (1, "a", 5, 21), + (2, "a", 6, 24), + (3, "b", 7, 16), + (4, "b", 8, 18), + (5, "c", 9, 9), + (6, "c", 10, 10) + ).map(i => Row(i._1, i._2, i._3, i._4))) + } + + test("window function: expressions in arguments of a window functions") { + val data = Seq( + WindowData(1, "a", 5), + WindowData(2, "a", 6), + WindowData(3, "b", 7), + WindowData(4, "b", 8), + WindowData(5, "c", 9), + WindowData(6, "c", 10) + ) + sparkContext.parallelize(data).toDF().registerTempTable("windowData") + + checkAnswer( + sql( + """ + |select month, area, month % 2, + |lag(product, 1 + 1, product) over (partition by month % 2 order by area) + |from windowData + """.stripMargin), + Seq( + (1, "a", 1, 5), + (2, "a", 0, 6), + (3, "b", 1, 7), + (4, "b", 0, 8), + (5, "c", 1, 5), + (6, "c", 0, 6) + ).map(i => Row(i._1, i._2, i._3, i._4))) + } + + test("window function: multiple window expressions in a single expression") { + val nums = sparkContext.parallelize(1 to 10).map(x => (x, x % 2)).toDF("x", "y") + nums.registerTempTable("nums") + + val expected = + Row(1, 1, 1, 55, 1, 57) :: + Row(0, 2, 3, 55, 2, 60) :: + Row(1, 3, 6, 55, 4, 65) :: + Row(0, 4, 10, 55, 6, 71) :: + Row(1, 5, 15, 55, 9, 79) :: + Row(0, 6, 21, 55, 12, 88) :: + Row(1, 7, 28, 55, 16, 99) :: + Row(0, 8, 36, 55, 20, 111) :: + Row(1, 9, 45, 55, 25, 125) :: + Row(0, 10, 55, 55, 30, 140) :: Nil + + val actual = sql( + """ + |SELECT + | y, + | x, + | sum(x) OVER w1 AS running_sum, + | sum(x) OVER w2 AS total_sum, + | sum(x) OVER w3 AS running_sum_per_y, + | ((sum(x) OVER w1) + (sum(x) OVER w2) + (sum(x) OVER w3)) as combined2 + |FROM nums + |WINDOW w1 AS (ORDER BY x ROWS BETWEEN UnBOUNDED PRECEDiNG AND CuRRENT RoW), + | w2 AS (ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOuNDED FoLLOWING), + | w3 AS (PARTITION BY y ORDER BY x ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + """.stripMargin) + + checkAnswer(actual, expected) + + dropTempTable("nums") + } + + test("test case key when") { + (1 to 5).map(i => (i, i.toString)).toDF("k", "v").registerTempTable("t") + checkAnswer( + sql("SELECT CASE k WHEN 2 THEN 22 WHEN 4 THEN 44 ELSE 0 END, v FROM t"), + Row(0, "1") :: Row(22, "2") :: Row(0, "3") :: Row(44, "4") :: Row(0, "5") :: Nil) + } + + test("SPARK-7595: Window will cause resolve failed with self join") { + checkAnswer(sql( + """ + |with + | v1 as (select key, count(value) over (partition by key) cnt_val from src), + | v2 as (select v1.key, v1_lag.cnt_val from v1, v1 v1_lag where v1.key = v1_lag.key) + | select * from v2 order by key limit 1 + """.stripMargin), Row(0, 3)) + } + + test("SPARK-7269 Check analysis failed in case in-sensitive") { + Seq(1, 2, 3).map { i => + (i.toString, i.toString) + }.toDF("key", "value").registerTempTable("df_analysis") + sql("SELECT kEy from df_analysis group by key").collect() + sql("SELECT kEy+3 from df_analysis group by key+3").collect() + sql("SELECT kEy+3, a.kEy, A.kEy from df_analysis A group by key").collect() + sql("SELECT cast(kEy+1 as Int) 
from df_analysis A group by cast(key+1 as int)").collect() + sql("SELECT cast(kEy+1 as Int) from df_analysis A group by key+1").collect() + sql("SELECT 2 from df_analysis A group by key+1").collect() + intercept[AnalysisException] { + sql("SELECT kEy+1 from df_analysis group by key+3") + } + intercept[AnalysisException] { + sql("SELECT cast(key+2 as Int) from df_analysis A group by cast(key+1 as int)") + } + } + + // `Math.exp(1.0)` has different result for different jdk version, so not use createQueryTest + test("udf_java_method") { + checkAnswer(sql( + """ + |SELECT java_method("java.lang.String", "valueOf", 1), + | java_method("java.lang.String", "isEmpty"), + | java_method("java.lang.Math", "max", 2, 3), + | java_method("java.lang.Math", "min", 2, 3), + | java_method("java.lang.Math", "round", 2.5), + | java_method("java.lang.Math", "exp", 1.0), + | java_method("java.lang.Math", "floor", 1.9) + |FROM src tablesample (1 rows) + """.stripMargin), + Row( + "1", + "true", + java.lang.Math.max(2, 3).toString, + java.lang.Math.min(2, 3).toString, + java.lang.Math.round(2.5).toString, + java.lang.Math.exp(1.0).toString, + java.lang.Math.floor(1.9).toString)) + } + + test("dynamic partition value test") { + try { + sql("set hive.exec.dynamic.partition.mode=nonstrict") + // date + sql("drop table if exists dynparttest1") + sql("create table dynparttest1 (value int) partitioned by (pdate date)") + sql( + """ + |insert into table dynparttest1 partition(pdate) + | select count(*), cast('2015-05-21' as date) as pdate from src + """.stripMargin) + checkAnswer( + sql("select * from dynparttest1"), + Seq(Row(500, java.sql.Date.valueOf("2015-05-21")))) + + // decimal + sql("drop table if exists dynparttest2") + sql("create table dynparttest2 (value int) partitioned by (pdec decimal(5, 1))") + sql( + """ + |insert into table dynparttest2 partition(pdec) + | select count(*), cast('100.12' as decimal(5, 1)) as pdec from src + """.stripMargin) + checkAnswer( + sql("select * from dynparttest2"), + Seq(Row(500, new java.math.BigDecimal("100.1")))) + } finally { + sql("drop table if exists dynparttest1") + sql("drop table if exists dynparttest2") + sql("set hive.exec.dynamic.partition.mode=strict") + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala new file mode 100644 index 0000000000000..080af5bb23c16 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcHadoopFsRelationSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import org.apache.hadoop.fs.Path + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.sources.HadoopFsRelationTest +import org.apache.spark.sql.types._ + +class OrcHadoopFsRelationSuite extends HadoopFsRelationTest { + override val dataSourceName: String = classOf[DefaultSource].getCanonicalName + + import sqlContext._ + import sqlContext.implicits._ + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield (i, s"val_$i", p1)) + .toDF("a", "b", "p1") + .write + .format("orc") + .save(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + load( + source = dataSourceName, + options = Map( + "path" -> file.getCanonicalPath, + "dataSchema" -> dataSchemaWithPartition.json))) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala new file mode 100644 index 0000000000000..88c99e35260d9 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcPartitionDiscoverySuite.scala @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import java.io.File +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ +import org.apache.spark.util.Utils +import org.scalatest.{BeforeAndAfterAll, FunSuiteLike} + +import scala.reflect.ClassTag +import scala.reflect.runtime.universe.TypeTag + + +// The data where the partitioning key exists only in the directory structure. 
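The case classes that follow model the two data shapes used by OrcPartitionDiscoverySuite. For orientation, here is a minimal sketch of the Hive-style key=value directory layout these tests generate and of the discovery read path; the base path, partition values, and file names are illustrative and not taken from this patch:

  // Layout produced by makePartitionDir/makeOrcFile below (illustrative paths):
  //   /tmp/orc-base/pi=1/ps=foo/part-r-00000.orc
  //   /tmp/orc-base/pi=1/ps=bar/part-r-00000.orc
  //   /tmp/orc-base/pi=2/ps=foo/part-r-00000.orc
  import org.apache.spark.sql.hive.test.TestHive

  // Loading the base path lets the ORC data source discover `pi` and `ps` as partition columns.
  val discovered = TestHive.read.format("orc").load("/tmp/orc-base")
  discovered.printSchema()
  discovered.filter("pi = 1 AND ps = 'foo'").show()

Registering the loaded DataFrame as a temporary table, as the suite does, then makes the discovered partition columns queryable through SQL.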
+case class OrcParData(intField: Int, stringField: String) + +// The data that also includes the partitioning key +case class OrcParDataWithKey(intField: Int, pi: Int, stringField: String, ps: String) + +// TODO This test suite duplicates ParquetPartitionDiscoverySuite a lot +class OrcPartitionDiscoverySuite extends QueryTest with FunSuiteLike with BeforeAndAfterAll { + val defaultPartitionName = ConfVars.DEFAULTPARTITIONNAME.defaultVal + + def withTempDir(f: File => Unit): Unit = { + val dir = Utils.createTempDir().getCanonicalFile + try f(dir) finally Utils.deleteRecursively(dir) + } + + def makeOrcFile[T <: Product: ClassTag: TypeTag]( + data: Seq[T], path: File): Unit = { + data.toDF().write.format("orc").mode("overwrite").save(path.getCanonicalPath) + } + + + def makeOrcFile[T <: Product: ClassTag: TypeTag]( + df: DataFrame, path: File): Unit = { + df.write.format("orc").mode("overwrite").save(path.getCanonicalPath) + } + + protected def withTempTable(tableName: String)(f: => Unit): Unit = { + try f finally TestHive.dropTempTable(tableName) + } + + protected def makePartitionDir( + basePath: File, + defaultPartitionName: String, + partitionCols: (String, Any)*): File = { + val partNames = partitionCols.map { case (k, v) => + val valueString = if (v == null || v == "") defaultPartitionName else v.toString + s"$k=$valueString" + } + + val partDir = partNames.foldLeft(basePath) { (parent, child) => + new File(parent, child) + } + + assert(partDir.mkdirs(), s"Couldn't create directory $partDir") + partDir + } + + test("read partitioned table - normal case") { + withTempDir { base => + for { + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } { + makeOrcFile( + (1 to 10).map(i => OrcParData(i, i.toString)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read.format("orc").load(base.getCanonicalPath).registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } yield Row(i, i.toString, pi, ps)) + + checkAnswer( + sql("SELECT intField, pi FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + _ <- Seq("foo", "bar") + } yield Row(i, pi)) + + checkAnswer( + sql("SELECT * FROM t WHERE pi = 1"), + for { + i <- 1 to 10 + ps <- Seq("foo", "bar") + } yield Row(i, i.toString, 1, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps = 'foo'"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + } yield Row(i, i.toString, pi, "foo")) + } + } + } + + test("read partitioned table - partition key included in orc file") { + withTempDir { base => + for { + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } { + makeOrcFile( + (1 to 10).map(i => OrcParDataWithKey(i, pi, i.toString, ps)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read.format("orc").load(base.getCanonicalPath).registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + ps <- Seq("foo", "bar") + } yield Row(i, pi, i.toString, ps)) + + checkAnswer( + sql("SELECT intField, pi FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + _ <- Seq("foo", "bar") + } yield Row(i, pi)) + + checkAnswer( + sql("SELECT * FROM t WHERE pi = 1"), + for { + i <- 1 to 10 + ps <- Seq("foo", "bar") + } yield Row(i, 1, i.toString, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps = 'foo'"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + } yield Row(i, pi, i.toString, "foo")) + } + } + } + + + test("read partitioned table - with nulls") { + withTempDir { 
base => + for { + // Must be `Integer` rather than `Int` here. `null.asInstanceOf[Int]` results in a zero... + pi <- Seq(1, null.asInstanceOf[Integer]) + ps <- Seq("foo", null.asInstanceOf[String]) + } { + makeOrcFile( + (1 to 10).map(i => OrcParData(i, i.toString)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read + .format("orc") + .option(ConfVars.DEFAULTPARTITIONNAME.varname, defaultPartitionName) + .load(base.getCanonicalPath) + .registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, null.asInstanceOf[Integer]) + ps <- Seq("foo", null.asInstanceOf[String]) + } yield Row(i, i.toString, pi, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE pi IS NULL"), + for { + i <- 1 to 10 + ps <- Seq("foo", null.asInstanceOf[String]) + } yield Row(i, i.toString, null, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps IS NULL"), + for { + i <- 1 to 10 + pi <- Seq(1, null.asInstanceOf[Integer]) + } yield Row(i, i.toString, pi, null)) + } + } + } + + test("read partitioned table - with nulls and partition keys are included in Orc file") { + withTempDir { base => + for { + pi <- Seq(1, 2) + ps <- Seq("foo", null.asInstanceOf[String]) + } { + makeOrcFile( + (1 to 10).map(i => OrcParDataWithKey(i, pi, i.toString, ps)), + makePartitionDir(base, defaultPartitionName, "pi" -> pi, "ps" -> ps)) + } + + read + .format("orc") + .option(ConfVars.DEFAULTPARTITIONNAME.varname, defaultPartitionName) + .load(base.getCanonicalPath) + .registerTempTable("t") + + withTempTable("t") { + checkAnswer( + sql("SELECT * FROM t"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + ps <- Seq("foo", null.asInstanceOf[String]) + } yield Row(i, pi, i.toString, ps)) + + checkAnswer( + sql("SELECT * FROM t WHERE ps IS NULL"), + for { + i <- 1 to 10 + pi <- Seq(1, 2) + } yield Row(i, pi, i.toString, null)) + } + } + } +} + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala new file mode 100644 index 0000000000000..cdd6e705f4a2c --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcQuerySuite.scala @@ -0,0 +1,294 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import org.apache.hadoop.hive.conf.HiveConf.ConfVars +import org.apache.hadoop.hive.ql.io.orc.CompressionKind +import org.scalatest.{BeforeAndAfterAll, FunSuiteLike} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.hive.test.TestHive.implicits._ + +case class AllDataTypesWithNonPrimitiveType( + stringField: String, + intField: Int, + longField: Long, + floatField: Float, + doubleField: Double, + shortField: Short, + byteField: Byte, + booleanField: Boolean, + array: Seq[Int], + arrayContainsNull: Seq[Option[Int]], + map: Map[Int, Long], + mapValueContainsNull: Map[Int, Option[Long]], + data: (Seq[Int], (Int, String))) + +case class BinaryData(binaryData: Array[Byte]) + +case class Contact(name: String, phone: String) + +case class Person(name: String, age: Int, contacts: Seq[Contact]) + +class OrcQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterAll with OrcTest { + override val sqlContext = TestHive + + import TestHive.read + + def getTempFilePath(prefix: String, suffix: String = ""): File = { + val tempFile = File.createTempFile(prefix, suffix) + tempFile.delete() + tempFile + } + + test("Read/write All Types") { + val data = (0 to 255).map { i => + (s"$i", i, i.toLong, i.toFloat, i.toDouble, i.toShort, i.toByte, i % 2 == 0) + } + + withOrcFile(data) { file => + checkAnswer( + read.format("orc").load(file), + data.toDF().collect()) + } + } + + test("Read/write binary data") { + withOrcFile(BinaryData("test".getBytes("utf8")) :: Nil) { file => + val bytes = read.format("orc").load(file).head().getAs[Array[Byte]](0) + assert(new String(bytes, "utf8") === "test") + } + } + + test("Read/write all types with non-primitive type") { + val data = (0 to 255).map { i => + AllDataTypesWithNonPrimitiveType( + s"$i", i, i.toLong, i.toFloat, i.toDouble, i.toShort, i.toByte, i % 2 == 0, + 0 until i, + (0 until i).map(Option(_).filter(_ % 3 == 0)), + (0 until i).map(i => i -> i.toLong).toMap, + (0 until i).map(i => i -> Option(i.toLong)).toMap + (i -> None), + (0 until i, (i, s"$i"))) + } + + withOrcFile(data) { file => + checkAnswer( + read.format("orc").load(file), + data.toDF().collect()) + } + } + + test("Creating case class RDD table") { + val data = (1 to 100).map(i => (i, s"val_$i")) + sparkContext.parallelize(data).toDF().registerTempTable("t") + withTempTable("t") { + checkAnswer(sql("SELECT * FROM t"), data.toDF().collect()) + } + } + + test("Simple selection form ORC table") { + val data = (1 to 10).map { i => + Person(s"name_$i", i, (0 to 1).map { m => Contact(s"contact_$m", s"phone_$m") }) + } + + withOrcTable(data, "t") { + // ppd: + // leaf-0 = (LESS_THAN_EQUALS age 5) + // expr = leaf-0 + assert(sql("SELECT name FROM t WHERE age <= 5").count() === 5) + + // ppd: + // leaf-0 = (LESS_THAN_EQUALS age 5) + // expr = (not leaf-0) + assertResult(10) { + sql("SELECT name, contacts FROM t where age > 5") + .flatMap(_.getAs[Seq[_]]("contacts")) + .count() + } + + // ppd: + // leaf-0 = (LESS_THAN_EQUALS age 5) + // leaf-1 = (LESS_THAN age 8) + // expr = (and (not leaf-0) leaf-1) + { + val df = sql("SELECT name, contacts FROM t WHERE age > 5 AND age < 8") + assert(df.count() === 2) + assertResult(4) { + df.flatMap(_.getAs[Seq[_]]("contacts")).count() + } + } + + // ppd: + // leaf-0 = (LESS_THAN age 2) + // leaf-1 = (LESS_THAN_EQUALS age 8) + // expr 
= (or leaf-0 (not leaf-1)) + { + val df = sql("SELECT name, contacts FROM t WHERE age < 2 OR age > 8") + assert(df.count() === 3) + assertResult(6) { + df.flatMap(_.getAs[Seq[_]]("contacts")).count() + } + } + } + } + + test("save and load case class RDD with `None`s as orc") { + val data = ( + None: Option[Int], + None: Option[Long], + None: Option[Float], + None: Option[Double], + None: Option[Boolean] + ) :: Nil + + withOrcFile(data) { file => + checkAnswer( + read.format("orc").load(file), + Row(Seq.fill(5)(null): _*)) + } + } + + // We only support zlib in Hive 0.12.0 now + test("Default compression options for writing to an ORC file") { + withOrcFile((1 to 100).map(i => (i, s"val_$i"))) { file => + assertResult(CompressionKind.ZLIB) { + OrcFileOperator.getFileReader(file).getCompression + } + } + } + + // Following codec is supported in hive-0.13.1, ignore it now + ignore("Other compression options for writing to an ORC file - 0.13.1 and above") { + val data = (1 to 100).map(i => (i, s"val_$i")) + val conf = sparkContext.hadoopConfiguration + + conf.set(ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, "SNAPPY") + withOrcFile(data) { file => + assertResult(CompressionKind.SNAPPY) { + OrcFileOperator.getFileReader(file).getCompression + } + } + + conf.set(ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, "NONE") + withOrcFile(data) { file => + assertResult(CompressionKind.NONE) { + OrcFileOperator.getFileReader(file).getCompression + } + } + + conf.set(ConfVars.HIVE_ORC_DEFAULT_COMPRESS.varname, "LZO") + withOrcFile(data) { file => + assertResult(CompressionKind.LZO) { + OrcFileOperator.getFileReader(file).getCompression + } + } + } + + test("simple select queries") { + withOrcTable((0 until 10).map(i => (i, i.toString)), "t") { + checkAnswer( + sql("SELECT `_1` FROM t where t.`_1` > 5"), + (6 until 10).map(Row.apply(_))) + + checkAnswer( + sql("SELECT `_1` FROM t as tmp where tmp.`_1` < 5"), + (0 until 5).map(Row.apply(_))) + } + } + + test("appending") { + val data = (0 until 10).map(i => (i, i.toString)) + createDataFrame(data).toDF("c1", "c2").registerTempTable("tmp") + withOrcTable(data, "t") { + sql("INSERT INTO TABLE t SELECT * FROM tmp") + checkAnswer(table("t"), (data ++ data).map(Row.fromTuple)) + } + catalog.unregisterTable(Seq("tmp")) + } + + test("overwriting") { + val data = (0 until 10).map(i => (i, i.toString)) + createDataFrame(data).toDF("c1", "c2").registerTempTable("tmp") + withOrcTable(data, "t") { + sql("INSERT OVERWRITE TABLE t SELECT * FROM tmp") + checkAnswer(table("t"), data.map(Row.fromTuple)) + } + catalog.unregisterTable(Seq("tmp")) + } + + test("self-join") { + // 4 rows, cells of column 1 of row 2 and row 4 are null + val data = (1 to 4).map { i => + val maybeInt = if (i % 2 == 0) None else Some(i) + (maybeInt, i.toString) + } + + withOrcTable(data, "t") { + val selfJoin = sql("SELECT * FROM t x JOIN t y WHERE x.`_1` = y.`_1`") + val queryOutput = selfJoin.queryExecution.analyzed.output + + assertResult(4, "Field count mismatches")(queryOutput.size) + assertResult(2, "Duplicated expression ID in query plan:\n $selfJoin") { + queryOutput.filter(_.name == "_1").map(_.exprId).size + } + + checkAnswer(selfJoin, List(Row(1, "1", 1, "1"), Row(3, "3", 3, "3"))) + } + } + + test("nested data - struct with array field") { + val data = (1 to 10).map(i => Tuple1((i, Seq("val_$i")))) + withOrcTable(data, "t") { + checkAnswer(sql("SELECT `_1`.`_2`[0] FROM t"), data.map { + case Tuple1((_, Seq(string))) => Row(string) + }) + } + } + + test("nested data - array of struct") { + val 
data = (1 to 10).map(i => Tuple1(Seq(i -> "val_$i"))) + withOrcTable(data, "t") { + checkAnswer(sql("SELECT `_1`[0].`_2` FROM t"), data.map { + case Tuple1(Seq((_, string))) => Row(string) + }) + } + } + + test("columns only referenced by pushed down filters should remain") { + withOrcTable((1 to 10).map(Tuple1.apply), "t") { + checkAnswer(sql("SELECT `_1` FROM t WHERE `_1` < 10"), (1 to 9).map(Row.apply(_))) + } + } + + test("SPARK-5309 strings stored using dictionary compression in orc") { + withOrcTable((0 until 1000).map(i => ("same", "run_" + i / 100, 1)), "t") { + checkAnswer( + sql("SELECT `_1`, `_2`, SUM(`_3`) FROM t GROUP BY `_1`, `_2`"), + (0 until 10).map(i => Row("same", "run_" + i, 100))) + + checkAnswer( + sql("SELECT `_1`, `_2`, SUM(`_3`) FROM t WHERE `_2` = 'run_5' GROUP BY `_1`, `_2`"), + List(Row("same", "run_5", 100))) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala new file mode 100644 index 0000000000000..82e08caf46457 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.sql.hive.test.TestHive._ +import org.apache.spark.sql.{QueryTest, Row} + +case class OrcData(intField: Int, stringField: String) + +abstract class OrcSuite extends QueryTest with BeforeAndAfterAll { + var orcTableDir: File = null + var orcTableAsDir: File = null + + override def beforeAll(): Unit = { + super.beforeAll() + + orcTableAsDir = File.createTempFile("orctests", "sparksql") + orcTableAsDir.delete() + orcTableAsDir.mkdir() + + // Hack: to prepare orc data files using hive external tables + orcTableDir = File.createTempFile("orctests", "sparksql") + orcTableDir.delete() + orcTableDir.mkdir() + import org.apache.spark.sql.hive.test.TestHive.implicits._ + + sparkContext + .makeRDD(1 to 10) + .map(i => OrcData(i, s"part-$i")) + .toDF() + .registerTempTable(s"orc_temp_table") + + sql( + s"""CREATE EXTERNAL TABLE normal_orc( + | intField INT, + | stringField STRING + |) + |STORED AS ORC + |LOCATION '${orcTableAsDir.getCanonicalPath}' + """.stripMargin) + + sql( + s"""INSERT INTO TABLE normal_orc + |SELECT intField, stringField FROM orc_temp_table + """.stripMargin) + } + + override def afterAll(): Unit = { + orcTableDir.delete() + orcTableAsDir.delete() + } + + test("create temporary orc table") { + checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_source"), Row(10)) + + checkAnswer( + sql("SELECT * FROM normal_orc_source"), + (1 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT * FROM normal_orc_source where intField > 5"), + (6 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT COUNT(intField), stringField FROM normal_orc_source GROUP BY stringField"), + (1 to 10).map(i => Row(1, s"part-$i"))) + } + + test("create temporary orc table as") { + checkAnswer(sql("SELECT COUNT(*) FROM normal_orc_as_source"), Row(10)) + + checkAnswer( + sql("SELECT * FROM normal_orc_source"), + (1 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT * FROM normal_orc_source WHERE intField > 5"), + (6 to 10).map(i => Row(i, s"part-$i"))) + + checkAnswer( + sql("SELECT COUNT(intField), stringField FROM normal_orc_source GROUP BY stringField"), + (1 to 10).map(i => Row(1, s"part-$i"))) + } + + test("appending insert") { + sql("INSERT INTO TABLE normal_orc_source SELECT * FROM orc_temp_table WHERE intField > 5") + + checkAnswer( + sql("SELECT * FROM normal_orc_source"), + (1 to 5).map(i => Row(i, s"part-$i")) ++ (6 to 10).flatMap { i => + Seq.fill(2)(Row(i, s"part-$i")) + }) + } + + test("overwrite insert") { + sql( + """INSERT OVERWRITE TABLE normal_orc_as_source + |SELECT * FROM orc_temp_table WHERE intField > 5 + """.stripMargin) + + checkAnswer( + sql("SELECT * FROM normal_orc_as_source"), + (6 to 10).map(i => Row(i, s"part-$i"))) + } +} + +class OrcSourceSuite extends OrcSuite { + override def beforeAll(): Unit = { + super.beforeAll() + + sql( + s"""CREATE TEMPORARY TABLE normal_orc_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' + |) + """.stripMargin) + + sql( + s"""CREATE TEMPORARY TABLE normal_orc_as_source + |USING org.apache.spark.sql.hive.orc + |OPTIONS ( + | PATH '${new File(orcTableAsDir.getAbsolutePath).getCanonicalPath}' + |) + """.stripMargin) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala new file mode 100644 index 
0000000000000..750f0b04aaa87 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcTest.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.orc + +import java.io.File + +import scala.reflect.ClassTag +import scala.reflect.runtime.universe.TypeTag + +import org.apache.spark.sql.hive.HiveContext +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql._ + +private[sql] trait OrcTest extends SQLTestUtils { + protected def hiveContext = sqlContext.asInstanceOf[HiveContext] + + import sqlContext.sparkContext + import sqlContext.implicits._ + + /** + * Writes `data` to an ORC file, which is then passed to `f` and will be deleted after `f` + * returns. + */ + protected def withOrcFile[T <: Product: ClassTag: TypeTag] + (data: Seq[T]) + (f: String => Unit): Unit = { + withTempPath { file => + sparkContext.parallelize(data).toDF().write.format("orc").save(file.getCanonicalPath) + f(file.getCanonicalPath) + } + } + + /** + * Writes `data` to an ORC file and reads it back as a [[DataFrame]], + * which is then passed to `f`. The ORC file will be deleted after `f` returns. + */ + protected def withOrcDataFrame[T <: Product: ClassTag: TypeTag] + (data: Seq[T]) + (f: DataFrame => Unit): Unit = { + withOrcFile(data)(path => f(hiveContext.read.format("orc").load(path))) + } + + /** + * Writes `data` to an ORC file, reads it back as a [[DataFrame]] and registers it as a + * temporary table named `tableName`, then calls `f`. The temporary table together with the + * ORC file will be dropped/deleted after `f` returns.
+ */ + protected def withOrcTable[T <: Product: ClassTag: TypeTag] + (data: Seq[T], tableName: String) + (f: => Unit): Unit = { + withOrcDataFrame(data) { df => + hiveContext.registerDataFrameAsTable(df, tableName) + withTempTable(tableName)(f) + } + } + + protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( + data: Seq[T], path: File): Unit = { + data.toDF().write.format("orc").mode(SaveMode.Overwrite).save(path.getCanonicalPath) + } + + protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( + df: DataFrame, path: File): Unit = { + df.write.format("orc").mode(SaveMode.Overwrite).save(path.getCanonicalPath) + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala index d5dd0bf58e702..e62ac909cbd0c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala @@ -21,18 +21,16 @@ import java.io.File import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{QueryTest, SQLConf, SaveMode} import org.apache.spark.sql.catalyst.expressions.Row +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{ExecutedCommand, PhysicalRDD} import org.apache.spark.sql.hive.execution.HiveTableScan -import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ -import org.apache.spark.sql.json.JSONRelation -import org.apache.spark.sql.sources.{InsertIntoDataSource, LogicalRelation} import org.apache.spark.sql.parquet.{ParquetRelation2, ParquetTableScan} -import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.sources.{InsertIntoDataSource, InsertIntoHadoopFsRelation, LogicalRelation} import org.apache.spark.sql.types._ +import org.apache.spark.sql.{DataFrame, QueryTest, SQLConf, SaveMode} import org.apache.spark.util.Utils // The data where the partitioning key exists only in the directory structure. 
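As a usage note for the OrcTest helpers introduced above: a hypothetical suite (not part of this patch) would mix in OrcTest, point sqlContext at TestHive, and lean on withOrcTable for setup and cleanup. A minimal sketch, with the class and table names being illustrative:

  package org.apache.spark.sql.hive.orc

  import org.scalatest.FunSuiteLike

  import org.apache.spark.sql.{QueryTest, Row}
  import org.apache.spark.sql.hive.test.TestHive
  import org.apache.spark.sql.hive.test.TestHive._

  class OrcHelpersSketchSuite extends QueryTest with FunSuiteLike with OrcTest {
    override val sqlContext = TestHive

    test("round trip through a temporary ORC table") {
      val data = (1 to 5).map(i => (i, s"val_$i"))
      // withOrcTable writes `data` as ORC, registers it as table `t`, and drops both afterwards.
      withOrcTable(data, "t") {
        checkAnswer(sql("SELECT `_1` FROM t WHERE `_1` > 3"), Row(4) :: Row(5) :: Nil)
      }
    }
  }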
@@ -40,7 +38,7 @@ case class ParquetData(intField: Int, stringField: String) // The data that also includes the partitioning key case class ParquetDataWithKey(p: Int, intField: Int, stringField: String) -case class StructContainer(intStructField :Int, stringStructField: String) +case class StructContainer(intStructField: Int, stringStructField: String) case class ParquetDataWithComplexTypes( intField: Int, @@ -153,9 +151,9 @@ class ParquetMetastoreSuiteBase extends ParquetPartitioningTest { } val rdd1 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i, "b":"str$i"}""")) - jsonRDD(rdd1).registerTempTable("jt") + read.json(rdd1).registerTempTable("jt") val rdd2 = sparkContext.parallelize((1 to 10).map(i => s"""{"a":[$i, null]}""")) - jsonRDD(rdd2).registerTempTable("jt_array") + read.json(rdd2).registerTempTable("jt_array") setConf("spark.sql.hive.convertMetastoreParquet", "true") } @@ -294,10 +292,10 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { ) table("test_parquet_ctas").queryExecution.optimizedPlan match { - case LogicalRelation(p: ParquetRelation2) => // OK - case _ => - fail( - s"test_parquet_ctas should be converted to ${classOf[ParquetRelation2].getCanonicalName}") + case LogicalRelation(_: ParquetRelation2) => // OK + case _ => fail( + "test_parquet_ctas should be converted to " + + s"${classOf[ParquetRelation2].getCanonicalName}") } sql("DROP TABLE IF EXISTS test_parquet_ctas") @@ -318,12 +316,10 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt") df.queryExecution.executedPlan match { - case ExecutedCommand( - InsertIntoDataSource( - LogicalRelation(r: ParquetRelation2), query, overwrite)) => // OK + case ExecutedCommand(InsertIntoHadoopFsRelation(_: ParquetRelation2, _, _)) => // OK case o => fail("test_insert_parquet should be converted to a " + s"${classOf[ParquetRelation2].getCanonicalName} and " + - s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan." + + s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan. " + s"However, found a ${o.toString} ") } @@ -350,9 +346,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { val df = sql("INSERT INTO TABLE test_insert_parquet SELECT a FROM jt_array") df.queryExecution.executedPlan match { - case ExecutedCommand( - InsertIntoDataSource( - LogicalRelation(r: ParquetRelation2), query, overwrite)) => // OK + case ExecutedCommand(InsertIntoHadoopFsRelation(r: ParquetRelation2, _, _)) => // OK case o => fail("test_insert_parquet should be converted to a " + s"${classOf[ParquetRelation2].getCanonicalName} and " + s"${classOf[InsertIntoDataSource].getCanonicalName} is expcted as the SparkPlan." 
+ @@ -392,10 +386,58 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { sql("DROP TABLE ms_convert") } + def collectParquetRelation(df: DataFrame): ParquetRelation2 = { + val plan = df.queryExecution.analyzed + plan.collectFirst { + case LogicalRelation(r: ParquetRelation2) => r + }.getOrElse { + fail(s"Expecting a ParquetRelation2, but got:\n$plan") + } + } + + test("SPARK-7749: non-partitioned metastore Parquet table lookup should use cached relation") { + sql( + s"""CREATE TABLE nonPartitioned ( + | key INT, + | value STRING + |) + |STORED AS PARQUET + """.stripMargin) + + // First lookup fills the cache + val r1 = collectParquetRelation(table("nonPartitioned")) + // Second lookup should reuse the cache + val r2 = collectParquetRelation(table("nonPartitioned")) + // They should be the same instance + assert(r1 eq r2) + + sql("DROP TABLE nonPartitioned") + } + + test("SPARK-7749: partitioned metastore Parquet table lookup should use cached relation") { + sql( + s"""CREATE TABLE partitioned ( + | key INT, + | value STRING + |) + |PARTITIONED BY (part INT) + |STORED AS PARQUET + """.stripMargin) + + // First lookup fills the cache + val r1 = collectParquetRelation(table("partitioned")) + // Second lookup should reuse the cache + val r2 = collectParquetRelation(table("partitioned")) + // They should be the same instance + assert(r1 eq r2) + + sql("DROP TABLE partitioned") + } + test("Caching converted data source Parquet Relations") { - def checkCached(tableIdentifer: catalog.QualifiedTableName): Unit = { + def checkCached(tableIdentifier: catalog.QualifiedTableName): Unit = { // Converted test_parquet should be cached. - catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) match { + catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) match { case null => fail("Converted test_parquet should be cached in the cache.") case logical @ LogicalRelation(parquetRelation: ParquetRelation2) => // OK case other => @@ -421,30 +463,30 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' """.stripMargin) - var tableIdentifer = catalog.QualifiedTableName("default", "test_insert_parquet") + var tableIdentifier = catalog.QualifiedTableName("default", "test_insert_parquet") // First, make sure the converted test_parquet is not cached. - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) // Table lookup will make the table cached. table("test_insert_parquet") - checkCached(tableIdentifer) + checkCached(tableIdentifier) // For insert into non-partitioned table, we will do the conversion, // so the converted test_insert_parquet should be cached. invalidateTable("test_insert_parquet") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_insert_parquet |select a, b from jt """.stripMargin) - checkCached(tableIdentifer) + checkCached(tableIdentifier) // Make sure we can read the data. checkAnswer( sql("select * from test_insert_parquet"), sql("select a, b from jt").collect()) // Invalidate the cache. invalidateTable("test_insert_parquet") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) // Create a partitioned table. 
sql( @@ -461,8 +503,8 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' """.stripMargin) - tableIdentifer = catalog.QualifiedTableName("default", "test_parquet_partitioned_cache_test") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + tableIdentifier = catalog.QualifiedTableName("default", "test_parquet_partitioned_cache_test") + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_parquet_partitioned_cache_test @@ -471,18 +513,18 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { """.stripMargin) // Right now, insert into a partitioned Parquet is not supported in data source Parquet. // So, we expect it is not cached. - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql( """ |INSERT INTO TABLE test_parquet_partitioned_cache_test |PARTITION (date='2015-04-02') |select a, b from jt """.stripMargin) - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) // Make sure we can cache the partitioned table. table("test_parquet_partitioned_cache_test") - checkCached(tableIdentifer) + checkCached(tableIdentifier) // Make sure we can read the data. checkAnswer( sql("select STRINGField, date, intField from test_parquet_partitioned_cache_test"), @@ -494,7 +536,7 @@ class ParquetDataSourceOnMetastoreSuite extends ParquetMetastoreSuiteBase { """.stripMargin).collect()) invalidateTable("test_parquet_partitioned_cache_test") - assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifer) === null) + assert(catalog.cachedDataSourceTables.getIfPresent(tableIdentifier) === null) sql("DROP TABLE test_insert_parquet") sql("DROP TABLE test_parquet_partitioned_cache_test") @@ -624,16 +666,16 @@ class ParquetSourceSuiteBase extends ParquetPartitioningTest { sql("drop table if exists spark_6016_fix") // Create a DataFrame with two partitions. So, the created table will have two parquet files. - val df1 = jsonRDD(sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i}"""), 2)) - df1.saveAsTable("spark_6016_fix", "parquet", SaveMode.Overwrite) + val df1 = read.json(sparkContext.parallelize((1 to 10).map(i => s"""{"a":$i}"""), 2)) + df1.write.mode(SaveMode.Overwrite).format("parquet").saveAsTable("spark_6016_fix") checkAnswer( sql("select * from spark_6016_fix"), (1 to 10).map(i => Row(i)) ) // Create a DataFrame with four partitions. So, the created table will have four parquet files. - val df2 = jsonRDD(sparkContext.parallelize((1 to 10).map(i => s"""{"b":$i}"""), 4)) - df2.saveAsTable("spark_6016_fix", "parquet", SaveMode.Overwrite) + val df2 = read.json(sparkContext.parallelize((1 to 10).map(i => s"""{"b":$i}"""), 4)) + df2.write.mode(SaveMode.Overwrite).format("parquet").saveAsTable("spark_6016_fix") // For the bug of SPARK-6016, we are caching two outdated footers for df1. 
Then, // since the new table has four parquet files, we are trying to read new footers from two files // and then merge metadata in footers of these four (two outdated ones and two latest one), @@ -670,7 +712,7 @@ class ParquetDataSourceOnSourceSuite extends ParquetSourceSuiteBase { StructField("a", arrayType1, nullable = true) :: Nil) assert(df.schema === expectedSchema1) - df.saveAsTable("alwaysNullable", "parquet") + df.write.format("parquet").saveAsTable("alwaysNullable") val mapType2 = MapType(IntegerType, IntegerType, valueContainsNull = true) val arrayType2 = ArrayType(IntegerType, containsNull = true) @@ -693,13 +735,13 @@ class ParquetDataSourceOnSourceSuite extends ParquetSourceSuiteBase { val filePath = new File(tempDir, "testParquet").getCanonicalPath val filePath2 = new File(tempDir, "testParquet2").getCanonicalPath - val df = Seq(1,2,3).map(i => (i, i.toString)).toDF("int", "str") + val df = Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str") val df2 = df.as('x).join(df.as('y), $"x.str" === $"y.str").groupBy("y.str").max("y.int") - intercept[RuntimeException](df2.saveAsParquetFile(filePath)) + intercept[Throwable](df2.write.parquet(filePath)) val df3 = df2.toDF("str", "max_int") - df3.saveAsParquetFile(filePath2) - val df4 = parquetFile(filePath2) + df3.write.parquet(filePath2) + val df4 = read.parquet(filePath2) checkAnswer(df4, Row("1", 1) :: Row("2", 2) :: Row("3", 3) :: Nil) assert(df4.columns === Array("str", "max_int")) } @@ -738,14 +780,14 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll sparkContext.makeRDD(1 to 10) .map(i => ParquetData(i, s"part-$p")) .toDF() - .saveAsParquetFile(partDir.getCanonicalPath) + .write.parquet(partDir.getCanonicalPath) } sparkContext .makeRDD(1 to 10) .map(i => ParquetData(i, s"part-1")) .toDF() - .saveAsParquetFile(new File(normalTableDir, "normal").getCanonicalPath) + .write.parquet(new File(normalTableDir, "normal").getCanonicalPath) partitionedTableDirWithKey = Utils.createTempDir() @@ -754,7 +796,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll sparkContext.makeRDD(1 to 10) .map(i => ParquetDataWithKey(p, i, s"part-$p")) .toDF() - .saveAsParquetFile(partDir.getCanonicalPath) + .write.parquet(partDir.getCanonicalPath) } partitionedTableDirWithKeyAndComplexTypes = Utils.createTempDir() @@ -764,7 +806,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll sparkContext.makeRDD(1 to 10).map { i => ParquetDataWithKeyAndComplexTypes( p, i, s"part-$p", StructContainer(i, f"${i}_string"), 1 to i) - }.toDF().saveAsParquetFile(partDir.getCanonicalPath) + }.toDF().write.parquet(partDir.getCanonicalPath) } partitionedTableDirWithComplexTypes = Utils.createTempDir() @@ -773,7 +815,7 @@ abstract class ParquetPartitioningTest extends QueryTest with BeforeAndAfterAll val partDir = new File(partitionedTableDirWithComplexTypes, s"p=$p") sparkContext.makeRDD(1 to 10).map { i => ParquetDataWithComplexTypes(i, s"part-$p", StructContainer(i, f"${i}_string"), 1 to i) - }.toDF().saveAsParquetFile(partDir.getCanonicalPath) + }.toDF().write.parquet(partDir.getCanonicalPath) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala new file mode 100644 index 0000000000000..0f959b3d0b86d --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import java.text.NumberFormat +import java.util.UUID + +import com.google.common.base.Objects +import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hadoop.io.{NullWritable, Text} +import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat, TextOutputFormat} +import org.apache.hadoop.mapreduce.{Job, RecordWriter, TaskAttemptContext} + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.CatalystTypeConverters +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} +import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.{Row, SQLContext} + +/** + * A simple example [[HadoopFsRelationProvider]]. + */ +class SimpleTextSource extends HadoopFsRelationProvider { + override def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): HadoopFsRelation = { + new SimpleTextRelation(paths, schema, partitionColumns, parameters)(sqlContext) + } +} + +class AppendingTextOutputFormat(outputFile: Path) extends TextOutputFormat[NullWritable, Text] { + val numberFormat = NumberFormat.getInstance() + + numberFormat.setMinimumIntegerDigits(5) + numberFormat.setGroupingUsed(false) + + override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { + val split = context.getTaskAttemptID.getTaskID.getId + val name = FileOutputFormat.getOutputName(context) + new Path(outputFile, s"$name-${numberFormat.format(split)}-${UUID.randomUUID()}") + } +} + +class SimpleTextOutputWriter(path: String, context: TaskAttemptContext) extends OutputWriter { + private val recordWriter: RecordWriter[NullWritable, Text] = + new AppendingTextOutputFormat(new Path(path)).getRecordWriter(context) + + override def write(row: Row): Unit = { + val serialized = row.toSeq.map(_.toString).mkString(",") + recordWriter.write(null, new Text(serialized)) + } + + override def close(): Unit = { + recordWriter.close(context) + } +} + +/** + * A simple example [[HadoopFsRelation]], used for testing purposes. Data are stored as comma + * separated string lines. When scanning data, schema must be explicitly provided via data source + * option `"dataSchema"`. 
+ */ +class SimpleTextRelation( + override val paths: Array[String], + val maybeDataSchema: Option[StructType], + override val userDefinedPartitionColumns: Option[StructType], + parameters: Map[String, String])( + @transient val sqlContext: SQLContext) + extends HadoopFsRelation { + + import sqlContext.sparkContext + + override val dataSchema: StructType = + maybeDataSchema.getOrElse(DataType.fromJson(parameters("dataSchema")).asInstanceOf[StructType]) + + override def equals(other: Any): Boolean = other match { + case that: SimpleTextRelation => + this.paths.sameElements(that.paths) && + this.maybeDataSchema == that.maybeDataSchema && + this.dataSchema == that.dataSchema && + this.partitionColumns == that.partitionColumns + + case _ => false + } + + override def hashCode(): Int = + Objects.hashCode(paths, maybeDataSchema, dataSchema, partitionColumns) + + override def buildScan(inputStatuses: Array[FileStatus]): RDD[Row] = { + val fields = dataSchema.map(_.dataType) + + sparkContext.textFile(inputStatuses.map(_.getPath).mkString(",")).map { record => + Row(record.split(",").zip(fields).map { case (value, dataType) => + // `Cast`ed values are always of Catalyst types (i.e. UTF8String instead of String, etc.) + val catalystValue = Cast(Literal(value), dataType).eval() + // Here we're converting Catalyst values to Scala values to test `needsConversion` + CatalystTypeConverters.convertToScala(catalystValue, dataType) + }: _*) + } + } + + override def prepareJobForWrite(job: Job): OutputWriterFactory = new OutputWriterFactory { + override def newInstance( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): OutputWriter = { + new SimpleTextOutputWriter(path, context) + } + } +} + +/** + * A simple example [[HadoopFsRelationProvider]]. + */ +class CommitFailureTestSource extends HadoopFsRelationProvider { + override def createRelation( + sqlContext: SQLContext, + paths: Array[String], + schema: Option[StructType], + partitionColumns: Option[StructType], + parameters: Map[String, String]): HadoopFsRelation = { + new CommitFailureTestRelation(paths, schema, partitionColumns, parameters)(sqlContext) + } +} + +class CommitFailureTestRelation( + override val paths: Array[String], + maybeDataSchema: Option[StructType], + override val userDefinedPartitionColumns: Option[StructType], + parameters: Map[String, String])( + @transient sqlContext: SQLContext) + extends SimpleTextRelation( + paths, maybeDataSchema, userDefinedPartitionColumns, parameters)(sqlContext) { + override def prepareJobForWrite(job: Job): OutputWriterFactory = new OutputWriterFactory { + override def newInstance( + path: String, + dataSchema: StructType, + context: TaskAttemptContext): OutputWriter = { + new SimpleTextOutputWriter(path, context) { + override def close(): Unit = { + sys.error("Intentional task commitment failure for testing purpose.") + } + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala new file mode 100644 index 0000000000000..501e1794fc13e --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala @@ -0,0 +1,598 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import java.io.File + +import com.google.common.io.Files +import org.apache.hadoop.fs.Path +import org.scalatest.FunSuite + +import org.apache.spark.SparkException +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql._ +import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ + +abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils { + override val sqlContext: SQLContext = TestHive + + import sqlContext._ + import sqlContext.implicits._ + + val dataSourceName = classOf[SimpleTextSource].getCanonicalName + + val dataSchema = + StructType( + Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", StringType, nullable = false))) + + val testDF = (1 to 3).map(i => (i, s"val_$i")).toDF("a", "b") + + val partitionedTestDF1 = (for { + i <- 1 to 3 + p2 <- Seq("foo", "bar") + } yield (i, s"val_$i", 1, p2)).toDF("a", "b", "p1", "p2") + + val partitionedTestDF2 = (for { + i <- 1 to 3 + p2 <- Seq("foo", "bar") + } yield (i, s"val_$i", 2, p2)).toDF("a", "b", "p1", "p2") + + val partitionedTestDF = partitionedTestDF1.unionAll(partitionedTestDF2) + + def checkQueries(df: DataFrame): Unit = { + // Selects everything + checkAnswer( + df, + for (i <- 1 to 3; p1 <- 1 to 2; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", p1, p2)) + + // Simple filtering and partition pruning + checkAnswer( + df.filter('a > 1 && 'p1 === 2), + for (i <- 2 to 3; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", 2, p2)) + + // Simple projection and filtering + checkAnswer( + df.filter('a > 1).select('b, 'a + 1), + for (i <- 2 to 3; _ <- 1 to 2; _ <- Seq("foo", "bar")) yield Row(s"val_$i", i + 1)) + + // Simple projection and partition pruning + checkAnswer( + df.filter('a > 1 && 'p1 < 2).select('b, 'p1), + for (i <- 2 to 3; _ <- Seq("foo", "bar")) yield Row(s"val_$i", 1)) + + // Project many copies of columns with different types (reproduction for SPARK-7858) + checkAnswer( + df.filter('a > 1 && 'p1 < 2).select('b, 'b, 'b, 'b, 'p1, 'p1, 'p1, 'p1), + for (i <- 2 to 3; _ <- Seq("foo", "bar")) + yield Row(s"val_$i", s"val_$i", s"val_$i", s"val_$i", 1, 1, 1, 1)) + + // Self-join + df.registerTempTable("t") + withTempTable("t") { + checkAnswer( + sql( + """SELECT l.a, r.b, l.p1, r.p2 + |FROM t l JOIN t r + |ON l.a = r.a AND l.p1 = r.p1 AND l.p2 = r.p2 + """.stripMargin), + for (i <- 1 to 3; p1 <- 1 to 2; p2 <- Seq("foo", "bar")) yield Row(i, s"val_$i", p1, p2)) + } + } + + test("save()/load() - non-partitioned table - Overwrite") { + withTempPath { file => + testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) + testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) + + checkAnswer( + read.format(dataSourceName) + .option("path", file.getCanonicalPath) + .option("dataSchema", dataSchema.json) + .load(), + 
testDF.collect()) + } + } + + test("save()/load() - non-partitioned table - Append") { + withTempPath { file => + testDF.write.mode(SaveMode.Overwrite).format(dataSourceName).save(file.getCanonicalPath) + testDF.write.mode(SaveMode.Append).format(dataSourceName).save(file.getCanonicalPath) + + checkAnswer( + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath).orderBy("a"), + testDF.unionAll(testDF).orderBy("a").collect()) + } + } + + test("save()/load() - non-partitioned table - ErrorIfExists") { + withTempDir { file => + intercept[RuntimeException] { + testDF.write.format(dataSourceName).mode(SaveMode.ErrorIfExists).save(file.getCanonicalPath) + } + } + } + + test("save()/load() - non-partitioned table - Ignore") { + withTempDir { file => + testDF.write.mode(SaveMode.Ignore).format(dataSourceName).save(file.getCanonicalPath) + + val path = new Path(file.getCanonicalPath) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + assert(fs.listStatus(path).isEmpty) + } + } + + test("save()/load() - partitioned table - simple queries") { + withTempPath { file => + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.ErrorIfExists) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + checkQueries( + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath)) + } + } + + test("save()/load() - partitioned table - Overwrite") { + withTempPath { file => + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + checkAnswer( + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath), + partitionedTestDF.collect()) + } + } + + test("save()/load() - partitioned table - Append") { + withTempPath { file => + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Append) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + checkAnswer( + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath), + partitionedTestDF.unionAll(partitionedTestDF).collect()) + } + } + + test("save()/load() - partitioned table - Append - new partition values") { + withTempPath { file => + partitionedTestDF1.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + partitionedTestDF2.write + .format(dataSourceName) + .mode(SaveMode.Append) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + checkAnswer( + read.format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(file.getCanonicalPath), + partitionedTestDF.collect()) + } + } + + test("save()/load() - partitioned table - ErrorIfExists") { + withTempDir { file => + intercept[RuntimeException] { + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.ErrorIfExists) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + } + } + } + + test("save()/load() - partitioned table - Ignore") { + withTempDir { file => + partitionedTestDF.write + .format(dataSourceName).mode(SaveMode.Ignore).save(file.getCanonicalPath) + + val path = new Path(file.getCanonicalPath) + val fs = 
path.getFileSystem(SparkHadoopUtil.get.conf) + assert(fs.listStatus(path).isEmpty) + } + } + + test("saveAsTable()/load() - non-partitioned table - Overwrite") { + testDF.write.format(dataSourceName).mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), testDF.collect()) + } + } + + test("saveAsTable()/load() - non-partitioned table - Append") { + testDF.write.format(dataSourceName).mode(SaveMode.Overwrite).saveAsTable("t") + testDF.write.format(dataSourceName).mode(SaveMode.Append).saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), testDF.unionAll(testDF).orderBy("a").collect()) + } + } + + test("saveAsTable()/load() - non-partitioned table - ErrorIfExists") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + intercept[AnalysisException] { + testDF.write.format(dataSourceName).mode(SaveMode.ErrorIfExists).saveAsTable("t") + } + } + } + + test("saveAsTable()/load() - non-partitioned table - Ignore") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + testDF.write.format(dataSourceName).mode(SaveMode.Ignore).saveAsTable("t") + assert(table("t").collect().isEmpty) + } + } + + test("saveAsTable()/load() - partitioned table - simple queries") { + partitionedTestDF.write.format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .saveAsTable("t") + + withTable("t") { + checkQueries(table("t")) + } + } + + test("saveAsTable()/load() - partitioned table - Overwrite") { + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append") { + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.unionAll(partitionedTestDF).collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append - new partition values") { + partitionedTestDF1.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + partitionedTestDF2.write + .format(dataSourceName) + .mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), partitionedTestDF.collect()) + } + } + + test("saveAsTable()/load() - partitioned table - Append - mismatched partition columns") { + partitionedTestDF1.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + // Using only a subset of all partition columns + intercept[Throwable] { + partitionedTestDF2.write + .format(dataSourceName) + .mode(SaveMode.Append) + .option("dataSchema", dataSchema.json) + .partitionBy("p1") + .saveAsTable("t") + } + } + + 
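Aside (not part of the patch): the partitioned save()/saveAsTable() tests above all exercise the same round trip — write with partitionBy so the partition columns become directory levels (e.g. .../p1=1/p2=foo), then read the data back, supplying the non-partition schema through the "dataSchema" option because SimpleTextSource cannot infer it. A condensed sketch of that round trip, for orientation only; `dir` is a hypothetical temporary directory, and dataSourceName, dataSchema, read and checkAnswer come from the surrounding HadoopFsRelationTest suite.

    val df = (1 to 3).map(i => (i, s"val_$i", 1, "foo")).toDF("a", "b", "p1", "p2")

    df.write
      .format(dataSourceName)                  // e.g. classOf[SimpleTextSource].getCanonicalName
      .mode(SaveMode.Overwrite)
      .partitionBy("p1", "p2")                 // written as .../p1=<value>/p2=<value> directories
      .save(dir.getCanonicalPath)

    val reloaded = read.format(dataSourceName)
      .option("dataSchema", dataSchema.json)   // schema of the non-partition columns only
      .load(dir.getCanonicalPath)

    checkAnswer(reloaded, df.collect())        // partition columns are reconstructed from the paths
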
test("saveAsTable()/load() - partitioned table - ErrorIfExists") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + intercept[AnalysisException] { + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.ErrorIfExists) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + } + } + } + + test("saveAsTable()/load() - partitioned table - Ignore") { + Seq.empty[(Int, String)].toDF().registerTempTable("t") + + withTempTable("t") { + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Ignore) + .option("dataSchema", dataSchema.json) + .partitionBy("p1", "p2") + .saveAsTable("t") + + assert(table("t").collect().isEmpty) + } + } + + test("Hadoop style globbing") { + withTempPath { file => + partitionedTestDF.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("p1", "p2") + .save(file.getCanonicalPath) + + val df = read + .format(dataSourceName) + .option("dataSchema", dataSchema.json) + .load(s"${file.getCanonicalPath}/p1=*/p2=???") + + val expectedPaths = Set( + s"${file.getCanonicalFile}/p1=1/p2=foo", + s"${file.getCanonicalFile}/p1=2/p2=foo", + s"${file.getCanonicalFile}/p1=1/p2=bar", + s"${file.getCanonicalFile}/p1=2/p2=bar" + ).map { p => + val path = new Path(p) + val fs = path.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) + path.makeQualified(fs.getUri, fs.getWorkingDirectory).toString + } + + val actualPaths = df.queryExecution.analyzed.collectFirst { + case LogicalRelation(relation: HadoopFsRelation) => + relation.paths.toSet + }.getOrElse { + fail("Expect an FSBasedRelation, but none could be found") + } + + assert(actualPaths === expectedPaths) + checkAnswer(df, partitionedTestDF.collect()) + } + } + + test("Partition column type casting") { + withTempPath { file => + val input = partitionedTestDF.select('a, 'b, 'p1.cast(StringType).as('ps), 'p2) + + input + .write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("ps", "p2") + .saveAsTable("t") + + withTempTable("t") { + checkAnswer(table("t"), input.collect()) + } + } + } + + test("SPARK-7616: adjust column name order accordingly when saving partitioned table") { + val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") + + df.write + .format(dataSourceName) + .mode(SaveMode.Overwrite) + .partitionBy("c", "a") + .saveAsTable("t") + + withTable("t") { + checkAnswer(table("t"), df.select('b, 'c, 'a).collect()) + } + } +} + +class SimpleTextHadoopFsRelationSuite extends HadoopFsRelationTest { + override val dataSourceName: String = classOf[SimpleTextSource].getCanonicalName + + import sqlContext._ + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield s"$i,val_$i,$p1") + .saveAsTextFile(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + read.format(dataSourceName) + .option("dataSchema", dataSchemaWithPartition.json) + .load(file.getCanonicalPath)) + } + } +} + +class CommitFailureTestRelationSuite extends FunSuite with SQLTestUtils { + import TestHive.implicits._ + + override 
val sqlContext = TestHive + + val dataSourceName: String = classOf[CommitFailureTestSource].getCanonicalName + + test("SPARK-7684: commitTask() failure should fallback to abortTask()") { + withTempPath { file => + val df = (1 to 3).map(i => i -> s"val_$i").toDF("a", "b") + intercept[SparkException] { + df.write.format(dataSourceName).save(file.getCanonicalPath) + } + + val fs = new Path(file.getCanonicalPath).getFileSystem(SparkHadoopUtil.get.conf) + assert(!fs.exists(new Path(file.getCanonicalPath, "_temporary"))) + } + } +} + +class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest { + override val dataSourceName: String = classOf[parquet.DefaultSource].getCanonicalName + + import sqlContext._ + import sqlContext.implicits._ + + test("save()/load() - partitioned table - simple queries - partition columns in data") { + withTempDir { file => + val basePath = new Path(file.getCanonicalPath) + val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf) + val qualifiedBasePath = fs.makeQualified(basePath) + + for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) { + val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2") + sparkContext + .parallelize(for (i <- 1 to 3) yield (i, s"val_$i", p1)) + .toDF("a", "b", "p1") + .write.parquet(partitionDir.toString) + } + + val dataSchemaWithPartition = + StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true)) + + checkQueries( + read.format(dataSourceName) + .option("dataSchema", dataSchemaWithPartition.json) + .load(file.getCanonicalPath)) + } + } + + test("SPARK-7868: _temporary directories should be ignored") { + withTempPath { dir => + val df = Seq("a", "b", "c").zipWithIndex.toDF() + + df.write + .format("parquet") + .save(dir.getCanonicalPath) + + df.write + .format("parquet") + .save(s"${dir.getCanonicalPath}/_temporary") + + checkAnswer(read.format("parquet").load(dir.getCanonicalPath), df.collect()) + } + } + + test("SPARK-8014: Avoid scanning output directory when SaveMode isn't SaveMode.Append") { + withTempDir { dir => + val path = dir.getCanonicalPath + val df = Seq(1 -> "a").toDF() + + // Creates an arbitrary file. If this directory gets scanned, ParquetRelation2 will throw + // since it's not a valid Parquet file. + val emptyFile = new File(path, "empty") + Files.createParentDirs(emptyFile) + Files.touch(emptyFile) + + // This shouldn't throw anything. + df.write.format("parquet").mode(SaveMode.Ignore).save(path) + + // This should only complain that the destination directory already exists, rather than file + // "empty" is not a Parquet file. + assert { + intercept[RuntimeException] { + df.write.format("parquet").mode(SaveMode.ErrorIfExists).save(path) + }.getMessage.contains("already exists") + } + + // This shouldn't throw anything. + df.write.format("parquet").mode(SaveMode.Overwrite).save(path) + checkAnswer(read.format("parquet").load(path), df) + } + } +} diff --git a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala b/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala deleted file mode 100644 index 33e96eaabfbf6..0000000000000 --- a/sql/hive/v0.12.0/src/main/scala/org/apache/spark/sql/hive/Shim12.scala +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive - -import java.net.URI -import java.util.{ArrayList => JArrayList, Properties} - -import scala.collection.JavaConversions._ -import scala.language.implicitConversions - -import org.apache.hadoop.{io => hadoopIo} -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.common.`type`.HiveDecimal -import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.ql.Context -import org.apache.hadoop.hive.ql.metadata.{Hive, Partition, Table} -import org.apache.hadoop.hive.ql.plan.{CreateTableDesc, FileSinkDesc, TableDesc} -import org.apache.hadoop.hive.ql.processors._ -import org.apache.hadoop.hive.ql.stats.StatsSetupConst -import org.apache.hadoop.hive.serde2.{ColumnProjectionUtils, Deserializer, io => hiveIo} -import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspectorConverters, ObjectInspector, PrimitiveObjectInspector} -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory -import org.apache.hadoop.hive.serde2.objectinspector.primitive.{HiveDecimalObjectInspector, PrimitiveObjectInspectorFactory} -import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfo, TypeInfoFactory} -import org.apache.hadoop.io.{NullWritable, Writable} -import org.apache.hadoop.mapred.InputFormat - -import org.apache.spark.sql.types.{UTF8String, Decimal, DecimalType} - -private[hive] case class HiveFunctionWrapper(functionClassName: String) - extends java.io.Serializable { - - // for Serialization - def this() = this(null) - - import org.apache.spark.util.Utils._ - def createFunction[UDFType <: AnyRef](): UDFType = { - getContextOrSparkClassLoader - .loadClass(functionClassName).newInstance.asInstanceOf[UDFType] - } -} - -/** - * A compatibility layer for interacting with Hive version 0.12.0. 
- */ -private[hive] object HiveShim { - val version = "0.12.0" - - def getTableDesc( - serdeClass: Class[_ <: Deserializer], - inputFormatClass: Class[_ <: InputFormat[_, _]], - outputFormatClass: Class[_], - properties: Properties) = { - new TableDesc(serdeClass, inputFormatClass, outputFormatClass, properties) - } - - def getStringWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.STRING, - getStringWritable(value)) - - def getIntWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.INT, - getIntWritable(value)) - - def getDoubleWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.DOUBLE, - getDoubleWritable(value)) - - def getBooleanWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.BOOLEAN, - getBooleanWritable(value)) - - def getLongWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.LONG, - getLongWritable(value)) - - def getFloatWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.FLOAT, - getFloatWritable(value)) - - def getShortWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.SHORT, - getShortWritable(value)) - - def getByteWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.BYTE, - getByteWritable(value)) - - def getBinaryWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.BINARY, - getBinaryWritable(value)) - - def getDateWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.DATE, - getDateWritable(value)) - - def getTimestampWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.TIMESTAMP, - getTimestampWritable(value)) - - def getDecimalWritableConstantObjectInspector(value: Any): ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.DECIMAL, - getDecimalWritable(value)) - - def getPrimitiveNullWritableConstantObjectInspector: ObjectInspector = - PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - PrimitiveCategory.VOID, null) - - def getStringWritable(value: Any): hadoopIo.Text = - if (value == null) null else new hadoopIo.Text(value.asInstanceOf[UTF8String].toString) - - def getIntWritable(value: Any): hadoopIo.IntWritable = - if (value == null) null else new hadoopIo.IntWritable(value.asInstanceOf[Int]) - - def getDoubleWritable(value: Any): hiveIo.DoubleWritable = - if (value == null) null else new hiveIo.DoubleWritable(value.asInstanceOf[Double]) - - def getBooleanWritable(value: Any): 
hadoopIo.BooleanWritable = - if (value == null) null else new hadoopIo.BooleanWritable(value.asInstanceOf[Boolean]) - - def getLongWritable(value: Any): hadoopIo.LongWritable = - if (value == null) null else new hadoopIo.LongWritable(value.asInstanceOf[Long]) - - def getFloatWritable(value: Any): hadoopIo.FloatWritable = - if (value == null) null else new hadoopIo.FloatWritable(value.asInstanceOf[Float]) - - def getShortWritable(value: Any): hiveIo.ShortWritable = - if (value == null) null else new hiveIo.ShortWritable(value.asInstanceOf[Short]) - - def getByteWritable(value: Any): hiveIo.ByteWritable = - if (value == null) null else new hiveIo.ByteWritable(value.asInstanceOf[Byte]) - - def getBinaryWritable(value: Any): hadoopIo.BytesWritable = - if (value == null) null else new hadoopIo.BytesWritable(value.asInstanceOf[Array[Byte]]) - - def getDateWritable(value: Any): hiveIo.DateWritable = - if (value == null) null else new hiveIo.DateWritable(value.asInstanceOf[Int]) - - def getTimestampWritable(value: Any): hiveIo.TimestampWritable = - if (value == null) { - null - } else { - new hiveIo.TimestampWritable(value.asInstanceOf[java.sql.Timestamp]) - } - - def getDecimalWritable(value: Any): hiveIo.HiveDecimalWritable = - if (value == null) { - null - } else { - new hiveIo.HiveDecimalWritable( - HiveShim.createDecimal(value.asInstanceOf[Decimal].toJavaBigDecimal)) - } - - def getPrimitiveNullWritable: NullWritable = NullWritable.get() - - def createDriverResultsArray = new JArrayList[String] - - def processResults(results: JArrayList[String]) = results - - def getStatsSetupConstTotalSize = StatsSetupConst.TOTAL_SIZE - - def getStatsSetupConstRawDataSize = StatsSetupConst.RAW_DATA_SIZE - - def createDefaultDBIfNeeded(context: HiveContext) = { } - - def getCommandProcessor(cmd: Array[String], conf: HiveConf) = { - CommandProcessorFactory.get(cmd(0), conf) - } - - def createDecimal(bd: java.math.BigDecimal): HiveDecimal = { - new HiveDecimal(bd) - } - - def appendReadColumns(conf: Configuration, ids: Seq[Integer], names: Seq[String]) { - ColumnProjectionUtils.appendReadColumnIDs(conf, ids) - ColumnProjectionUtils.appendReadColumnNames(conf, names) - } - - def getExternalTmpPath(context: Context, uri: URI) = { - context.getExternalTmpFileURI(uri) - } - - def getDataLocationPath(p: Partition) = p.getPartitionPath - - def getAllPartitionsOf(client: Hive, tbl: Table) = client.getAllPartitionsForPruner(tbl) - - def compatibilityBlackList = Seq( - "decimal_.*", - "udf7", - "drop_partitions_filter2", - "show_.*", - "serde_regex", - "udf_to_date", - "udaf_collect_set", - "udf_concat" - ) - - def setLocation(tbl: Table, crtTbl: CreateTableDesc): Unit = { - tbl.setDataLocation(new Path(crtTbl.getLocation()).toUri()) - } - - def decimalMetastoreString(decimalType: DecimalType): String = "decimal" - - def decimalTypeInfo(decimalType: DecimalType): TypeInfo = - TypeInfoFactory.decimalTypeInfo - - def decimalTypeInfoToCatalyst(inspector: PrimitiveObjectInspector): DecimalType = { - DecimalType.Unlimited - } - - def toCatalystDecimal(hdoi: HiveDecimalObjectInspector, data: Any): Decimal = { - if (hdoi.preferWritable()) { - Decimal(hdoi.getPrimitiveWritableObject(data).getHiveDecimal().bigDecimalValue) - } else { - Decimal(hdoi.getPrimitiveJavaObject(data).bigDecimalValue()) - } - } - - def getConvertedOI( - inputOI: ObjectInspector, - outputOI: ObjectInspector): ObjectInspector = { - ObjectInspectorConverters.getConvertedOI(inputOI, outputOI, true) - } - - def prepareWritable(w: Writable): Writable = { - w 
- } - - def setTblNullFormat(crtTbl: CreateTableDesc, tbl: Table) = {} -} - -private[hive] class ShimFileSinkDesc( - var dir: String, - var tableInfo: TableDesc, - var compressed: Boolean) - extends FileSinkDesc(dir, tableInfo, compressed) { -} diff --git a/streaming/pom.xml b/streaming/pom.xml index 5ca55a4f680bb..095bfcde1b06c 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -105,13 +105,5 @@ - - - ../python - - pyspark/streaming/*.py - - - diff --git a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css new file mode 100644 index 0000000000000..b22c884bfebdb --- /dev/null +++ b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.css @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +.graph { + font: 10px sans-serif; +} + +.axis path, .axis line { + fill: none; + stroke: gray; + shape-rendering: crispEdges; +} + +.axis text { + fill: gray; +} + +.tooltip-inner { + max-width: 500px !important; // Make sure we only have one line tooltip +} + +.line { + fill: none; + stroke: #0088cc; + stroke-width: 1.5px; +} + +.bar rect { + fill: #0088cc; + shape-rendering: crispEdges; +} + +.bar rect:hover { + fill: #00c2ff; +} + +.timeline { + width: 500px; +} + +.histogram { + width: auto; +} + +span.expand-input-rate { + cursor: pointer; +} + +tr.batch-table-cell-highlight > td { + background-color: #D6FFE4 !important; +} diff --git a/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js new file mode 100644 index 0000000000000..75251f493ad22 --- /dev/null +++ b/streaming/src/main/resources/org/apache/spark/streaming/ui/static/streaming-page.js @@ -0,0 +1,321 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +// timeFormat: StreamingPage.scala will generate a global "timeFormat" dictionary to store the time +// and its formatted string. Because we cannot specify a timezone in JavaScript, to make sure the +// server and client use the same timezone, we use the "timeFormat" dictionary to format all time +// values used in the graphs. + +// A global margin left for all timeline graphs. It will be set in "registerTimeline". This will be +// used to align all timeline graphs. +var maxMarginLeftForTimeline = 0; + +// The max X values for all histograms. It will be set in "registerHistogram". +var maxXForHistogram = 0; + +var histogramBinCount = 10; +var yValueFormat = d3.format(",.2f"); + +// Show a tooltip "text" for "node" +function showBootstrapTooltip(node, text) { + $(node).tooltip({title: text, trigger: "manual", container: "body"}); + $(node).tooltip("show"); +} + +// Hide the tooltip for "node" +function hideBootstrapTooltip(node) { + $(node).tooltip("destroy"); +} + +// Register a timeline graph. All timeline graphs should be register before calling any +// "drawTimeline" so that we can determine the max margin left for all timeline graphs. +function registerTimeline(minY, maxY) { + var numOfChars = yValueFormat(maxY).length; + // A least width for "maxY" in the graph + var pxForMaxY = numOfChars * 8 + 10; + // Make sure we have enough space to show the ticks in the y axis of timeline + maxMarginLeftForTimeline = pxForMaxY > maxMarginLeftForTimeline? pxForMaxY : maxMarginLeftForTimeline; +} + +// Register a histogram graph. All histogram graphs should be register before calling any +// "drawHistogram" so that we can determine the max X value for histograms. +function registerHistogram(values, minY, maxY) { + var data = d3.layout.histogram().range([minY, maxY]).bins(histogramBinCount)(values); + // d.x is the y values while d.y is the x values + var maxX = d3.max(data, function(d) { return d.y; }); + maxXForHistogram = maxX > maxXForHistogram ? maxX : maxXForHistogram; +} + +// Draw a line between (x1, y1) and (x2, y2) +function drawLine(svg, xFunc, yFunc, x1, y1, x2, y2) { + var line = d3.svg.line() + .x(function(d) { return xFunc(d.x); }) + .y(function(d) { return yFunc(d.y); }); + var data = [{x: x1, y: y1}, {x: x2, y: y2}]; + svg.append("path") + .datum(data) + .style("stroke-dasharray", ("6, 6")) + .style("stroke", "lightblue") + .attr("class", "line") + .attr("d", line); +} + +/** + * @param id the `id` used in the html `div` tag + * @param data the data for the timeline graph + * @param minX the min value of X axis + * @param maxX the max value of X axis + * @param minY the min value of Y axis + * @param maxY the max value of Y axis + * @param unitY the unit of Y axis + * @param batchInterval if "batchInterval" is specified, we will draw a line for "batchInterval" in the graph + */ +function drawTimeline(id, data, minX, maxX, minY, maxY, unitY, batchInterval) { + // Hide the right border of "". We cannot use "css" directly, or "sorttable.js" will override them. 
+ d3.select(d3.select(id).node().parentNode) + .style("padding", "8px 0 8px 8px") + .style("border-right", "0px solid white"); + + var margin = {top: 20, right: 27, bottom: 30, left: maxMarginLeftForTimeline}; + var width = 500 - margin.left - margin.right; + var height = 150 - margin.top - margin.bottom; + + var x = d3.scale.linear().domain([minX, maxX]).range([0, width]); + var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]); + + var xAxis = d3.svg.axis().scale(x).orient("bottom").tickFormat(function(d) { + var formattedDate = timeFormat[d]; + var dotIndex = formattedDate.indexOf('.'); + if (dotIndex >= 0) { + // Remove milliseconds + return formattedDate.substring(0, dotIndex); + } else { + return formattedDate; + } + }); + var formatYValue = d3.format(",.2f"); + var yAxis = d3.svg.axis().scale(y).orient("left").ticks(5).tickFormat(formatYValue); + + var line = d3.svg.line() + .x(function(d) { return x(d.x); }) + .y(function(d) { return y(d.y); }); + + var svg = d3.select(id).append("svg") + .attr("width", width + margin.left + margin.right) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + // Only show the first and last time in the graph + xAxis.tickValues(x.domain()); + + svg.append("g") + .attr("class", "x axis") + .attr("transform", "translate(0," + height + ")") + .call(xAxis) + + svg.append("g") + .attr("class", "y axis") + .call(yAxis) + .append("text") + .attr("transform", "translate(0," + (-3) + ")") + .text(unitY); + + + if (batchInterval && batchInterval <= maxY) { + drawLine(svg, x, y, minX, batchInterval, maxX, batchInterval); + } + + svg.append("path") + .datum(data) + .attr("class", "line") + .attr("d", line); + + // If the user click one point in the graphs, jump to the batch row and highlight it. And + // recovery the batch row after 3 seconds if necessary. + // We need to remember the last clicked batch so that we can recovery it. + var lastClickedBatch = null; + var lastTimeout = null; + + // Add points to the line. However, we make it invisible at first. But when the user moves mouse + // over a point, it will be displayed with its detail. 
+ svg.selectAll(".point") + .data(data) + .enter().append("circle") + .attr("stroke", "white") // white and opacity = 0 make it invisible + .attr("fill", "white") + .attr("opacity", "0") + .style("cursor", "pointer") + .attr("cx", function(d) { return x(d.x); }) + .attr("cy", function(d) { return y(d.y); }) + .attr("r", function(d) { return 3; }) + .on('mouseover', function(d) { + var tip = formatYValue(d.y) + " " + unitY + " at " + timeFormat[d.x]; + showBootstrapTooltip(d3.select(this).node(), tip); + // show the point + d3.select(this) + .attr("stroke", "steelblue") + .attr("fill", "steelblue") + .attr("opacity", "1"); + }) + .on('mouseout', function() { + hideBootstrapTooltip(d3.select(this).node()); + // hide the point + d3.select(this) + .attr("stroke", "white") + .attr("fill", "white") + .attr("opacity", "0"); + }) + .on("click", function(d) { + if (lastTimeout != null) { + window.clearTimeout(lastTimeout); + } + if (lastClickedBatch != null) { + clearBatchRow(lastClickedBatch); + lastClickedBatch = null; + } + lastClickedBatch = d.x; + highlightBatchRow(lastClickedBatch) + lastTimeout = window.setTimeout(function () { + lastTimeout = null; + if (lastClickedBatch != null) { + clearBatchRow(lastClickedBatch); + lastClickedBatch = null; + } + }, 3000); // Clean up after 3 seconds + + var batchSelector = $("#batch-" + d.x); + var topOffset = batchSelector.offset().top - 15; + if (topOffset < 0) { + topOffset = 0; + } + $('html,body').animate({scrollTop: topOffset}, 200); + }); +} + +/** + * @param id the `id` used in the html `div` tag + * @param values the data for the histogram graph + * @param minY the min value of Y axis + * @param maxY the max value of Y axis + * @param unitY the unit of Y axis + * @param batchInterval if "batchInterval" is specified, we will draw a line for "batchInterval" in the graph + */ +function drawHistogram(id, values, minY, maxY, unitY, batchInterval) { + // Hide the left border of "". We cannot use "css" directly, or "sorttable.js" will override them. 
+ d3.select(d3.select(id).node().parentNode) + .style("padding", "8px 8px 8px 0") + .style("border-left", "0px solid white"); + + var margin = {top: 20, right: 30, bottom: 30, left: 10}; + var width = 300 - margin.left - margin.right; + var height = 150 - margin.top - margin.bottom; + + var x = d3.scale.linear().domain([0, maxXForHistogram]).range([0, width]); + var y = d3.scale.linear().domain([minY, maxY]).range([height, 0]); + + var xAxis = d3.svg.axis().scale(x).orient("top").ticks(5); + var yAxis = d3.svg.axis().scale(y).orient("left").ticks(0).tickFormat(function(d) { return ""; }); + + var data = d3.layout.histogram().range([minY, maxY]).bins(histogramBinCount)(values); + + var svg = d3.select(id).append("svg") + .attr("width", width + margin.left + margin.right) + .attr("height", height + margin.top + margin.bottom) + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + if (batchInterval && batchInterval <= maxY) { + drawLine(svg, x, y, 0, batchInterval, maxXForHistogram, batchInterval); + } + + svg.append("g") + .attr("class", "x axis") + .call(xAxis) + .append("text") + .attr("transform", "translate(" + (margin.left + width - 40) + ", 15)") + .text("#batches"); + + svg.append("g") + .attr("class", "y axis") + .call(yAxis) + + var bar = svg.selectAll(".bar") + .data(data) + .enter() + .append("g") + .attr("transform", function(d) { return "translate(0," + (y(d.x) - height + y(d.dx)) + ")";}) + .attr("class", "bar").append("rect") + .attr("width", function(d) { return x(d.y); }) + .attr("height", function(d) { return height - y(d.dx); }) + .on('mouseover', function(d) { + var percent = yValueFormat(d.y * 100.0 / values.length) + "%"; + var tip = d.y + " batches (" + percent + ") between " + yValueFormat(d.x) + " and " + yValueFormat(d.x + d.dx) + " " + unitY; + showBootstrapTooltip(d3.select(this).node(), tip); + }) + .on('mouseout', function() { + hideBootstrapTooltip(d3.select(this).node()); + }); + + if (batchInterval && batchInterval <= maxY) { + // Add the "stable" text to the graph below the batch interval line. 
+ var stableXOffset = x(maxXForHistogram) - 20; + var stableYOffset = y(batchInterval) + 15; + svg.append("text") + .style("fill", "lightblue") + .attr("class", "stable-text") + .attr("text-anchor", "middle") + .attr("transform", "translate(" + stableXOffset + "," + stableYOffset + ")") + .text("stable") + .on('mouseover', function(d) { + var tip = "Processing Time <= Batch Interval (" + yValueFormat(batchInterval) +" " + unitY +")"; + showBootstrapTooltip(d3.select(this).node(), tip); + }) + .on('mouseout', function() { + hideBootstrapTooltip(d3.select(this).node()); + }); + } +} + +$(function() { + var status = window.localStorage && window.localStorage.getItem("show-streams-detail") == "true"; + + $("span.expand-input-rate").click(function() { + status = !status; + $("#inputs-table").toggle('collapsed'); + // Toggle the class of the arrow between open and closed + $(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed'); + if (window.localStorage) { + window.localStorage.setItem("show-streams-detail", "" + status); + } + }); + + if (status) { + $("#inputs-table").toggle('collapsed'); + // Toggle the class of the arrow between open and closed + $(this).find('.expand-input-rate-arrow').toggleClass('arrow-open').toggleClass('arrow-closed'); + } +}); + +function highlightBatchRow(batch) { + $("#batch-" + batch).parent().addClass("batch-table-cell-highlight"); +} + +function clearBatchRow(batch) { + $("#batch-" + batch).parent().removeClass("batch-table-cell-highlight"); +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 7bfae253c3a0c..d8dc4e4101664 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -102,6 +102,44 @@ object Checkpoint extends Logging { Seq.empty } } + + /** Serialize the checkpoint, or throw any exception that occurs */ + def serialize(checkpoint: Checkpoint, conf: SparkConf): Array[Byte] = { + val compressionCodec = CompressionCodec.createCodec(conf) + val bos = new ByteArrayOutputStream() + val zos = compressionCodec.compressedOutputStream(bos) + val oos = new ObjectOutputStream(zos) + Utils.tryWithSafeFinally { + oos.writeObject(checkpoint) + } { + oos.close() + } + bos.toByteArray + } + + /** Deserialize a checkpoint from the input stream, or throw any exception that occurs */ + def deserialize(inputStream: InputStream, conf: SparkConf): Checkpoint = { + val compressionCodec = CompressionCodec.createCodec(conf) + var ois: ObjectInputStreamWithLoader = null + Utils.tryWithSafeFinally { + + // ObjectInputStream uses the last defined user-defined class loader in the stack + // to find classes, which maybe the wrong class loader. Hence, a inherited version + // of ObjectInputStream is used to explicitly use the current thread's default class + // loader to find and load classes. 
This is a well know Java issue and has popped up + // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627) + val zis = compressionCodec.compressedInputStream(inputStream) + ois = new ObjectInputStreamWithLoader(zis, + Thread.currentThread().getContextClassLoader) + val cp = ois.readObject.asInstanceOf[Checkpoint] + cp.validate() + cp + } { + if (ois != null) { + ois.close() + } + } + } } @@ -189,17 +227,10 @@ class CheckpointWriter( } def write(checkpoint: Checkpoint, clearCheckpointDataLater: Boolean) { - val bos = new ByteArrayOutputStream() - val zos = compressionCodec.compressedOutputStream(bos) - val oos = new ObjectOutputStream(zos) - Utils.tryWithSafeFinally { - oos.writeObject(checkpoint) - } { - oos.close() - } try { + val bytes = Checkpoint.serialize(checkpoint, conf) executor.execute(new CheckpointWriteHandler( - checkpoint.checkpointTime, bos.toByteArray, clearCheckpointDataLater)) + checkpoint.checkpointTime, bytes, clearCheckpointDataLater)) logDebug("Submitted checkpoint of time " + checkpoint.checkpointTime + " writer queue") } catch { case rej: RejectedExecutionException => @@ -264,25 +295,8 @@ object CheckpointReader extends Logging { checkpointFiles.foreach(file => { logInfo("Attempting to load checkpoint from file " + file) try { - var ois: ObjectInputStreamWithLoader = null - var cp: Checkpoint = null - Utils.tryWithSafeFinally { - val fis = fs.open(file) - // ObjectInputStream uses the last defined user-defined class loader in the stack - // to find classes, which maybe the wrong class loader. Hence, a inherited version - // of ObjectInputStream is used to explicitly use the current thread's default class - // loader to find and load classes. This is a well know Java issue and has popped up - // in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627) - val zis = compressionCodec.compressedInputStream(fis) - ois = new ObjectInputStreamWithLoader(zis, - Thread.currentThread().getContextClassLoader) - cp = ois.readObject.asInstanceOf[Checkpoint] - } { - if (ois != null) { - ois.close() - } - } - cp.validate() + val fis = fs.open(file) + val cp = Checkpoint.deserialize(fis, conf) logInfo("Checkpoint successfully loaded from file " + file) logInfo("Checkpoint was generated at time " + cp.checkpointTime) return Some(cp) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index 175140481e5ae..40789c66f3991 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -45,7 +45,7 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { startTime = time outputStreams.foreach(_.initialize(zeroTime)) outputStreams.foreach(_.remember(rememberDuration)) - outputStreams.foreach(_.validate) + outputStreams.foreach(_.validateAtStart) inputStreams.par.foreach(_.start()) } } @@ -110,6 +110,10 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { .toArray } + def getInputStreamName(streamId: Int): Option[String] = synchronized { + inputStreams.find(_.id == streamId).map(_.name) + } + def generateJobs(time: Time): Seq[Job] = { logDebug("Generating jobs for time " + time) val jobs = this.synchronized { @@ -153,10 +157,10 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { def validate() { this.synchronized { - assert(batchDuration != null, "Batch duration has not been set") 
+ require(batchDuration != null, "Batch duration has not been set") // assert(batchDuration >= Milliseconds(100), "Batch duration of " + batchDuration + // " is very low") - assert(getOutputStreams().size > 0, "No output streams registered, so nothing to execute") + require(getOutputStreams().size > 0, "No output operations registered, so nothing to execute") } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 117cb59fb61c9..9cd9684d36404 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -17,28 +17,33 @@ package org.apache.spark.streaming -import java.io.InputStream -import java.util.concurrent.atomic.AtomicInteger +import java.io.{InputStream, NotSerializableException} +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} import scala.collection.Map import scala.collection.mutable.Queue import scala.reflect.ClassTag +import scala.util.control.NonFatal import akka.actor.{Props, SupervisorStrategy} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{BytesWritable, LongWritable, Text} -import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.lib.input.TextInputFormat +import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} + import org.apache.spark._ -import org.apache.spark.annotation.Experimental +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.input.FixedLengthBinaryInputFormat -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{RDD, RDDOperationScope} +import org.apache.spark.serializer.SerializationDebugger import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.StreamingContextState._ import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.receiver.{ActorReceiver, ActorSupervisorStrategy, Receiver} -import org.apache.spark.streaming.scheduler._ +import org.apache.spark.streaming.scheduler.{JobScheduler, StreamingListener} import org.apache.spark.streaming.ui.{StreamingJobProgressListener, StreamingTab} +import org.apache.spark.util.{CallSite, Utils} /** * Main entry point for Spark Streaming functionality. 
It provides methods used to create @@ -131,7 +136,7 @@ class StreamingContext private[streaming] ( if (sc_ != null) { sc_ } else if (isCheckpointPresent) { - new SparkContext(cp_.createSparkConf()) + SparkContext.getOrCreate(cp_.createSparkConf()) } else { throw new SparkException("Cannot create StreamingContext without a SparkContext") } @@ -152,7 +157,7 @@ class StreamingContext private[streaming] ( cp_.graph.restoreCheckpointData() cp_.graph } else { - assert(batchDur_ != null, "Batch duration for streaming context cannot be null") + require(batchDur_ != null, "Batch duration for StreamingContext cannot be null") val newGraph = new DStreamGraph() newGraph.setBatchDuration(batchDur_) newGraph @@ -193,14 +198,11 @@ class StreamingContext private[streaming] ( assert(env.metricsSystem != null) env.metricsSystem.registerSource(streamingSource) - /** Enumeration to identify current state of the StreamingContext */ - private[streaming] object StreamingContextState extends Enumeration { - type CheckpointState = Value - val Initialized, Started, Stopped = Value - } + private var state: StreamingContextState = INITIALIZED - import StreamingContextState._ - private[streaming] var state = Initialized + private val startSite = new AtomicReference[CallSite](null) + + private var shutdownHookRef: AnyRef = _ /** * Return the associated Spark context @@ -237,21 +239,46 @@ class StreamingContext private[streaming] ( } } + private[streaming] def isCheckpointingEnabled: Boolean = { + checkpointDir != null + } + private[streaming] def initialCheckpoint: Checkpoint = { if (isCheckpointPresent) cp_ else null } private[streaming] def getNewInputStreamId() = nextInputStreamId.getAndIncrement() + /** + * Execute a block of code in a scope such that all new DStreams created in this body will + * be part of the same scope. For more detail, see the comments in `doCompute`. + * + * Note: Return statements are NOT allowed in the given body. + */ + private[streaming] def withScope[U](body: => U): U = sparkContext.withScope(body) + + /** + * Execute a block of code in a scope such that all new DStreams created in this body will + * be part of the same scope. For more detail, see the comments in `doCompute`. + * + * Note: Return statements are NOT allowed in the given body. + */ + private[streaming] def withNamedScope[U](name: String)(body: => U): U = { + RDDOperationScope.withScope(sc, name, allowNesting = false, ignoreParent = false)(body) + } + /** * Create an input stream with any arbitrary user implemented receiver. * Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of Receiver + * + * @deprecated As of 1.0.0", replaced by `receiverStream`. 
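Illustrative usage sketch, not part of this patch: a user-defined receiver plugged in through receiverStream. The receiver class, its records, and the surrounding `ssc` are all hypothetical.

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.receiver.Receiver

// Hypothetical receiver that stores a fixed batch of records and then stops itself.
class StaticRecordsReceiver(records: Seq[String])
  extends Receiver[String](StorageLevel.MEMORY_ONLY) {

  def onStart(): Unit = {
    new Thread("static-records-receiver") {
      override def run(): Unit = {
        records.foreach(store)
        stop("All records delivered")
      }
    }.start()
  }

  def onStop(): Unit = { }  // nothing to clean up in this sketch
}

// `ssc` is an assumed, already-constructed StreamingContext.
val staticStream = ssc.receiverStream(new StaticRecordsReceiver(Seq("a", "b", "c")))
staticStream.print()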
*/ @deprecated("Use receiverStream", "1.0.0") - def networkStream[T: ClassTag]( - receiver: Receiver[T]): ReceiverInputDStream[T] = { - receiverStream(receiver) + def networkStream[T: ClassTag](receiver: Receiver[T]): ReceiverInputDStream[T] = { + withNamedScope("network stream") { + receiverStream(receiver) + } } /** @@ -259,9 +286,10 @@ class StreamingContext private[streaming] ( * Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of Receiver */ - def receiverStream[T: ClassTag]( - receiver: Receiver[T]): ReceiverInputDStream[T] = { - new PluggableInputDStream[T](this, receiver) + def receiverStream[T: ClassTag](receiver: Receiver[T]): ReceiverInputDStream[T] = { + withNamedScope("receiver stream") { + new PluggableInputDStream[T](this, receiver) + } } /** @@ -281,7 +309,7 @@ class StreamingContext private[streaming] ( name: String, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2, supervisorStrategy: SupervisorStrategy = ActorSupervisorStrategy.defaultStrategy - ): ReceiverInputDStream[T] = { + ): ReceiverInputDStream[T] = withNamedScope("actor stream") { receiverStream(new ActorReceiver[T](props, name, storageLevel, supervisorStrategy)) } @@ -298,7 +326,7 @@ class StreamingContext private[streaming] ( hostname: String, port: Int, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2 - ): ReceiverInputDStream[String] = { + ): ReceiverInputDStream[String] = withNamedScope("socket text stream") { socketStream[String](hostname, port, SocketReceiver.bytesToLines, storageLevel) } @@ -336,7 +364,7 @@ class StreamingContext private[streaming] ( hostname: String, port: Int, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_SER_2 - ): ReceiverInputDStream[T] = { + ): ReceiverInputDStream[T] = withNamedScope("raw socket stream") { new RawInputDStream[T](this, hostname, port, storageLevel) } @@ -410,7 +438,7 @@ class StreamingContext private[streaming] ( * file system. File names starting with . are ignored. * @param directory HDFS directory to monitor for new file */ - def textFileStream(directory: String): DStream[String] = { + def textFileStream(directory: String): DStream[String] = withNamedScope("text file stream") { fileStream[LongWritable, Text, TextInputFormat](directory).map(_._2.toString) } @@ -432,14 +460,15 @@ class StreamingContext private[streaming] ( @Experimental def binaryRecordsStream( directory: String, - recordLength: Int): DStream[Array[Byte]] = { + recordLength: Int): DStream[Array[Byte]] = withNamedScope("binary records stream") { val conf = sc_.hadoopConfiguration conf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, recordLength) val br = fileStream[LongWritable, BytesWritable, FixedLengthBinaryInputFormat]( - directory, FileInputDStream.defaultFilter : Path => Boolean, newFilesOnly=true, conf) + directory, FileInputDStream.defaultFilter: Path => Boolean, newFilesOnly = true, conf) val data = br.map { case (k, v) => val bytes = v.getBytes - assert(bytes.length == recordLength, "Byte array does not have correct length") + require(bytes.length == recordLength, "Byte array does not have correct length. " + + s"${bytes.length} did not equal recordLength: $recordLength") bytes } data @@ -479,7 +508,7 @@ class StreamingContext private[streaming] ( /** * Create a unified DStream from multiple DStreams of the same type and same slide duration. 
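Usage sketch, not part of the patch, for the `union` described above; `ssc` and the host/port values are placeholders.

// Two independent text sources of the same element type.
val lines1 = ssc.socketTextStream("localhost", 9999)
val lines2 = ssc.socketTextStream("localhost", 9998)

// A single DStream that carries the records of both inputs, batch by batch.
val allLines = ssc.union(Seq(lines1, lines2))
allLines.count().print()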
*/ - def union[T: ClassTag](streams: Seq[DStream[T]]): DStream[T] = { + def union[T: ClassTag](streams: Seq[DStream[T]]): DStream[T] = withScope { new UnionDStream[T](streams.toArray) } @@ -490,7 +519,7 @@ class StreamingContext private[streaming] ( def transform[T: ClassTag]( dstreams: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[T] - ): DStream[T] = { + ): DStream[T] = withScope { new TransformedDStream[T](dstreams, sparkContext.clean(transformFunc)) } @@ -505,29 +534,77 @@ class StreamingContext private[streaming] ( assert(graph != null, "Graph is null") graph.validate() - assert( - checkpointDir == null || checkpointDuration != null, + require( + !isCheckpointingEnabled || checkpointDuration != null, "Checkpoint directory has been set, but the graph checkpointing interval has " + "not been set. Please use StreamingContext.checkpoint() to set the interval." ) + + // Verify whether the DStream checkpoint is serializable + if (isCheckpointingEnabled) { + val checkpoint = new Checkpoint(this, Time.apply(0)) + try { + Checkpoint.serialize(checkpoint, conf) + } catch { + case e: NotSerializableException => + throw new NotSerializableException( + "DStream checkpointing has been enabled but the DStreams with their functions " + + "are not serializable\nSerialization stack:\n" + + SerializationDebugger.find(checkpoint).map("\t- " + _).mkString("\n") + ) + } + } + } + + /** + * :: DeveloperApi :: + * + * Return the current state of the context. The context can be in three possible states - + * - StreamingContextState.INTIALIZED - The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + * - StreamingContextState.ACTIVE - The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + * - StreamingContextState.STOPPED - The context has been stopped and cannot be used any more. + */ + @DeveloperApi + def getState(): StreamingContextState = synchronized { + state } /** * Start the execution of the streams. * - * @throws SparkException if the context has already been started or stopped. + * @throws IllegalStateException if the StreamingContext is already stopped. 
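A minimal sketch, not part of the patch, of guarding lifecycle calls with the new `getState()`; `ssc` is an assumed existing StreamingContext.

import org.apache.spark.streaming.StreamingContextState

ssc.getState() match {
  case StreamingContextState.INITIALIZED =>
    ssc.start()                       // safe: nothing has been started yet
  case StreamingContextState.ACTIVE =>
    ()                                // already running; start() would only log a warning
  case StreamingContextState.STOPPED =>
    sys.error("Context is finished; build a new StreamingContext instead")
}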
*/ def start(): Unit = synchronized { - if (state == Started) { - throw new SparkException("StreamingContext has already been started") - } - if (state == Stopped) { - throw new SparkException("StreamingContext has already been stopped") + state match { + case INITIALIZED => + startSite.set(DStream.getCreationSite()) + sparkContext.setCallSite(startSite.get) + StreamingContext.ACTIVATION_LOCK.synchronized { + StreamingContext.assertNoOtherContextIsActive() + try { + validate() + scheduler.start() + state = StreamingContextState.ACTIVE + } catch { + case NonFatal(e) => + logError("Error starting the context, marking it as stopped", e) + scheduler.stop(false) + state = StreamingContextState.STOPPED + throw e + } + StreamingContext.setActiveContext(this) + } + shutdownHookRef = Utils.addShutdownHook( + StreamingContext.SHUTDOWN_HOOK_PRIORITY)(stopOnShutdown) + uiTab.foreach(_.attach()) + logInfo("StreamingContext started") + case ACTIVE => + logWarning("StreamingContext has already been started") + case STOPPED => + throw new IllegalStateException("StreamingContext has already been stopped") } - validate() - sparkContext.setCallSite(DStream.getCreationSite()) - scheduler.start() - state = Started } /** @@ -542,6 +619,8 @@ class StreamingContext private[streaming] ( * Wait for the execution to stop. Any exceptions that occurs during the execution * will be thrown in this thread. * @param timeout time to wait in milliseconds + * + * @deprecated As of 1.3.0, replaced by `awaitTerminationOrTimeout(Long)`. */ @deprecated("Use awaitTerminationOrTimeout(Long) instead", "1.3.0") def awaitTermination(timeout: Long) { @@ -562,13 +641,17 @@ class StreamingContext private[streaming] ( /** * Stop the execution of the streams immediately (does not wait for all received data - * to be processed). + * to be processed). By default, if `stopSparkContext` is not specified, the underlying + * SparkContext will also be stopped. This implicit behavior can be configured using the + * SparkConf configuration spark.streaming.stopSparkContextByDefault. * - * @param stopSparkContext if true, stops the associated SparkContext. The underlying SparkContext + * @param stopSparkContext If true, stops the associated SparkContext. The underlying SparkContext * will be stopped regardless of whether this StreamingContext has been * started. 
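Illustrative sketch, not part of the patch, of the new default behaviour of stop(); the master, app name, and batch interval below are arbitrary placeholders.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("StopBehaviourSketch")
  .set("spark.streaming.stopSparkContextByDefault", "false")

val ssc = new StreamingContext(conf, Seconds(1))
// ... define input streams and output operations, then start and await ...

ssc.stop()               // stops only the streaming machinery because of the flag above
ssc.sparkContext.stop()  // the underlying SparkContext must now be stopped explicitly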
*/ - def stop(stopSparkContext: Boolean = true): Unit = synchronized { + def stop( + stopSparkContext: Boolean = conf.getBoolean("spark.streaming.stopSparkContextByDefault", true) + ): Unit = synchronized { stop(stopSparkContext, false) } @@ -583,21 +666,36 @@ class StreamingContext private[streaming] ( * received data to be completed */ def stop(stopSparkContext: Boolean, stopGracefully: Boolean): Unit = synchronized { - state match { - case Initialized => logWarning("StreamingContext has not been started yet") - case Stopped => logWarning("StreamingContext has already been stopped") - case Started => - scheduler.stop(stopGracefully) - logInfo("StreamingContext stopped successfully") - waiter.notifyStop() + try { + state match { + case INITIALIZED => + logWarning("StreamingContext has not been started yet") + case STOPPED => + logWarning("StreamingContext has already been stopped") + case ACTIVE => + scheduler.stop(stopGracefully) + uiTab.foreach(_.detach()) + StreamingContext.setActiveContext(null) + waiter.notifyStop() + if (shutdownHookRef != null) { + Utils.removeShutdownHook(shutdownHookRef) + } + logInfo("StreamingContext stopped successfully") + } + // Even if we have already stopped, we still need to attempt to stop the SparkContext because + // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). + if (stopSparkContext) sc.stop() + } finally { + // The state should always be Stopped after calling `stop()`, even if we haven't started yet + state = STOPPED } - // Even if the streaming context has not been started, we still need to stop the SparkContext. - // Even if we have already stopped, we still need to attempt to stop the SparkContext because - // a user might stop(stopSparkContext = false) and then call stop(stopSparkContext = true). - if (stopSparkContext) sc.stop() - uiTab.foreach(_.detach()) - // The state should always be Stopped after calling `stop()`, even if we haven't started yet: - state = Stopped + } + + private def stopOnShutdown(): Unit = { + val stopGracefully = conf.getBoolean("spark.streaming.stopGracefullyOnShutdown", false) + logInfo(s"Invoking stop(stopGracefully=$stopGracefully) from shutdown hook") + // Do not stop SparkContext, let its own shutdown hook stop it + stop(stopSparkContext = false, stopGracefully = stopGracefully) } } @@ -608,8 +706,49 @@ class StreamingContext private[streaming] ( object StreamingContext extends Logging { - private[streaming] val DEFAULT_CLEANER_TTL = 3600 + /** + * Lock that guards activation of a StreamingContext as well as access to the singleton active + * StreamingContext in getActiveOrCreate(). + */ + private val ACTIVATION_LOCK = new Object() + + private val SHUTDOWN_HOOK_PRIORITY = Utils.SPARK_CONTEXT_SHUTDOWN_PRIORITY + 1 + + private val activeContext = new AtomicReference[StreamingContext](null) + + private def assertNoOtherContextIsActive(): Unit = { + ACTIVATION_LOCK.synchronized { + if (activeContext.get() != null) { + throw new IllegalStateException( + "Only one StreamingContext may be started in this JVM. " + + "Currently running StreamingContext was started at" + + activeContext.get.startSite.get.longForm) + } + } + } + + private def setActiveContext(ssc: StreamingContext): Unit = { + ACTIVATION_LOCK.synchronized { + activeContext.set(ssc) + } + } + + /** + * :: Experimental :: + * + * Get the currently active context, if there is one. Active means started but not stopped. 
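Usage sketch for the new singleton-style API, not part of the patch; `createContext` and everything inside it are hypothetical.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

def createContext(): StreamingContext = {
  val conf = new SparkConf().setMaster("local[2]").setAppName("GetActiveOrCreateSketch")
  val ssc = new StreamingContext(conf, Seconds(2))
  ssc.socketTextStream("localhost", 9999).print()  // at least one output operation is needed
  ssc
}

// Reuse the single active context if one is running, otherwise build a fresh one.
val ssc = StreamingContext.getActiveOrCreate(createContext _)
ssc.start()
ssc.awaitTermination()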
+ */ + @Experimental + def getActive(): Option[StreamingContext] = { + ACTIVATION_LOCK.synchronized { + Option(activeContext.get()) + } + } + /** + * @deprecated As of 1.3.0, replaced by implicit functions in the DStream companion object. + * This is kept here only for backward compatibility. + */ @deprecated("Replaced by implicit functions in the DStream companion object. This is " + "kept here only for backward compatibility.", "1.3.0") def toPairDStreamFunctions[K, V](stream: DStream[(K, V)]) @@ -619,10 +758,26 @@ object StreamingContext extends Logging { } /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the StreamingContext - * will be created by called the provided `creatingFunc`. + * :: Experimental :: + * + * Either return the "active" StreamingContext (that is, started but not stopped), or create a + * new StreamingContext that is + * @param creatingFunc Function to create a new StreamingContext + */ + @Experimental + def getActiveOrCreate(creatingFunc: () => StreamingContext): StreamingContext = { + ACTIVATION_LOCK.synchronized { + getActive().getOrElse { creatingFunc() } + } + } + + /** + * :: Experimental :: + * + * Either get the currently active StreamingContext (that is, started but not stopped), + * OR recreate a StreamingContext from checkpoint data in the given path. If checkpoint data + * does not exist in the provided, then create a new StreamingContext by calling the provided + * `creatingFunc`. * * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program * @param creatingFunc Function to create a new StreamingContext @@ -632,63 +787,41 @@ object StreamingContext extends Logging { * error in reading checkpoint data. By default, an exception will be * thrown on error. */ - def getOrCreate( + @Experimental + def getActiveOrCreate( checkpointPath: String, creatingFunc: () => StreamingContext, hadoopConf: Configuration = new Configuration(), createOnError: Boolean = false ): StreamingContext = { - val checkpointOption = CheckpointReader.read( - checkpointPath, new SparkConf(), hadoopConf, createOnError) - checkpointOption.map(new StreamingContext(null, _, null)).getOrElse(creatingFunc()) - } - - - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the StreamingContext - * will be created by called the provided `creatingFunc` on the provided `sparkContext`. Note - * that the SparkConf configuration in the checkpoint data will not be restored as the - * SparkContext has already been created. 
- * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new StreamingContext using the given SparkContext - * @param sparkContext SparkContext using which the StreamingContext will be created - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: SparkContext => StreamingContext, - sparkContext: SparkContext - ): StreamingContext = { - getOrCreate(checkpointPath, creatingFunc, sparkContext, createOnError = false) + ACTIVATION_LOCK.synchronized { + getActive().getOrElse { getOrCreate(checkpointPath, creatingFunc, hadoopConf, createOnError) } + } } /** * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be * recreated from the checkpoint data. If the data does not exist, then the StreamingContext - * will be created by called the provided `creatingFunc` on the provided `sparkContext`. Note - * that the SparkConf configuration in the checkpoint data will not be restored as the - * SparkContext has already been created. + * will be created by called the provided `creatingFunc`. * * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new StreamingContext using the given SparkContext - * @param sparkContext SparkContext using which the StreamingContext will be created - * @param createOnError Whether to create a new StreamingContext if there is an + * @param creatingFunc Function to create a new StreamingContext + * @param hadoopConf Optional Hadoop configuration if necessary for reading from the + * file system + * @param createOnError Optional, whether to create a new StreamingContext if there is an * error in reading checkpoint data. By default, an exception will be * thrown on error. */ def getOrCreate( checkpointPath: String, - creatingFunc: SparkContext => StreamingContext, - sparkContext: SparkContext, - createOnError: Boolean + creatingFunc: () => StreamingContext, + hadoopConf: Configuration = new Configuration(), + createOnError: Boolean = false ): StreamingContext = { val checkpointOption = CheckpointReader.read( - checkpointPath, sparkContext.conf, sparkContext.hadoopConfiguration, createOnError) - checkpointOption.map(new StreamingContext(sparkContext, _, null)) - .getOrElse(creatingFunc(sparkContext)) + checkpointPath, new SparkConf(), hadoopConf, createOnError) + checkpointOption.map(new StreamingContext(null, _, null)).getOrElse(creatingFunc()) } /** @@ -710,7 +843,7 @@ object StreamingContext extends Logging { ): SparkContext = { val conf = SparkContext.updatedConf( new SparkConf(), master, appName, sparkHome, jars, environment) - createNewSparkContext(conf) + new SparkContext(conf) } private[streaming] def rddToFileName[T](prefix: String, suffix: String, time: Time): String = { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java new file mode 100644 index 0000000000000..d7b639383ee34 --- /dev/null +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContextState.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming; + +import org.apache.spark.annotation.DeveloperApi; + +/** + * :: DeveloperApi :: + * + * Represents the state of a StreamingContext. + */ +@DeveloperApi +public enum StreamingContextState { + /** + * The context has been created, but not been started yet. + * Input DStreams, transformations and output operations can be created on the context. + */ + INITIALIZED, + + /** + * The context has been started, and been not stopped. + * Input DStreams, transformations and output operations cannot be created on the context. + */ + ACTIVE, + + /** + * The context has been stopped and cannot be used any more. + */ + STOPPED +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Time.scala b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala index 42c49678d24f0..92cfd7d40338c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Time.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Time.scala @@ -63,6 +63,11 @@ case class Time(private val millis: Long) { new Time((this.millis / t) * t) } + def floor(that: Duration, zeroTime: Time): Time = { + val t = that.milliseconds + new Time(((this.millis - zeroTime.milliseconds) / t) * t + zeroTime.milliseconds) + } + def isMultipleOf(that: Duration): Boolean = (this.millis % that.milliseconds == 0) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 93baad19e3ee1..959ac9c177f81 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -227,7 +227,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * @param numPartitions Number of partitions of each RDD in the new DStream. 
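A small worked example, not part of the patch, of the zero-time-aware floor overload added to Time above; the numbers are arbitrary.

import org.apache.spark.streaming.{Seconds, Time}

val zeroTime = Time(1000)   // when the first batch notionally starts
val t        = Time(4500)

// Align t to the batch grid anchored at zeroTime instead of at epoch 0:
// ((4500 - 1000) / 2000) * 2000 + 1000 = 3000
val aligned = t.floor(Seconds(2), zeroTime)   // Time(3000)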
*/ def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration, numPartitions: Int) - :JavaPairDStream[K, JIterable[V]] = { + : JavaPairDStream[K, JIterable[V]] = { dstream.groupByKeyAndWindow(windowDuration, slideDuration, numPartitions) .mapValues(asJavaIterable _) } @@ -247,7 +247,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner - ):JavaPairDStream[K, JIterable[V]] = { + ): JavaPairDStream[K, JIterable[V]] = { dstream.groupByKeyAndWindow(windowDuration, slideDuration, partitioner) .mapValues(asJavaIterable _) } @@ -262,7 +262,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * batching interval */ def reduceByKeyAndWindow(reduceFunc: JFunction2[V, V, V], windowDuration: Duration) - :JavaPairDStream[K, V] = { + : JavaPairDStream[K, V] = { dstream.reduceByKeyAndWindow(reduceFunc, windowDuration) } @@ -281,7 +281,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( reduceFunc: JFunction2[V, V, V], windowDuration: Duration, slideDuration: Duration - ):JavaPairDStream[K, V] = { + ): JavaPairDStream[K, V] = { dstream.reduceByKeyAndWindow(reduceFunc, windowDuration, slideDuration) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index 572d7d8e8753d..989e3a729ebc2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -148,6 +148,9 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { /** The underlying SparkContext */ val sparkContext = new JavaSparkContext(ssc.sc) + /** + * @deprecated As of 0.9.0, replaced by `sparkContext` + */ @deprecated("use sparkContext", "0.9.0") val sc: JavaSparkContext = sparkContext @@ -578,6 +581,28 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { ssc.addStreamingListener(streamingListener) } + /** + * :: DeveloperApi :: + * + * Return the current state of the context. The context can be in three possible states - + *
 + *  - StreamingContextState.INITIALIZED - The context has been created, but not been started yet.
 + *    Input DStreams, transformations and output operations can be created on the context.
 + *  - StreamingContextState.ACTIVE - The context has been started, and not been stopped.
 + *    Input DStreams, transformations and output operations cannot be created on the context.
 + *  - StreamingContextState.STOPPED - The context has been stopped and cannot be used any more.
+ */ + def getState(): StreamingContextState = { + ssc.getState() + } + /** * Start the execution of the streams. */ @@ -597,6 +622,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { * Wait for the execution to stop. Any exceptions that occurs during the execution * will be thrown in this thread. * @param timeout time to wait in milliseconds + * @deprecated As of 1.3.0, replaced by `awaitTerminationOrTimeout(Long)`. */ @deprecated("Use awaitTerminationOrTimeout(Long) instead", "1.3.0") def awaitTermination(timeout: Long): Unit = { @@ -655,6 +681,7 @@ object JavaStreamingContext { * * @param checkpointPath Checkpoint directory used in an earlier JavaStreamingContext program * @param factory JavaStreamingContextFactory object to create a new JavaStreamingContext + * @deprecated As of 1.4.0, replaced by `getOrCreate` without JavaStreamingContextFactor. */ @deprecated("use getOrCreate without JavaStreamingContextFactor", "1.4.0") def getOrCreate( @@ -677,6 +704,7 @@ object JavaStreamingContext { * @param factory JavaStreamingContextFactory object to create a new JavaStreamingContext * @param hadoopConf Hadoop configuration if necessary for reading from any HDFS compatible * file system + * @deprecated As of 1.4.0, replaced by `getOrCreate` without JavaStreamingContextFactor. */ @deprecated("use getOrCreate without JavaStreamingContextFactory", "1.4.0") def getOrCreate( @@ -702,6 +730,7 @@ object JavaStreamingContext { * file system * @param createOnError Whether to create a new JavaStreamingContext if there is an * error in reading checkpoint data. + * @deprecated As of 1.4.0, replaced by `getOrCreate` without JavaStreamingContextFactor. */ @deprecated("use getOrCreate without JavaStreamingContextFactory", "1.4.0") def getOrCreate( @@ -782,51 +811,6 @@ object JavaStreamingContext { new JavaStreamingContext(ssc) } - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the provided factory - * will be used to create a JavaStreamingContext. - * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new JavaStreamingContext - * @param sparkContext SparkContext using which the StreamingContext will be created - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: JFunction[JavaSparkContext, JavaStreamingContext], - sparkContext: JavaSparkContext - ): JavaStreamingContext = { - val ssc = StreamingContext.getOrCreate(checkpointPath, (sparkContext: SparkContext) => { - creatingFunc.call(new JavaSparkContext(sparkContext)).ssc - }, sparkContext.sc) - new JavaStreamingContext(ssc) - } - - /** - * Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. - * If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be - * recreated from the checkpoint data. If the data does not exist, then the provided factory - * will be used to create a JavaStreamingContext. 
- * - * @param checkpointPath Checkpoint directory used in an earlier StreamingContext program - * @param creatingFunc Function to create a new JavaStreamingContext - * @param sparkContext SparkContext using which the StreamingContext will be created - * @param createOnError Whether to create a new JavaStreamingContext if there is an - * error in reading checkpoint data. - */ - def getOrCreate( - checkpointPath: String, - creatingFunc: JFunction[JavaSparkContext, JavaStreamingContext], - sparkContext: JavaSparkContext, - createOnError: Boolean - ): JavaStreamingContext = { - val ssc = StreamingContext.getOrCreate(checkpointPath, (sparkContext: SparkContext) => { - creatingFunc.call(new JavaSparkContext(sparkContext)).ssc - }, sparkContext.sc, createOnError) - new JavaStreamingContext(ssc) - } - /** * Find the JAR from which a given class was loaded, to make it easy for users to pass * their JARs to StreamingContext. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala index 4c28654ef6413..d06401245ff17 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala @@ -109,7 +109,7 @@ private[python] object PythonTransformFunctionSerializer { } def serialize(func: PythonTransformFunction): Array[Byte] = { - assert(serializer != null, "Serializer has not been registered!") + require(serializer != null, "Serializer has not been registered!") // get the id of PythonTransformFunction in py4j val h = Proxy.getInvocationHandler(func.asInstanceOf[Proxy]) val f = h.getClass().getDeclaredField("id") @@ -119,7 +119,7 @@ private[python] object PythonTransformFunctionSerializer { } def deserialize(bytes: Array[Byte]): PythonTransformFunction = { - assert(serializer != null, "Serializer has not been registered!") + require(serializer != null, "Serializer has not been registered!") serializer.loads(bytes) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 83d41f5762444..192aa6a139bcb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -25,12 +25,13 @@ import scala.language.implicitConversions import scala.reflect.ClassTag import scala.util.matching.Regex -import org.apache.spark.{Logging, SparkException} -import org.apache.spark.rdd.{BlockRDD, PairRDDFunctions, RDD} +import org.apache.spark.{Logging, SparkContext, SparkException} +import org.apache.spark.rdd.{BlockRDD, PairRDDFunctions, RDD, RDDOperationScope} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.StreamingContext.rddToFileName import org.apache.spark.streaming.scheduler.Job +import org.apache.spark.streaming.ui.UIUtils import org.apache.spark.util.{CallSite, MetadataCleaner, Utils} /** @@ -60,6 +61,8 @@ abstract class DStream[T: ClassTag] ( @transient private[streaming] var ssc: StreamingContext ) extends Serializable with Logging { + validateAtInit() + // ======================================================================= // Methods that should be implemented by subclasses of DStream // ======================================================================= @@ -71,7 +74,7 @@ abstract class 
DStream[T: ClassTag] ( def dependencies: List[DStream[_]] /** Method that generates a RDD for the given time */ - def compute (validTime: Time): Option[RDD[T]] + def compute(validTime: Time): Option[RDD[T]] // ======================================================================= // Methods and fields available on all DStreams @@ -109,6 +112,44 @@ abstract class DStream[T: ClassTag] ( /* Set the creation call site */ private[streaming] val creationSite = DStream.getCreationSite() + /** + * The base scope associated with the operation that created this DStream. + * + * This is the medium through which we pass the DStream operation name (e.g. updatedStateByKey) + * to the RDDs created by this DStream. Note that we never use this scope directly in RDDs. + * Instead, we instantiate a new scope during each call to `compute` based on this one. + * + * This is not defined if the DStream is created outside of one of the public DStream operations. + */ + protected[streaming] val baseScope: Option[String] = { + Option(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY)) + } + + /** + * Make a scope that groups RDDs created in the same DStream operation in the same batch. + * + * Each DStream produces many scopes and each scope may be shared by other DStreams created + * in the same operation. Separate calls to the same DStream operation create separate scopes. + * For instance, `dstream.map(...).map(...)` creates two separate scopes per batch. + */ + private def makeScope(time: Time): Option[RDDOperationScope] = { + baseScope.map { bsJson => + val formattedBatchTime = UIUtils.formatBatchTime( + time.milliseconds, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) + val bs = RDDOperationScope.fromJson(bsJson) + val baseName = bs.name // e.g. countByWindow, "kafka stream [0]" + val scopeName = + if (baseName.length > 10) { + // If the operation name is too long, wrap the line + s"$baseName\n@ $formattedBatchTime" + } else { + s"$baseName @ $formattedBatchTime" + } + val scopeId = s"${bs.id}_${time.milliseconds}" + new RDDOperationScope(scopeName, id = scopeId) + } + } + /** Persist the RDDs of this DStream with the given storage level */ def persist(level: StorageLevel): DStream[T] = { if (this.isInitialized) { @@ -171,43 +212,57 @@ abstract class DStream[T: ClassTag] ( dependencies.foreach(_.initialize(zeroTime)) } - private[streaming] def validate() { - assert(rememberDuration != null, "Remember duration is set to null") + private def validateAtInit(): Unit = { + ssc.getState() match { + case StreamingContextState.INITIALIZED => + // good to go + case StreamingContextState.ACTIVE => + throw new IllegalStateException( + "Adding new inputs, transformations, and output operations after " + + "starting a context is not supported") + case StreamingContextState.STOPPED => + throw new IllegalStateException( + "Adding new inputs, transformations, and output operations after " + + "stopping a context is not supported") + } + } + + private[streaming] def validateAtStart() { + require(rememberDuration != null, "Remember duration is set to null") - assert( + require( !mustCheckpoint || checkpointDuration != null, "The checkpoint interval for " + this.getClass.getSimpleName + " has not been set." + " Please use DStream.checkpoint() to set the interval." ) - assert( + require( checkpointDuration == null || context.sparkContext.checkpointDir.isDefined, - "The checkpoint directory has not been set. 
Please use StreamingContext.checkpoint()" + - " or SparkContext.checkpoint() to set the checkpoint directory." + "The checkpoint directory has not been set. Please set it by StreamingContext.checkpoint()." ) - assert( + require( checkpointDuration == null || checkpointDuration >= slideDuration, "The checkpoint interval for " + this.getClass.getSimpleName + " has been set to " + checkpointDuration + " which is lower than its slide time (" + slideDuration + "). " + "Please set it to at least " + slideDuration + "." ) - assert( + require( checkpointDuration == null || checkpointDuration.isMultipleOf(slideDuration), "The checkpoint interval for " + this.getClass.getSimpleName + " has been set to " + checkpointDuration + " which not a multiple of its slide time (" + slideDuration + "). " + - "Please set it to a multiple " + slideDuration + "." + "Please set it to a multiple of " + slideDuration + "." ) - assert( + require( checkpointDuration == null || storageLevel != StorageLevel.NONE, "" + this.getClass.getSimpleName + " has been marked for checkpointing but the storage " + "level has not been set to enable persisting. Please use DStream.persist() to set the " + "storage level to use memory for better checkpointing performance." ) - assert( + require( checkpointDuration == null || rememberDuration > checkpointDuration, "The remember duration for " + this.getClass.getSimpleName + " has been set to " + rememberDuration + " which is not more than the checkpoint interval (" + @@ -216,7 +271,7 @@ abstract class DStream[T: ClassTag] ( val metadataCleanerDelay = MetadataCleaner.getDelaySeconds(ssc.conf) logInfo("metadataCleanupDelay = " + metadataCleanerDelay) - assert( + require( metadataCleanerDelay < 0 || rememberDuration.milliseconds < metadataCleanerDelay * 1000, "It seems you are doing some DStream window operation or setting a checkpoint interval " + "which requires " + this.getClass.getSimpleName + " to remember generated RDDs for more " + @@ -226,7 +281,7 @@ abstract class DStream[T: ClassTag] ( math.ceil(rememberDuration.milliseconds / 1000.0).toInt + " seconds." ) - dependencies.foreach(_.validate()) + dependencies.foreach(_.validateAtStart()) logInfo("Slide time = " + slideDuration) logInfo("Storage level = " + storageLevel) @@ -278,28 +333,23 @@ abstract class DStream[T: ClassTag] ( * Get the RDD corresponding to the given time; either retrieve it from cache * or compute-and-cache it. */ - private[streaming] def getOrCompute(time: Time): Option[RDD[T]] = { + private[streaming] final def getOrCompute(time: Time): Option[RDD[T]] = { // If RDD was already generated, then retrieve it from HashMap, // or else compute the RDD generatedRDDs.get(time).orElse { // Compute the RDD if time is valid (e.g. correct time in a sliding window) // of RDD generation, else generate nothing. if (isTimeValid(time)) { - // Set the thread-local property for call sites to this DStream's creation site - // such that RDDs generated by compute gets that as their creation site. - // Note that this `getOrCompute` may get called from another DStream which may have - // set its own call site. So we store its call site in a temporary variable, - // set this DStream's creation site, generate RDDs and then restore the previous call site. 
- val prevCallSite = ssc.sparkContext.getCallSite() - ssc.sparkContext.setCallSite(creationSite) - // Disable checks for existing output directories in jobs launched by the streaming - // scheduler, since we may need to write output to an existing directory during checkpoint - // recovery; see SPARK-4835 for more details. We need to have this call here because - // compute() might cause Spark jobs to be launched. - val rddOption = PairRDDFunctions.disableOutputSpecValidation.withValue(true) { - compute(time) + + val rddOption = createRDDWithLocalProperties(time) { + // Disable checks for existing output directories in jobs launched by the streaming + // scheduler, since we may need to write output to an existing directory during checkpoint + // recovery; see SPARK-4835 for more details. We need to have this call here because + // compute() might cause Spark jobs to be launched. + PairRDDFunctions.disableOutputSpecValidation.withValue(true) { + compute(time) + } } - ssc.sparkContext.setCallSite(prevCallSite) rddOption.foreach { case newRDD => // Register the generated RDD for caching and checkpointing @@ -320,6 +370,41 @@ abstract class DStream[T: ClassTag] ( } } + /** + * Wrap a body of code such that the call site and operation scope + * information are passed to the RDDs created in this body properly. + */ + protected def createRDDWithLocalProperties[U](time: Time)(body: => U): U = { + val scopeKey = SparkContext.RDD_SCOPE_KEY + val scopeNoOverrideKey = SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY + // Pass this DStream's operation scope and creation site information to RDDs through + // thread-local properties in our SparkContext. Since this method may be called from another + // DStream, we need to temporarily store any old scope and creation site information to + // restore them later after setting our own. + val prevCallSite = ssc.sparkContext.getCallSite() + val prevScope = ssc.sparkContext.getLocalProperty(scopeKey) + val prevScopeNoOverride = ssc.sparkContext.getLocalProperty(scopeNoOverrideKey) + + try { + ssc.sparkContext.setCallSite(creationSite) + // Use the DStream's base scope for this RDD so we can (1) preserve the higher level + // DStream operation name, and (2) share this scope with other DStreams created in the + // same operation. Disallow nesting so that low-level Spark primitives do not show up. + // TODO: merge callsites with scopes so we can just reuse the code there + makeScope(time).foreach { s => + ssc.sparkContext.setLocalProperty(scopeKey, s.toJson) + ssc.sparkContext.setLocalProperty(scopeNoOverrideKey, "true") + } + + body + } finally { + // Restore any state that was modified before returning + ssc.sparkContext.setCallSite(prevCallSite) + ssc.sparkContext.setLocalProperty(scopeKey, prevScope) + ssc.sparkContext.setLocalProperty(scopeNoOverrideKey, prevScopeNoOverride) + } + } + /** * Generate a SparkStreaming job for the given time. This is an internal method that * should not be called directly. This default implementation creates a job @@ -439,7 +524,7 @@ abstract class DStream[T: ClassTag] ( // ======================================================================= /** Return a new DStream by applying a function to all elements of this DStream. 
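For illustration only, not part of the patch: with the scoping above, a chain like the one below shows up as two separate map scopes per batch in the DAG visualization. `lines` is an assumed existing DStream[String].

// Each operator call is wrapped in ssc.withScope, so every generated batch gets
// its own "map @ <batch time>" scope for each of the two calls.
val upper   = lines.map(_.toUpperCase)
val lengths = upper.map(_.length)
lengths.print()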
*/ - def map[U: ClassTag](mapFunc: T => U): DStream[U] = { + def map[U: ClassTag](mapFunc: T => U): DStream[U] = ssc.withScope { new MappedDStream(this, context.sparkContext.clean(mapFunc)) } @@ -447,26 +532,31 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream by applying a function to all elements of this DStream, * and then flattening the results */ - def flatMap[U: ClassTag](flatMapFunc: T => Traversable[U]): DStream[U] = { + def flatMap[U: ClassTag](flatMapFunc: T => Traversable[U]): DStream[U] = ssc.withScope { new FlatMappedDStream(this, context.sparkContext.clean(flatMapFunc)) } /** Return a new DStream containing only the elements that satisfy a predicate. */ - def filter(filterFunc: T => Boolean): DStream[T] = new FilteredDStream(this, filterFunc) + def filter(filterFunc: T => Boolean): DStream[T] = ssc.withScope { + new FilteredDStream(this, context.sparkContext.clean(filterFunc)) + } /** * Return a new DStream in which each RDD is generated by applying glom() to each RDD of * this DStream. Applying glom() to an RDD coalesces all elements within each partition into * an array. */ - def glom(): DStream[Array[T]] = new GlommedDStream(this) - + def glom(): DStream[Array[T]] = ssc.withScope { + new GlommedDStream(this) + } /** * Return a new DStream with an increased or decreased level of parallelism. Each RDD in the * returned DStream has exactly numPartitions partitions. */ - def repartition(numPartitions: Int): DStream[T] = this.transform(_.repartition(numPartitions)) + def repartition(numPartitions: Int): DStream[T] = ssc.withScope { + this.transform(_.repartition(numPartitions)) + } /** * Return a new DStream in which each RDD is generated by applying mapPartitions() to each RDDs @@ -476,7 +566,7 @@ abstract class DStream[T: ClassTag] ( def mapPartitions[U: ClassTag]( mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean = false - ): DStream[U] = { + ): DStream[U] = ssc.withScope { new MapPartitionedDStream(this, context.sparkContext.clean(mapPartFunc), preservePartitioning) } @@ -484,14 +574,15 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream in which each RDD has a single element generated by reducing each RDD * of this DStream. */ - def reduce(reduceFunc: (T, T) => T): DStream[T] = + def reduce(reduceFunc: (T, T) => T): DStream[T] = ssc.withScope { this.map(x => (null, x)).reduceByKey(reduceFunc, 1).map(_._2) + } /** * Return a new DStream in which each RDD has a single element generated by counting each RDD * of this DStream. */ - def count(): DStream[Long] = { + def count(): DStream[Long] = ssc.withScope { this.map(_ => (null, 1L)) .transform(_.union(context.sparkContext.makeRDD(Seq((null, 0L)), 1))) .reduceByKey(_ + _) @@ -505,24 +596,29 @@ abstract class DStream[T: ClassTag] ( * `numPartitions` not specified). */ def countByValue(numPartitions: Int = ssc.sc.defaultParallelism)(implicit ord: Ordering[T] = null) - : DStream[(T, Long)] = + : DStream[(T, Long)] = ssc.withScope { this.map(x => (x, 1L)).reduceByKey((x: Long, y: Long) => x + y, numPartitions) + } /** * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. + * + * @deprecated As of 0.9.0, replaced by `foreachRDD`. */ @deprecated("use foreachRDD", "0.9.0") - def foreach(foreachFunc: RDD[T] => Unit): Unit = { + def foreach(foreachFunc: RDD[T] => Unit): Unit = ssc.withScope { this.foreachRDD(foreachFunc) } /** * Apply a function to each RDD in this DStream. 
This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. + * + * @deprecated As of 0.9.0, replaced by `foreachRDD`. */ @deprecated("use foreachRDD", "0.9.0") - def foreach(foreachFunc: (RDD[T], Time) => Unit): Unit = { + def foreach(foreachFunc: (RDD[T], Time) => Unit): Unit = ssc.withScope { this.foreachRDD(foreachFunc) } @@ -530,17 +626,18 @@ abstract class DStream[T: ClassTag] ( * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. */ - def foreachRDD(foreachFunc: RDD[T] => Unit) { - this.foreachRDD((r: RDD[T], t: Time) => foreachFunc(r)) + def foreachRDD(foreachFunc: RDD[T] => Unit): Unit = ssc.withScope { + val cleanedF = context.sparkContext.clean(foreachFunc, false) + this.foreachRDD((r: RDD[T], t: Time) => cleanedF(r)) } /** * Apply a function to each RDD in this DStream. This is an output operator, so * 'this' DStream will be registered as an output stream and therefore materialized. */ - def foreachRDD(foreachFunc: (RDD[T], Time) => Unit) { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + def foreachRDD(foreachFunc: (RDD[T], Time) => Unit): Unit = ssc.withScope { + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean new ForEachDStream(this, context.sparkContext.clean(foreachFunc, false)).register() } @@ -549,23 +646,24 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream. */ - def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + def transform[U: ClassTag](transformFunc: RDD[T] => RDD[U]): DStream[U] = ssc.withScope { + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean - transform((r: RDD[T], t: Time) => context.sparkContext.clean(transformFunc(r), false)) + val cleanedF = context.sparkContext.clean(transformFunc, false) + transform((r: RDD[T], t: Time) => cleanedF(r)) } /** * Return a new DStream in which each RDD is generated by applying a function * on each RDD of 'this' DStream. 
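Illustrative sketch, not part of the patch, of the two operators whose closures are now cleaned eagerly; `lines` is an assumed existing DStream[String].

// transform: arbitrary RDD-level logic applied to every batch.
val deduped = lines.transform(rdd => rdd.filter(_.nonEmpty).distinct())

// foreachRDD: an output operation; its function is cleaned once when the DStream
// graph is built, not once per batch.
deduped.foreachRDD { rdd =>
  rdd.take(5).foreach(println)
}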
*/ - def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + def transform[U: ClassTag](transformFunc: (RDD[T], Time) => RDD[U]): DStream[U] = ssc.withScope { + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = context.sparkContext.clean(transformFunc, false) - val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => { + val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => { assert(rdds.length == 1) cleanedF(rdds.head.asInstanceOf[RDD[T]], time) } @@ -578,9 +676,9 @@ abstract class DStream[T: ClassTag] ( */ def transformWith[U: ClassTag, V: ClassTag]( other: DStream[U], transformFunc: (RDD[T], RDD[U]) => RDD[V] - ): DStream[V] = { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + ): DStream[V] = ssc.withScope { + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = ssc.sparkContext.clean(transformFunc, false) transformWith(other, (rdd1: RDD[T], rdd2: RDD[U], time: Time) => cleanedF(rdd1, rdd2)) @@ -592,9 +690,9 @@ abstract class DStream[T: ClassTag] ( */ def transformWith[U: ClassTag, V: ClassTag]( other: DStream[U], transformFunc: (RDD[T], RDD[U], Time) => RDD[V] - ): DStream[V] = { - // because the DStream is reachable from the outer object here, and because - // DStreams can't be serialized with closures, we can't proactively check + ): DStream[V] = ssc.withScope { + // because the DStream is reachable from the outer object here, and because + // DStreams can't be serialized with closures, we can't proactively check // it for serializability and so we pass the optional false to SparkContext.clean val cleanedF = ssc.sparkContext.clean(transformFunc, false) val realTransformFunc = (rdds: Seq[RDD[_]], time: Time) => { @@ -610,7 +708,7 @@ abstract class DStream[T: ClassTag] ( * Print the first ten elements of each RDD generated in this DStream. This is an output * operator, so this DStream will be registered as an output stream and there materialized. */ - def print() { + def print(): Unit = ssc.withScope { print(10) } @@ -618,7 +716,7 @@ abstract class DStream[T: ClassTag] ( * Print the first num elements of each RDD generated in this DStream. This is an output * operator, so this DStream will be registered as an output stream and there materialized. 
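A sketch, not part of the patch, of transformWith, which pairs each batch of one stream with the corresponding batch of another; the sources, keys, and ports below are hypothetical.

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.DStream

val requests: DStream[(String, String)] =
  ssc.socketTextStream("localhost", 9999).map(line => (line, "request"))
val metadata: DStream[(String, String)] =
  ssc.socketTextStream("localhost", 9998).map(line => (line, "metadata"))

// Batch-wise RDD join of the two streams.
val joined = requests.transformWith(
  metadata,
  (r: RDD[(String, String)], m: RDD[(String, String)]) => r.join(m))
joined.print()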
*/ - def print(num: Int) { + def print(num: Int): Unit = ssc.withScope { def foreachFunc: (RDD[T], Time) => Unit = { (rdd: RDD[T], time: Time) => { val firstNum = rdd.take(num + 1) @@ -650,7 +748,7 @@ abstract class DStream[T: ClassTag] ( * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval */ - def window(windowDuration: Duration, slideDuration: Duration): DStream[T] = { + def window(windowDuration: Duration, slideDuration: Duration): DStream[T] = ssc.withScope { new WindowedDStream(this, windowDuration, slideDuration) } @@ -668,7 +766,7 @@ abstract class DStream[T: ClassTag] ( reduceFunc: (T, T) => T, windowDuration: Duration, slideDuration: Duration - ): DStream[T] = { + ): DStream[T] = ssc.withScope { this.reduce(reduceFunc).window(windowDuration, slideDuration).reduce(reduceFunc) } @@ -693,7 +791,7 @@ abstract class DStream[T: ClassTag] ( invReduceFunc: (T, T) => T, windowDuration: Duration, slideDuration: Duration - ): DStream[T] = { + ): DStream[T] = ssc.withScope { this.map(x => (1, x)) .reduceByKeyAndWindow(reduceFunc, invReduceFunc, windowDuration, slideDuration, 1) .map(_._2) @@ -709,7 +807,9 @@ abstract class DStream[T: ClassTag] ( * the new DStream will generate RDDs); must be a multiple of this * DStream's batching interval */ - def countByWindow(windowDuration: Duration, slideDuration: Duration): DStream[Long] = { + def countByWindow( + windowDuration: Duration, + slideDuration: Duration): DStream[Long] = ssc.withScope { this.map(_ => 1L).reduceByWindow(_ + _, _ - _, windowDuration, slideDuration) } @@ -730,8 +830,7 @@ abstract class DStream[T: ClassTag] ( slideDuration: Duration, numPartitions: Int = ssc.sc.defaultParallelism) (implicit ord: Ordering[T] = null) - : DStream[(T, Long)] = - { + : DStream[(T, Long)] = ssc.withScope { this.map(x => (x, 1L)).reduceByKeyAndWindow( (x: Long, y: Long) => x + y, (x: Long, y: Long) => x - y, @@ -746,32 +845,40 @@ abstract class DStream[T: ClassTag] ( * Return a new DStream by unifying data of another DStream with this DStream. * @param that Another DStream having the same slideDuration as this DStream. 
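[Editor's sketch, not part of the patch] For context, a hedged usage sketch of the windowed operations being wrapped above. `ssc` and `words` are assumed to already exist with one-second batches, and the checkpoint path is a placeholder; countByWindow is built on an invertible reduce, so checkpointing must be enabled for it.

import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream

def windowSketch(ssc: StreamingContext, words: DStream[String]): Unit = {
  ssc.checkpoint("/tmp/streaming-checkpoint")             // assumed path; countByWindow needs it
  val windowed = words.window(Seconds(30), Seconds(10))   // 30s window sliding every 10s
  windowed.print()
  words.countByWindow(Seconds(30), Seconds(10)).print()
  words.map(_.length).reduceByWindow((a, b) => math.max(a, b), Seconds(30), Seconds(10)).print()
}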
*/ - def union(that: DStream[T]): DStream[T] = new UnionDStream[T](Array(this, that)) + def union(that: DStream[T]): DStream[T] = ssc.withScope { + new UnionDStream[T](Array(this, that)) + } /** * Return all the RDDs defined by the Interval object (both end times included) */ - def slice(interval: Interval): Seq[RDD[T]] = { + def slice(interval: Interval): Seq[RDD[T]] = ssc.withScope { slice(interval.beginTime, interval.endTime) } /** * Return all the RDDs between 'fromTime' to 'toTime' (both included) */ - def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = { + def slice(fromTime: Time, toTime: Time): Seq[RDD[T]] = ssc.withScope { if (!isInitialized) { throw new SparkException(this + " has not been initialized") } - if (!(fromTime - zeroTime).isMultipleOf(slideDuration)) { - logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" - + slideDuration + ")") + + val alignedToTime = if ((toTime - zeroTime).isMultipleOf(slideDuration)) { + toTime + } else { + logWarning("toTime (" + toTime + ") is not a multiple of slideDuration (" + + slideDuration + ")") + toTime.floor(slideDuration, zeroTime) } - if (!(toTime - zeroTime).isMultipleOf(slideDuration)) { - logWarning("toTime (" + fromTime + ") is not a multiple of slideDuration (" - + slideDuration + ")") + + val alignedFromTime = if ((fromTime - zeroTime).isMultipleOf(slideDuration)) { + fromTime + } else { + logWarning("fromTime (" + fromTime + ") is not a multiple of slideDuration (" + + slideDuration + ")") + fromTime.floor(slideDuration, zeroTime) } - val alignedToTime = toTime.floor(slideDuration) - val alignedFromTime = fromTime.floor(slideDuration) logInfo("Slicing from " + fromTime + " to " + toTime + " (aligned to " + alignedFromTime + " and " + alignedToTime + ")") @@ -786,7 +893,7 @@ abstract class DStream[T: ClassTag] ( * The file name at each batch interval is generated based on `prefix` and * `suffix`: "prefix-TIME_IN_MS.suffix". */ - def saveAsObjectFiles(prefix: String, suffix: String = "") { + def saveAsObjectFiles(prefix: String, suffix: String = ""): Unit = ssc.withScope { val saveFunc = (rdd: RDD[T], time: Time) => { val file = rddToFileName(prefix, suffix, time) rdd.saveAsObjectFile(file) @@ -799,7 +906,7 @@ abstract class DStream[T: ClassTag] ( * of elements. The file name at each batch interval is generated based on * `prefix` and `suffix`: "prefix-TIME_IN_MS.suffix". */ - def saveAsTextFiles(prefix: String, suffix: String = "") { + def saveAsTextFiles(prefix: String, suffix: String = ""): Unit = ssc.withScope { val saveFunc = (rdd: RDD[T], time: Time) => { val file = rddToFileName(prefix, suffix, time) rdd.saveAsTextFile(file) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala index eca69f00188e4..6c1fab56740ee 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala @@ -69,7 +69,7 @@ import org.apache.spark.util.{TimeStampedHashMap, Utils} * processing semantics are undefined. 
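[Editor's sketch, not part of the patch] Before moving on to FileInputDStream: a small illustration of the time-alignment rule the rewritten DStream.slice() earlier in this hunk now applies. The slide duration and times are made up; the two-argument floor is the one used in the hunk above.

import org.apache.spark.streaming.{Milliseconds, Time}

val zeroTime = Time(0L)
val slide    = Milliseconds(2000)
val toTime   = Time(5300L)
// Same check the new slice() performs before slicing:
val alignedToTime =
  if ((toTime - zeroTime).isMultipleOf(slide)) toTime
  else toTime.floor(slide, zeroTime)   // floors to the previous slide boundary -> Time(4000)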
*/ private[streaming] -class FileInputDStream[K, V, F <: NewInputFormat[K,V]]( +class FileInputDStream[K, V, F <: NewInputFormat[K, V]]( @transient ssc_ : StreamingContext, directory: String, filter: Path => Boolean = FileInputDStream.defaultFilter, @@ -251,7 +251,7 @@ class FileInputDStream[K, V, F <: NewInputFormat[K,V]]( /** Generate one RDD from an array of files */ private def filesToRDD(files: Seq[String]): RDD[(K, V)] = { - val fileRDDs = files.map(file =>{ + val fileRDDs = files.map { file => val rdd = serializableConfOpt.map(_.value) match { case Some(config) => context.sparkContext.newAPIHadoopFile( file, @@ -267,7 +267,7 @@ class FileInputDStream[K, V, F <: NewInputFormat[K,V]]( "Refer to the streaming programming guide for more details.") } rdd - }) + } new UnionRDD(context.sparkContext, fileRDDs) } @@ -294,7 +294,7 @@ class FileInputDStream[K, V, F <: NewInputFormat[K,V]]( private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".readObject used") ois.defaultReadObject() - generatedRDDs = new mutable.HashMap[Time, RDD[(K,V)]] () + generatedRDDs = new mutable.HashMap[Time, RDD[(K, V)]]() batchTimeToSelectedFiles = new mutable.HashMap[Time, Array[String]] with mutable.SynchronizedMap[Time, Array[String]] recentlySelectedFiles = new mutable.HashSet[String]() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala index 685a32e1d280d..c109ceccc6989 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala @@ -37,7 +37,7 @@ class ForEachDStream[T: ClassTag] ( override def generateJob(time: Time): Option[Job] = { parent.getOrCompute(time) match { case Some(rdd) => - val jobFunc = () => { + val jobFunc = () => createRDDWithLocalProperties(time) { ssc.sparkContext.setCallSite(creationSite) foreachFunc(rdd, time) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala index e4ad4b509d8d8..d58c99a8ff321 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala @@ -17,10 +17,13 @@ package org.apache.spark.streaming.dstream -import org.apache.spark.streaming.{Time, Duration, StreamingContext} - import scala.reflect.ClassTag +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDDOperationScope +import org.apache.spark.streaming.{Time, Duration, StreamingContext} +import org.apache.spark.util.Utils + /** * This is the abstract base class for all input streams. This class provides methods * start() and stop() which is called by Spark Streaming system to start and stop receiving data. @@ -44,6 +47,32 @@ abstract class InputDStream[T: ClassTag] (@transient ssc_ : StreamingContext) /** This is an unique identifier for the input stream. */ val id = ssc.getNewInputStreamId() + /** A human-readable name of this InputDStream */ + private[streaming] def name: String = { + // e.g. 
FlumePollingDStream -> "Flume polling stream" + val newName = Utils.getFormattedClassName(this) + .replaceAll("InputDStream", "Stream") + .split("(?=[A-Z])") + .filter(_.nonEmpty) + .mkString(" ") + .toLowerCase + .capitalize + s"$newName [$id]" + } + + /** + * The base scope associated with the operation that created this DStream. + * + * For InputDStreams, we use the name of this DStream as the scope name. + * If an outer scope is given, we assume that it includes an alternative name for this stream. + */ + protected[streaming] override val baseScope: Option[String] = { + val scopeName = Option(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY)) + .map { json => RDDOperationScope.fromJson(json).name + s" [$id]" } + .getOrElse(name.toLowerCase) + Some(new RDDOperationScope(scopeName).toJson) + } + /** * Checks whether the 'time' is valid wrt slideDuration for generating RDD. * Additionally it also ensures valid times are in strictly increasing order. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index 8a58571632447..358e4c66df7ba 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -32,12 +32,14 @@ import org.apache.spark.streaming.StreamingContext.rddToFileName /** * Extra functions available on DStream of (key, value) pairs through an implicit conversion. */ -class PairDStreamFunctions[K, V](self: DStream[(K,V)]) +class PairDStreamFunctions[K, V](self: DStream[(K, V)]) (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K]) extends Serializable { private[streaming] def ssc = self.ssc + private[streaming] def sparkContext = self.context.sparkContext + private[streaming] def defaultPartitioner(numPartitions: Int = self.ssc.sc.defaultParallelism) = { new HashPartitioner(numPartitions) } @@ -46,7 +48,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions. */ - def groupByKey(): DStream[(K, Iterable[V])] = { + def groupByKey(): DStream[(K, Iterable[V])] = ssc.withScope { groupByKey(defaultPartitioner()) } @@ -54,7 +56,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with `numPartitions` partitions. */ - def groupByKey(numPartitions: Int): DStream[(K, Iterable[V])] = { + def groupByKey(numPartitions: Int): DStream[(K, Iterable[V])] = ssc.withScope { groupByKey(defaultPartitioner(numPartitions)) } @@ -62,7 +64,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying `groupByKey` on each RDD. The supplied * org.apache.spark.Partitioner is used to control the partitioning of each RDD. */ - def groupByKey(partitioner: Partitioner): DStream[(K, Iterable[V])] = { + def groupByKey(partitioner: Partitioner): DStream[(K, Iterable[V])] = ssc.withScope { val createCombiner = (v: V) => ArrayBuffer[V](v) val mergeValue = (c: ArrayBuffer[V], v: V) => (c += v) val mergeCombiner = (c1: ArrayBuffer[V], c2: ArrayBuffer[V]) => (c1 ++ c2) @@ -75,7 +77,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * merged using the associative reduce function. 
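[Editor's sketch, not part of the patch] A quick, standalone illustration of the string pipeline inside the new InputDStream.name helper above; the class name here is just an example.

val className = "FlumePollingInputDStream"
val label = className
  .replaceAll("InputDStream", "Stream")   // "FlumePollingStream"
  .split("(?=[A-Z])")                     // split before each capital letter
  .filter(_.nonEmpty)
  .mkString(" ")                          // "Flume Polling Stream"
  .toLowerCase
  .capitalize
// label == "Flume polling stream"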
Hash partitioning is used to generate the RDDs * with Spark's default number of partitions. */ - def reduceByKey(reduceFunc: (V, V) => V): DStream[(K, V)] = { + def reduceByKey(reduceFunc: (V, V) => V): DStream[(K, V)] = ssc.withScope { reduceByKey(reduceFunc, defaultPartitioner()) } @@ -84,7 +86,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * merged using the supplied reduce function. Hash partitioning is used to generate the RDDs * with `numPartitions` partitions. */ - def reduceByKey(reduceFunc: (V, V) => V, numPartitions: Int): DStream[(K, V)] = { + def reduceByKey( + reduceFunc: (V, V) => V, + numPartitions: Int): DStream[(K, V)] = ssc.withScope { reduceByKey(reduceFunc, defaultPartitioner(numPartitions)) } @@ -93,9 +97,10 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * merged using the supplied reduce function. org.apache.spark.Partitioner is used to control * the partitioning of each RDD. */ - def reduceByKey(reduceFunc: (V, V) => V, partitioner: Partitioner): DStream[(K, V)] = { - val cleanedReduceFunc = ssc.sc.clean(reduceFunc) - combineByKey((v: V) => v, cleanedReduceFunc, cleanedReduceFunc, partitioner) + def reduceByKey( + reduceFunc: (V, V) => V, + partitioner: Partitioner): DStream[(K, V)] = ssc.withScope { + combineByKey((v: V) => v, reduceFunc, reduceFunc, partitioner) } /** @@ -104,12 +109,20 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * org.apache.spark.rdd.PairRDDFunctions in the Spark core documentation for more information. */ def combineByKey[C: ClassTag]( - createCombiner: V => C, - mergeValue: (C, V) => C, - mergeCombiner: (C, C) => C, - partitioner: Partitioner, - mapSideCombine: Boolean = true): DStream[(K, C)] = { - new ShuffledDStream[K, V, C](self, createCombiner, mergeValue, mergeCombiner, partitioner, + createCombiner: V => C, + mergeValue: (C, V) => C, + mergeCombiner: (C, C) => C, + partitioner: Partitioner, + mapSideCombine: Boolean = true): DStream[(K, C)] = ssc.withScope { + val cleanedCreateCombiner = sparkContext.clean(createCombiner) + val cleanedMergeValue = sparkContext.clean(mergeValue) + val cleanedMergeCombiner = sparkContext.clean(mergeCombiner) + new ShuffledDStream[K, V, C]( + self, + cleanedCreateCombiner, + cleanedMergeValue, + cleanedMergeCombiner, + partitioner, mapSideCombine) } @@ -121,7 +134,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * @param windowDuration width of the window; must be a multiple of this DStream's * batching interval */ - def groupByKeyAndWindow(windowDuration: Duration): DStream[(K, Iterable[V])] = { + def groupByKeyAndWindow(windowDuration: Duration): DStream[(K, Iterable[V])] = ssc.withScope { groupByKeyAndWindow(windowDuration, self.slideDuration, defaultPartitioner()) } @@ -136,8 +149,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * DStream's batching interval */ def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration) - : DStream[(K, Iterable[V])] = - { + : DStream[(K, Iterable[V])] = ssc.withScope { groupByKeyAndWindow(windowDuration, slideDuration, defaultPartitioner()) } @@ -157,7 +169,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, numPartitions: Int - ): DStream[(K, Iterable[V])] = { + ): DStream[(K, Iterable[V])] = ssc.withScope { groupByKeyAndWindow(windowDuration, slideDuration, defaultPartitioner(numPartitions)) } @@ -176,7 +188,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, partitioner: 
Partitioner - ): DStream[(K, Iterable[V])] = { + ): DStream[(K, Iterable[V])] = ssc.withScope { val createCombiner = (v: Iterable[V]) => new ArrayBuffer[V] ++= v val mergeValue = (buf: ArrayBuffer[V], v: Iterable[V]) => buf ++= v val mergeCombiner = (buf1: ArrayBuffer[V], buf2: ArrayBuffer[V]) => buf1 ++= buf2 @@ -198,7 +210,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def reduceByKeyAndWindow( reduceFunc: (V, V) => V, windowDuration: Duration - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow(reduceFunc, windowDuration, self.slideDuration, defaultPartitioner()) } @@ -217,7 +229,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) reduceFunc: (V, V) => V, windowDuration: Duration, slideDuration: Duration - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow(reduceFunc, windowDuration, slideDuration, defaultPartitioner()) } @@ -238,7 +250,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, numPartitions: Int - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow(reduceFunc, windowDuration, slideDuration, defaultPartitioner(numPartitions)) } @@ -260,11 +272,10 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner - ): DStream[(K, V)] = { - val cleanedReduceFunc = ssc.sc.clean(reduceFunc) - self.reduceByKey(cleanedReduceFunc, partitioner) + ): DStream[(K, V)] = ssc.withScope { + self.reduceByKey(reduceFunc, partitioner) .window(windowDuration, slideDuration) - .reduceByKey(cleanedReduceFunc, partitioner) + .reduceByKey(reduceFunc, partitioner) } /** @@ -294,8 +305,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) slideDuration: Duration = self.slideDuration, numPartitions: Int = ssc.sc.defaultParallelism, filterFunc: ((K, V)) => Boolean = null - ): DStream[(K, V)] = { - + ): DStream[(K, V)] = ssc.withScope { reduceByKeyAndWindow( reduceFunc, invReduceFunc, windowDuration, slideDuration, defaultPartitioner(numPartitions), filterFunc @@ -328,7 +338,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) slideDuration: Duration, partitioner: Partitioner, filterFunc: ((K, V)) => Boolean - ): DStream[(K, V)] = { + ): DStream[(K, V)] = ssc.withScope { val cleanedReduceFunc = ssc.sc.clean(reduceFunc) val cleanedInvReduceFunc = ssc.sc.clean(invReduceFunc) @@ -349,7 +359,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) */ def updateStateByKey[S: ClassTag]( updateFunc: (Seq[V], Option[S]) => Option[S] - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { updateStateByKey(updateFunc, defaultPartitioner()) } @@ -365,7 +375,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def updateStateByKey[S: ClassTag]( updateFunc: (Seq[V], Option[S]) => Option[S], numPartitions: Int - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { updateStateByKey(updateFunc, defaultPartitioner(numPartitions)) } @@ -382,9 +392,10 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def updateStateByKey[S: ClassTag]( updateFunc: (Seq[V], Option[S]) => Option[S], partitioner: Partitioner - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { + val cleanedUpdateF = sparkContext.clean(updateFunc) val newUpdateFunc = (iterator: Iterator[(K, Seq[V], Option[S])]) => { - iterator.flatMap(t => updateFunc(t._2, t._3).map(s => (t._1, s))) + iterator.flatMap(t => cleanedUpdateF(t._2, t._3).map(s => (t._1, s))) } 
updateStateByKey(newUpdateFunc, partitioner, true) } @@ -406,7 +417,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) updateFunc: (Iterator[(K, Seq[V], Option[S])]) => Iterator[(K, S)], partitioner: Partitioner, rememberPartitioner: Boolean - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { new StateDStream(self, ssc.sc.clean(updateFunc), partitioner, rememberPartitioner, None) } @@ -425,9 +436,10 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) updateFunc: (Seq[V], Option[S]) => Option[S], partitioner: Partitioner, initialRDD: RDD[(K, S)] - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { + val cleanedUpdateF = sparkContext.clean(updateFunc) val newUpdateFunc = (iterator: Iterator[(K, Seq[V], Option[S])]) => { - iterator.flatMap(t => updateFunc(t._2, t._3).map(s => (t._1, s))) + iterator.flatMap(t => cleanedUpdateF(t._2, t._3).map(s => (t._1, s))) } updateStateByKey(newUpdateFunc, partitioner, true, initialRDD) } @@ -451,7 +463,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) partitioner: Partitioner, rememberPartitioner: Boolean, initialRDD: RDD[(K, S)] - ): DStream[(K, S)] = { + ): DStream[(K, S)] = ssc.withScope { new StateDStream(self, ssc.sc.clean(updateFunc), partitioner, rememberPartitioner, Some(initialRDD)) } @@ -460,8 +472,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying a map function to the value of each key-value pairs in * 'this' DStream without changing the key. */ - def mapValues[U: ClassTag](mapValuesFunc: V => U): DStream[(K, U)] = { - new MapValuedDStream[K, V, U](self, mapValuesFunc) + def mapValues[U: ClassTag](mapValuesFunc: V => U): DStream[(K, U)] = ssc.withScope { + new MapValuedDStream[K, V, U](self, sparkContext.clean(mapValuesFunc)) } /** @@ -470,8 +482,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) */ def flatMapValues[U: ClassTag]( flatMapValuesFunc: V => TraversableOnce[U] - ): DStream[(K, U)] = { - new FlatMapValuedDStream[K, V, U](self, flatMapValuesFunc) + ): DStream[(K, U)] = ssc.withScope { + new FlatMapValuedDStream[K, V, U](self, sparkContext.clean(flatMapValuesFunc)) } /** @@ -479,7 +491,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Hash partitioning is used to generate the RDDs with Spark's default number * of partitions. */ - def cogroup[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Iterable[V], Iterable[W]))] = { + def cogroup[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (Iterable[V], Iterable[W]))] = ssc.withScope { cogroup(other, defaultPartitioner()) } @@ -487,8 +500,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying 'cogroup' between RDDs of `this` DStream and `other` DStream. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. 
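[Editor's sketch, not part of the patch] A hedged example of the updateStateByKey path whose closure cleaning is tightened above: a running count per key. `ssc` and `pairs` are assumed, and the checkpoint path is a placeholder; stateful operations require checkpointing.

import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream

def runningCounts(ssc: StreamingContext, pairs: DStream[(String, Int)]): DStream[(String, Int)] = {
  ssc.checkpoint("/tmp/streaming-checkpoint")   // assumed path
  val updateFunc: (Seq[Int], Option[Int]) => Option[Int] =
    (values, state) => Some(values.sum + state.getOrElse(0))
  pairs.updateStateByKey[Int](updateFunc)
}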
*/ - def cogroup[W: ClassTag](other: DStream[(K, W)], numPartitions: Int) - : DStream[(K, (Iterable[V], Iterable[W]))] = { + def cogroup[W: ClassTag]( + other: DStream[(K, W)], + numPartitions: Int): DStream[(K, (Iterable[V], Iterable[W]))] = ssc.withScope { cogroup(other, defaultPartitioner(numPartitions)) } @@ -499,7 +513,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def cogroup[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (Iterable[V], Iterable[W]))] = { + ): DStream[(K, (Iterable[V], Iterable[W]))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.cogroup(rdd2, partitioner) @@ -510,7 +524,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying 'join' between RDDs of `this` DStream and `other` DStream. * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. */ - def join[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (V, W))] = { + def join[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (V, W))] = ssc.withScope { join[W](other, defaultPartitioner()) } @@ -518,7 +532,9 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * Return a new DStream by applying 'join' between RDDs of `this` DStream and `other` DStream. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. */ - def join[W: ClassTag](other: DStream[(K, W)], numPartitions: Int): DStream[(K, (V, W))] = { + def join[W: ClassTag]( + other: DStream[(K, W)], + numPartitions: Int): DStream[(K, (V, W))] = ssc.withScope { join[W](other, defaultPartitioner(numPartitions)) } @@ -529,7 +545,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def join[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (V, W))] = { + ): DStream[(K, (V, W))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.join(rdd2, partitioner) @@ -541,7 +557,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * `other` DStream. Hash partitioning is used to generate the RDDs with Spark's default * number of partitions. */ - def leftOuterJoin[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (V, Option[W]))] = { + def leftOuterJoin[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (V, Option[W]))] = ssc.withScope { leftOuterJoin[W](other, defaultPartitioner()) } @@ -553,7 +570,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def leftOuterJoin[W: ClassTag]( other: DStream[(K, W)], numPartitions: Int - ): DStream[(K, (V, Option[W]))] = { + ): DStream[(K, (V, Option[W]))] = ssc.withScope { leftOuterJoin[W](other, defaultPartitioner(numPartitions)) } @@ -565,7 +582,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def leftOuterJoin[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (V, Option[W]))] = { + ): DStream[(K, (V, Option[W]))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.leftOuterJoin(rdd2, partitioner) @@ -577,7 +594,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * `other` DStream. Hash partitioning is used to generate the RDDs with Spark's default * number of partitions. 
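[Editor's sketch, not part of the patch] Usage sketch of the cogroup/join family wrapped above; both input streams are assumed to exist and to share the same batch interval.

import org.apache.spark.streaming.dstream.DStream

def joinSketch(
    clicks: DStream[(String, Long)],
    users: DStream[(String, String)]): Unit = {
  val joined      = clicks.join(users)            // DStream[(String, (Long, String))]
  val withMissing = clicks.leftOuterJoin(users)   // DStream[(String, (Long, Option[String]))]
  val grouped     = clicks.cogroup(users)         // DStream[(String, (Iterable[Long], Iterable[String]))]
  joined.print()
  withMissing.print()
  grouped.print()
}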
*/ - def rightOuterJoin[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Option[V], W))] = { + def rightOuterJoin[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (Option[V], W))] = ssc.withScope { rightOuterJoin[W](other, defaultPartitioner()) } @@ -589,7 +607,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def rightOuterJoin[W: ClassTag]( other: DStream[(K, W)], numPartitions: Int - ): DStream[(K, (Option[V], W))] = { + ): DStream[(K, (Option[V], W))] = ssc.withScope { rightOuterJoin[W](other, defaultPartitioner(numPartitions)) } @@ -601,7 +619,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def rightOuterJoin[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (Option[V], W))] = { + ): DStream[(K, (Option[V], W))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.rightOuterJoin(rdd2, partitioner) @@ -613,7 +631,8 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) * `other` DStream. Hash partitioning is used to generate the RDDs with Spark's default * number of partitions. */ - def fullOuterJoin[W: ClassTag](other: DStream[(K, W)]): DStream[(K, (Option[V], Option[W]))] = { + def fullOuterJoin[W: ClassTag]( + other: DStream[(K, W)]): DStream[(K, (Option[V], Option[W]))] = ssc.withScope { fullOuterJoin[W](other, defaultPartitioner()) } @@ -625,7 +644,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def fullOuterJoin[W: ClassTag]( other: DStream[(K, W)], numPartitions: Int - ): DStream[(K, (Option[V], Option[W]))] = { + ): DStream[(K, (Option[V], Option[W]))] = ssc.withScope { fullOuterJoin[W](other, defaultPartitioner(numPartitions)) } @@ -637,7 +656,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def fullOuterJoin[W: ClassTag]( other: DStream[(K, W)], partitioner: Partitioner - ): DStream[(K, (Option[V], Option[W]))] = { + ): DStream[(K, (Option[V], Option[W]))] = ssc.withScope { self.transformWith( other, (rdd1: RDD[(K, V)], rdd2: RDD[(K, W)]) => rdd1.fullOuterJoin(rdd2, partitioner) @@ -651,7 +670,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def saveAsHadoopFiles[F <: OutputFormat[K, V]]( prefix: String, suffix: String - )(implicit fm: ClassTag[F]) { + )(implicit fm: ClassTag[F]): Unit = ssc.withScope { saveAsHadoopFiles(prefix, suffix, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } @@ -667,7 +686,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) valueClass: Class[_], outputFormatClass: Class[_ <: OutputFormat[_, _]], conf: JobConf = new JobConf(ssc.sparkContext.hadoopConfiguration) - ) { + ): Unit = ssc.withScope { // Wrap conf in SerializableWritable so that ForeachDStream can be serialized for checkpoints val serializableConf = new SerializableWritable(conf) val saveFunc = (rdd: RDD[(K, V)], time: Time) => { @@ -684,7 +703,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) def saveAsNewAPIHadoopFiles[F <: NewOutputFormat[K, V]]( prefix: String, suffix: String - )(implicit fm: ClassTag[F]) { + )(implicit fm: ClassTag[F]): Unit = ssc.withScope { saveAsNewAPIHadoopFiles(prefix, suffix, keyClass, valueClass, fm.runtimeClass.asInstanceOf[Class[F]]) } @@ -700,7 +719,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K,V)]) valueClass: Class[_], outputFormatClass: Class[_ <: NewOutputFormat[_, _]], conf: Configuration = ssc.sparkContext.hadoopConfiguration - ) { + ): Unit = ssc.withScope { // Wrap conf in SerializableWritable so that ForeachDStream can be serialized for checkpoints val serializableConf = new 
SerializableWritable(conf) val saveFunc = (rdd: RDD[(K, V)], time: Time) => { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala index ba88416ef4009..e4ff05e12f201 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala @@ -20,11 +20,12 @@ package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.{BlockRDD, RDD} -import org.apache.spark.storage.{BlockId, StorageLevel} +import org.apache.spark.storage.BlockId import org.apache.spark.streaming._ import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD -import org.apache.spark.streaming.receiver.{Receiver, WriteAheadLogBasedStoreResult} -import org.apache.spark.streaming.scheduler.{InputInfo, ReceivedBlockInfo} +import org.apache.spark.streaming.receiver.Receiver +import org.apache.spark.streaming.scheduler.InputInfo +import org.apache.spark.streaming.util.WriteAheadLogUtils /** * Abstract class for defining any [[org.apache.spark.streaming.dstream.InputDStream]] @@ -64,32 +65,46 @@ abstract class ReceiverInputDStream[T: ClassTag](@transient ssc_ : StreamingCont } else { // Otherwise, ask the tracker for all the blocks that have been allocated to this stream // for this batch - val blockInfos = - ssc.scheduler.receiverTracker.getBlocksOfBatch(validTime).get(id).getOrElse(Seq.empty) - val blockStoreResults = blockInfos.map { _.blockStoreResult } - val blockIds = blockStoreResults.map { _.blockId.asInstanceOf[BlockId] }.toArray + val receiverTracker = ssc.scheduler.receiverTracker + val blockInfos = receiverTracker.getBlocksOfBatch(validTime).getOrElse(id, Seq.empty) + val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray // Register the input blocks information into InputInfoTracker val inputInfo = InputInfo(id, blockInfos.map(_.numRecords).sum) ssc.scheduler.inputInfoTracker.reportInfo(validTime, inputInfo) - // Check whether all the results are of the same type - val resultTypes = blockStoreResults.map { _.getClass }.distinct - if (resultTypes.size > 1) { - logWarning("Multiple result types in block information, WAL information will be ignored.") - } + if (blockInfos.nonEmpty) { + // Are WAL record handles present with all the blocks + val areWALRecordHandlesPresent = blockInfos.forall { _.walRecordHandleOption.nonEmpty } - // If all the results are of type WriteAheadLogBasedStoreResult, then create - // WriteAheadLogBackedBlockRDD else create simple BlockRDD. - if (resultTypes.size == 1 && resultTypes.head == classOf[WriteAheadLogBasedStoreResult]) { - val logSegments = blockStoreResults.map { - _.asInstanceOf[WriteAheadLogBasedStoreResult].walRecordHandle - }.toArray - // Since storeInBlockManager = false, the storage level does not matter. - new WriteAheadLogBackedBlockRDD[T](ssc.sparkContext, - blockIds, logSegments, storeInBlockManager = false, StorageLevel.MEMORY_ONLY_SER) + if (areWALRecordHandlesPresent) { + // If all the blocks have WAL record handle, then create a WALBackedBlockRDD + val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray + val walRecordHandles = blockInfos.map { _.walRecordHandleOption.get }.toArray + new WriteAheadLogBackedBlockRDD[T]( + ssc.sparkContext, blockIds, walRecordHandles, isBlockIdValid) + } else { + // Else, create a BlockRDD. 
However, if there are some blocks with WAL info but not + // others then that is unexpected and log a warning accordingly. + if (blockInfos.find(_.walRecordHandleOption.nonEmpty).nonEmpty) { + if (WriteAheadLogUtils.enableReceiverLog(ssc.conf)) { + logError("Some blocks do not have Write Ahead Log information; " + + "this is unexpected and data may not be recoverable after driver failures") + } else { + logWarning("Some blocks have Write Ahead Log information; this is unexpected") + } + } + new BlockRDD[T](ssc.sc, blockIds) + } } else { - new BlockRDD[T](ssc.sc, blockIds) + // If no block is ready now, creating WriteAheadLogBackedBlockRDD or BlockRDD + // according to the configuration + if (WriteAheadLogUtils.enableReceiverLog(ssc.conf)) { + new WriteAheadLogBackedBlockRDD[T]( + ssc.sparkContext, Array.empty, Array.empty, Array.empty) + } else { + new BlockRDD[T](ssc.sc, Array.empty) + } } } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala index 1385ccbf56ee5..6a583bf2a3626 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala @@ -38,14 +38,14 @@ class ReducedWindowedDStream[K: ClassTag, V: ClassTag]( _windowDuration: Duration, _slideDuration: Duration, partitioner: Partitioner - ) extends DStream[(K,V)](parent.ssc) { + ) extends DStream[(K, V)](parent.ssc) { - assert(_windowDuration.isMultipleOf(parent.slideDuration), + require(_windowDuration.isMultipleOf(parent.slideDuration), "The window duration of ReducedWindowedDStream (" + _windowDuration + ") " + "must be multiple of the slide duration of parent DStream (" + parent.slideDuration + ")" ) - assert(_slideDuration.isMultipleOf(parent.slideDuration), + require(_slideDuration.isMultipleOf(parent.slideDuration), "The slide duration of ReducedWindowedDStream (" + _slideDuration + ") " + "must be multiple of the slide duration of parent DStream (" + parent.slideDuration + ")" ) @@ -58,7 +58,7 @@ class ReducedWindowedDStream[K: ClassTag, V: ClassTag]( super.persist(StorageLevel.MEMORY_ONLY_SER) reducedStream.persist(StorageLevel.MEMORY_ONLY_SER) - def windowDuration: Duration = _windowDuration + def windowDuration: Duration = _windowDuration override def dependencies: List[DStream[_]] = List(reducedStream) @@ -68,7 +68,7 @@ class ReducedWindowedDStream[K: ClassTag, V: ClassTag]( override def parentRememberDuration: Duration = rememberDuration + windowDuration - override def persist(storageLevel: StorageLevel): DStream[(K,V)] = { + override def persist(storageLevel: StorageLevel): DStream[(K, V)] = { super.persist(storageLevel) reducedStream.persist(storageLevel) this @@ -118,7 +118,7 @@ class ReducedWindowedDStream[K: ClassTag, V: ClassTag]( // Get the RDD of the reduced value of the previous window val previousWindowRDD = - getOrCompute(previousWindow.endTime).getOrElse(ssc.sc.makeRDD(Seq[(K,V)]())) + getOrCompute(previousWindow.endTime).getOrElse(ssc.sc.makeRDD(Seq[(K, V)]())) // Make the list of RDDs that needs to cogrouped together for reducing their reduced values val allRDDs = new ArrayBuffer[RDD[(K, V)]]() += previousWindowRDD ++= oldRDDs ++= newRDDs diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala index 
7757ccac09a58..e0ffd5d86b435 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala @@ -25,19 +25,19 @@ import scala.reflect.ClassTag private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( - parent: DStream[(K,V)], + parent: DStream[(K, V)], createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true - ) extends DStream[(K,C)] (parent.ssc) { + ) extends DStream[(K, C)] (parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration - override def compute(validTime: Time): Option[RDD[(K,C)]] = { + override def compute(validTime: Time): Option[RDD[(K, C)]] = { parent.getOrCompute(validTime) match { case Some(rdd) => Some(rdd.combineByKey[C]( createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine)) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 8b72bcf20653d..5ce5b7aae6e69 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -17,6 +17,8 @@ package org.apache.spark.streaming.dstream +import scala.util.control.NonFatal + import org.apache.spark.streaming.StreamingContext import org.apache.spark.storage.StorageLevel import org.apache.spark.util.NextIterator @@ -74,13 +76,17 @@ class SocketReceiver[T: ClassTag]( while(!isStopped && iterator.hasNext) { store(iterator.next) } - logInfo("Stopped receiving") - restart("Retrying connecting to " + host + ":" + port) + if (!isStopped()) { + restart("Socket data stream had no more data") + } else { + logInfo("Stopped receiving") + } } catch { case e: java.net.ConnectException => restart("Error connecting to " + host + ":" + port, e) - case t: Throwable => - restart("Error receiving data", t) + case NonFatal(e) => + logWarning("Error receiving data", e) + restart("Error receiving data", e) } finally { if (socket != null) { socket.close() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala index de8718d0a80fe..621d6dff788f4 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala @@ -51,7 +51,7 @@ class StateDStream[K: ClassTag, V: ClassTag, S: ClassTag]( val finalFunc = (iterator: Iterator[(K, (Iterable[V], Iterable[S]))]) => { val i = iterator.map(t => { val itr = t._2._2.iterator - val headOption = if(itr.hasNext) Some(itr.next) else None + val headOption = if (itr.hasNext) Some(itr.next()) else None (t._1, t._2._1.toSeq, headOption) }) updateFuncLocal(i) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala index 899865a906c27..4efba039f8959 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala @@ -44,7 +44,7 @@ class WindowedDStream[T: ClassTag]( // Persist parent level by 
default, as those RDDs are going to be obviously reused. parent.persist(StorageLevel.MEMORY_ONLY_SER) - def windowDuration: Duration = _windowDuration + def windowDuration: Duration = _windowDuration override def dependencies: List[DStream[_]] = List(parent) @@ -68,7 +68,7 @@ class WindowedDStream[T: ClassTag]( new PartitionerAwareUnionRDD(ssc.sc, rddsInWindow) } else { logDebug("Using normal union for windowing at " + validTime) - new UnionRDD(ssc.sc,rddsInWindow) + new UnionRDD(ssc.sc, rddsInWindow) } Some(windowRDD) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala index f4c8046e8a1a8..ffce6a4c3c74c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala @@ -23,6 +23,8 @@ import java.util.UUID import scala.reflect.ClassTag import scala.util.control.NonFatal +import org.apache.commons.io.FileUtils + import org.apache.spark._ import org.apache.spark.rdd.BlockRDD import org.apache.spark.storage.{BlockId, StorageLevel} @@ -31,30 +33,42 @@ import org.apache.spark.streaming.util._ /** * Partition class for [[org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD]]. * It contains information about the id of the blocks having this partition's data and - * the segment of the write ahead log that backs the partition. + * the corresponding record handle in the write ahead log that backs the partition. * @param index index of the partition * @param blockId id of the block having the partition data + * @param isBlockIdValid Whether the block Ids are valid (i.e., the blocks are present in the Spark + * executors). If not, then block lookups by the block ids will be skipped. + * By default, this is an empty array signifying true for all the blocks. * @param walRecordHandle Handle of the record in a write ahead log having the partition data */ private[streaming] class WriteAheadLogBackedBlockRDDPartition( val index: Int, val blockId: BlockId, - val walRecordHandle: WriteAheadLogRecordHandle) - extends Partition + val isBlockIdValid: Boolean, + val walRecordHandle: WriteAheadLogRecordHandle + ) extends Partition /** * This class represents a special case of the BlockRDD where the data blocks in * the block manager are also backed by data in write ahead logs. For reading * the data, this RDD first looks up the blocks by their ids in the block manager. - * If it does not find them, it looks up the corresponding data in the write ahead log. + * If it does not find them, it looks up the WAL using the corresponding record handle. + * The lookup of the blocks from the block manager can be skipped by setting the corresponding + * element in isBlockIdValid to false. This is a performance optimization which does not affect + * correctness, and it can be used in situations where it is known that the block + * does not exist in the Spark executors (e.g. after a failed driver is restarted). + * * * @param sc SparkContext * @param blockIds Ids of the blocks that contains this RDD's data * @param walRecordHandles Record handles in write ahead logs that contain this RDD's data - * @param storeInBlockManager Whether to store in the block manager after reading - * from the WAL record + * @param isBlockIdValid Whether the block Ids are valid (i.e., the blocks are present in the Spark + * executors). 
If not, then block lookups by the block ids will be skipped. + * By default, this is an empty array signifying true for all the blocks. + * @param storeInBlockManager Whether to store a block in the block manager + * after reading it from the WAL * @param storageLevel storage level to store when storing in block manager * (applicable when storeInBlockManager = true) */ @@ -63,23 +77,32 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( @transient sc: SparkContext, @transient blockIds: Array[BlockId], @transient walRecordHandles: Array[WriteAheadLogRecordHandle], - storeInBlockManager: Boolean, - storageLevel: StorageLevel) + @transient isBlockIdValid: Array[Boolean] = Array.empty, + storeInBlockManager: Boolean = false, + storageLevel: StorageLevel = StorageLevel.MEMORY_ONLY_SER) extends BlockRDD[T](sc, blockIds) { require( blockIds.length == walRecordHandles.length, - s"Number of block ids (${blockIds.length}) must be " + - s"the same as number of WAL record handles (${walRecordHandles.length}})!") + s"Number of block Ids (${blockIds.length}) must be " + + s" same as number of WAL record handles (${walRecordHandles.length}})") + + require( + isBlockIdValid.isEmpty || isBlockIdValid.length == blockIds.length, + s"Number of elements in isBlockIdValid (${isBlockIdValid.length}) must be " + + s" same as number of block Ids (${blockIds.length})") // Hadoop configuration is not serializable, so broadcast it as a serializable. @transient private val hadoopConfig = sc.hadoopConfiguration private val broadcastedHadoopConf = new SerializableWritable(hadoopConfig) + override def isValid(): Boolean = true + override def getPartitions: Array[Partition] = { assertValid() - Array.tabulate(blockIds.size) { i => - new WriteAheadLogBackedBlockRDDPartition(i, blockIds(i), walRecordHandles(i)) + Array.tabulate(blockIds.length) { i => + val isValid = if (isBlockIdValid.length == 0) true else isBlockIdValid(i) + new WriteAheadLogBackedBlockRDDPartition(i, blockIds(i), isValid, walRecordHandles(i)) } } @@ -94,51 +117,57 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( val blockManager = SparkEnv.get.blockManager val partition = split.asInstanceOf[WriteAheadLogBackedBlockRDDPartition] val blockId = partition.blockId - blockManager.get(blockId) match { - case Some(block) => // Data is in Block Manager - val iterator = block.data.asInstanceOf[Iterator[T]] - logDebug(s"Read partition data of $this from block manager, block $blockId") - iterator - case None => // Data not found in Block Manager, grab it from write ahead log file - var dataRead: ByteBuffer = null - var writeAheadLog: WriteAheadLog = null - try { - // The WriteAheadLogUtils.createLog*** method needs a directory to create a - // WriteAheadLog object as the default FileBasedWriteAheadLog needs a directory for - // writing log data. However, the directory is not needed if data needs to be read, hence - // a dummy path is provided to satisfy the method parameter requirements. - // FileBasedWriteAheadLog will not create any file or directory at that path. - // FileBasedWriteAheadLog will not create any file or directory at that path. Also, - // this dummy directory should not already exist otherwise the WAL will try to recover - // past events from the directory and throw errors. 
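[Editor's sketch, not part of the patch] A tiny standalone illustration, with made-up sizes, of the defaulting rule used by the new getPartitions above: an empty isBlockIdValid array is read as "every block id is valid".

val isBlockIdValid: Array[Boolean] = Array.empty
val numBlocks = 3
val validity = Array.tabulate(numBlocks) { i =>
  if (isBlockIdValid.length == 0) true else isBlockIdValid(i)
}
// validity.toSeq == Seq(true, true, true)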
- val nonExistentDirectory = new File( - System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString).getAbsolutePath - writeAheadLog = WriteAheadLogUtils.createLogForReceiver( - SparkEnv.get.conf, nonExistentDirectory, hadoopConf) - dataRead = writeAheadLog.read(partition.walRecordHandle) - } catch { - case NonFatal(e) => - throw new SparkException( - s"Could not read data from write ahead log record ${partition.walRecordHandle}", e) - } finally { - if (writeAheadLog != null) { - writeAheadLog.close() - writeAheadLog = null - } - } - if (dataRead == null) { + + def getBlockFromBlockManager(): Option[Iterator[T]] = { + blockManager.get(blockId).map(_.data.asInstanceOf[Iterator[T]]) + } + + def getBlockFromWriteAheadLog(): Iterator[T] = { + var dataRead: ByteBuffer = null + var writeAheadLog: WriteAheadLog = null + try { + // The WriteAheadLogUtils.createLog*** method needs a directory to create a + // WriteAheadLog object as the default FileBasedWriteAheadLog needs a directory for + // writing log data. However, the directory is not needed if data needs to be read, hence + // a dummy path is provided to satisfy the method parameter requirements. + // FileBasedWriteAheadLog will not create any file or directory at that path. + // FileBasedWriteAheadLog will not create any file or directory at that path. Also, + // this dummy directory should not already exist otherwise the WAL will try to recover + // past events from the directory and throw errors. + val nonExistentDirectory = new File( + System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString).getAbsolutePath + writeAheadLog = WriteAheadLogUtils.createLogForReceiver( + SparkEnv.get.conf, nonExistentDirectory, hadoopConf) + dataRead = writeAheadLog.read(partition.walRecordHandle) + } catch { + case NonFatal(e) => throw new SparkException( - s"Could not read data from write ahead log record ${partition.walRecordHandle}, " + - s"read returned null") + s"Could not read data from write ahead log record ${partition.walRecordHandle}", e) + } finally { + if (writeAheadLog != null) { + writeAheadLog.close() + writeAheadLog = null } - logInfo(s"Read partition data of $this from write ahead log, record handle " + - partition.walRecordHandle) - if (storeInBlockManager) { - blockManager.putBytes(blockId, dataRead, storageLevel) - logDebug(s"Stored partition data of $this into block manager with level $storageLevel") - dataRead.rewind() - } - blockManager.dataDeserialize(blockId, dataRead).asInstanceOf[Iterator[T]] + } + if (dataRead == null) { + throw new SparkException( + s"Could not read data from write ahead log record ${partition.walRecordHandle}, " + + s"read returned null") + } + logInfo(s"Read partition data of $this from write ahead log, record handle " + + partition.walRecordHandle) + if (storeInBlockManager) { + blockManager.putBytes(blockId, dataRead, storageLevel) + logDebug(s"Stored partition data of $this into block manager with level $storageLevel") + dataRead.rewind() + } + blockManager.dataDeserialize(blockId, dataRead).asInstanceOf[Iterator[T]] + } + + if (partition.isBlockIdValid) { + getBlockFromBlockManager().getOrElse { getBlockFromWriteAheadLog() } + } else { + getBlockFromWriteAheadLog() } } @@ -149,12 +178,23 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( */ override def getPreferredLocations(split: Partition): Seq[String] = { val partition = split.asInstanceOf[WriteAheadLogBackedBlockRDDPartition] - val blockLocations = getBlockIdLocations().get(partition.blockId) + val blockLocations = if 
(partition.isBlockIdValid) { + getBlockIdLocations().get(partition.blockId) + } else { + None + } + blockLocations.getOrElse { partition.walRecordHandle match { case fileSegment: FileBasedWriteAheadLogSegment => - HdfsUtils.getFileSegmentLocations( - fileSegment.path, fileSegment.offset, fileSegment.length, hadoopConfig) + try { + HdfsUtils.getFileSegmentLocations( + fileSegment.path, fileSegment.offset, fileSegment.length, hadoopConfig) + } catch { + case NonFatal(e) => + logError("Error getting WAL file segment locations", e) + Seq.empty + } case _ => Seq.empty } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala index 4bebcc5aa7ca0..8d73593ab6375 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala @@ -164,7 +164,7 @@ private[streaming] class BlockGenerator( private def keepPushingBlocks() { logInfo("Started block pushing thread") try { - while(!stopped) { + while (!stopped) { Option(blocksForPushing.poll(100, TimeUnit.MILLISECONDS)) match { case Some(block) => pushBlock(block) case None => @@ -191,7 +191,7 @@ private[streaming] class BlockGenerator( logError(message, t) listener.onError(message, t) } - + private def pushBlock(block: Block) { listener.onPushBlock(block.id, block.buffer) logInfo("Pushed block " + block.id) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala index 97db9ded83367..8df542b367d27 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/RateLimiter.scala @@ -17,8 +17,9 @@ package org.apache.spark.streaming.receiver +import com.google.common.util.concurrent.{RateLimiter => GuavaRateLimiter} + import org.apache.spark.{Logging, SparkConf} -import com.google.common.util.concurrent.{RateLimiter=>GuavaRateLimiter} /** Provides waitToPush() method to limit the rate at which receivers consume data. 
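[Editor's sketch, not part of the patch] A condensed, hypothetical distillation of the compute() read path finalized above: try the block manager first, and only fall back to the write ahead log when the block id is marked valid but the block is not cached; a block known to be absent skips the lookup entirely.

def readPartition[T](
    blockIdValid: Boolean,
    fromBlockManager: () => Option[Iterator[T]],
    fromWriteAheadLog: () => Iterator[T]): Iterator[T] = {
  if (blockIdValid) {
    // Block manager first; only touch the WAL if the block is not cached.
    fromBlockManager().getOrElse(fromWriteAheadLog())
  } else {
    // Known-absent block: go straight to the WAL (the optimization the flag enables).
    fromWriteAheadLog()
  }
}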
* diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index 651b534ac1900..207d64d9414ee 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -62,7 +62,7 @@ private[streaming] case class BlockManagerBasedStoreResult(blockId: StreamBlockI private[streaming] class BlockManagerBasedBlockHandler( blockManager: BlockManager, storageLevel: StorageLevel) extends ReceivedBlockHandler with Logging { - + def storeBlock(blockId: StreamBlockId, block: ReceivedBlock): ReceivedBlockStoreResult = { val putResult: Seq[(BlockId, BlockStatus)] = block match { case ArrayBufferBlock(arrayBuffer) => diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala index 4943f29395d12..33be067ebdaf2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala @@ -18,14 +18,14 @@ package org.apache.spark.streaming.receiver import java.nio.ByteBuffer +import java.util.concurrent.CountDownLatch import scala.collection.mutable.ArrayBuffer +import scala.concurrent._ import org.apache.spark.{Logging, SparkConf} import org.apache.spark.storage.StreamBlockId -import java.util.concurrent.CountDownLatch -import scala.concurrent._ -import ExecutionContext.Implicits.global +import org.apache.spark.util.ThreadUtils /** * Abstract class that is responsible for supervising a Receiver in the worker. @@ -46,6 +46,9 @@ private[streaming] abstract class ReceiverSupervisor( // Attach the executor to the receiver receiver.attachExecutor(this) + private val futureExecutionContext = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("receiver-supervisor-future", 128)) + /** Receiver id */ protected val streamId = receiver.streamId @@ -111,6 +114,7 @@ private[streaming] abstract class ReceiverSupervisor( stoppingError = error.orNull stopReceiver(message, error) onStop(message, error) + futureExecutionContext.shutdownNow() stopLatch.countDown() } @@ -150,6 +154,8 @@ private[streaming] abstract class ReceiverSupervisor( /** Restart receiver with delay */ def restartReceiver(message: String, error: Option[Throwable], delay: Int) { Future { + // This is a blocking action so we should use "futureExecutionContext" which is a cached + // thread pool. 
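[Editor's sketch, not part of the patch] The ReceiverSupervisor change above introduces a dedicated, shut-down-able execution context for the blocking restart logic (applied to the restart Future just below) instead of the global ExecutionContext. A hedged sketch of the same pattern; the plain Executors pool stands in for the patch's ThreadUtils helper.

import java.util.concurrent.Executors
import scala.concurrent.{ExecutionContext, Future}

// Cached, shut-down-able pool standing in for ThreadUtils.newDaemonCachedThreadPool.
val restartContext = ExecutionContext.fromExecutorService(Executors.newCachedThreadPool())

def restartWithDelay(delayMs: Long)(restart: => Unit): Future[Unit] = Future {
  Thread.sleep(delayMs)   // stand-in for the blocking stop/wait/start sequence
  restart
}(restartContext)

// On supervisor stop, the pool is shut down so restart futures cannot outlive it:
// restartContext.shutdownNow()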
logWarning("Restarting receiver with delay " + delay + " ms: " + message, error.getOrElse(null)) stopReceiver("Restarting receiver with delay " + delay + "ms: " + message, error) @@ -158,7 +164,7 @@ private[streaming] abstract class ReceiverSupervisor( logInfo("Starting receiver again") startReceiver() logInfo("Receiver started again") - } + }(futureExecutionContext) } /** Check if receiver has been marked for stopping */ diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala index 93f047b91018f..92938379b9c17 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala @@ -146,7 +146,7 @@ private[streaming] class ReceiverSupervisorImpl( val blockStoreResult = receivedBlockHandler.storeBlock(blockId, receivedBlock) logDebug(s"Pushed block $blockId in ${(System.currentTimeMillis - time)} ms") - val blockInfo = ReceivedBlockInfo(streamId, numRecords, blockStoreResult) + val blockInfo = ReceivedBlockInfo(streamId, numRecords, metadataOption, blockStoreResult) trackerEndpoint.askWithRetry[Boolean](AddBlock(blockInfo)) logDebug(s"Reported block $blockId") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index 1d1ddaaccf217..4af9b6d3b56ab 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -126,6 +126,10 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { eventLoop.post(ErrorReported(msg, e)) } + def isStarted(): Boolean = synchronized { + eventLoop != null + } + private def processEvent(event: JobSchedulerEvent) { try { event match { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala index 94beb590f52d6..dc11e84f29965 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala @@ -17,12 +17,38 @@ package org.apache.spark.streaming.scheduler -import org.apache.spark.streaming.receiver.ReceivedBlockStoreResult +import org.apache.spark.storage.StreamBlockId +import org.apache.spark.streaming.receiver.{ReceivedBlockStoreResult, WriteAheadLogBasedStoreResult} +import org.apache.spark.streaming.util.WriteAheadLogRecordHandle /** Information about blocks received by the receiver */ private[streaming] case class ReceivedBlockInfo( streamId: Int, numRecords: Long, + metadataOption: Option[Any], blockStoreResult: ReceivedBlockStoreResult - ) + ) { + + @volatile private var _isBlockIdValid = true + + def blockId: StreamBlockId = blockStoreResult.blockId + + def walRecordHandleOption: Option[WriteAheadLogRecordHandle] = { + blockStoreResult match { + case walStoreResult: WriteAheadLogBasedStoreResult => Some(walStoreResult.walRecordHandle) + case _ => None + } + } + + /** Is the block ID valid, that is, is the block present in the Spark executors. */ + def isBlockIdValid(): Boolean = _isBlockIdValid + + /** + * Set the block ID as invalid. 
This is useful when it is known that the block is not present + * in the Spark executors. + */ + def setBlockIdInvalid(): Unit = { + _isBlockIdValid = false + } +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index 14e769a281f51..7720259a5d794 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -45,7 +45,7 @@ private[streaming] case class BatchCleanupEvent(times: Seq[Time]) private[streaming] case class AllocatedBlocks(streamIdToAllocatedBlocks: Map[Int, Seq[ReceivedBlockInfo]]) { def getBlocksOfStream(streamId: Int): Seq[ReceivedBlockInfo] = { - streamIdToAllocatedBlocks.get(streamId).getOrElse(Seq.empty) + streamIdToAllocatedBlocks.getOrElse(streamId, Seq.empty) } } @@ -63,6 +63,7 @@ private[streaming] class ReceivedBlockTracker( hadoopConf: Configuration, streamIds: Seq[Int], clock: Clock, + recoverFromWriteAheadLog: Boolean, checkpointDirOption: Option[String]) extends Logging { @@ -75,7 +76,9 @@ private[streaming] class ReceivedBlockTracker( private var lastAllocatedBatchTime: Time = null // Recover block information from write ahead logs - recoverFromWriteAheadLogs() + if (recoverFromWriteAheadLog) { + recoverPastEvents() + } /** Add received block. This event will get written to the write ahead log (if enabled). */ def addBlock(receivedBlockInfo: ReceivedBlockInfo): Boolean = synchronized { @@ -150,7 +153,7 @@ private[streaming] class ReceivedBlockTracker( * returns only after the files are cleaned up. */ def cleanupOldBatches(cleanupThreshTime: Time, waitForCompletion: Boolean): Unit = synchronized { - assert(cleanupThreshTime.milliseconds < clock.getTimeMillis()) + require(cleanupThreshTime.milliseconds < clock.getTimeMillis()) val timesToCleanup = timeToAllocatedBlocks.keys.filter { _ < cleanupThreshTime }.toSeq logInfo("Deleting batches " + timesToCleanup) writeToLog(BatchCleanupEvent(timesToCleanup)) @@ -167,10 +170,11 @@ private[streaming] class ReceivedBlockTracker( * Recover all the tracker actions from the write ahead logs to recover the state (unallocated * and allocated block info) prior to failure. */ - private def recoverFromWriteAheadLogs(): Unit = synchronized { + private def recoverPastEvents(): Unit = synchronized { // Insert the recovered block information def insertAddedBlock(receivedBlockInfo: ReceivedBlockInfo) { logTrace(s"Recovery: Inserting added block $receivedBlockInfo") + receivedBlockInfo.setBlockIdInvalid() getReceivedBlockQueue(receivedBlockInfo.streamId) += receivedBlockInfo } @@ -224,19 +228,9 @@ private[streaming] class ReceivedBlockTracker( /** Optionally create the write ahead log manager only if the feature is enabled */ private def createWriteAheadLog(): Option[WriteAheadLog] = { - if (WriteAheadLogUtils.enableReceiverLog(conf)) { - if (checkpointDirOption.isEmpty) { - throw new SparkException( - "Cannot enable receiver write-ahead log without checkpoint directory set. " + - "Please use streamingContext.checkpoint() to set the checkpoint directory. 
" + - "See documentation for more details.") - } + checkpointDirOption.map { checkpointDir => val logDir = ReceivedBlockTracker.checkpointDirToLogDir(checkpointDirOption.get) - - val log = WriteAheadLogUtils.createLogForDriver(conf, logDir, hadoopConf) - Some(log) - } else { - None + WriteAheadLogUtils.createLogForDriver(conf, logDir, hadoopConf) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverInfo.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverInfo.scala index 52f08b9c9de68..de85f24dd988d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverInfo.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverInfo.scala @@ -32,6 +32,7 @@ case class ReceiverInfo( active: Boolean, location: String, lastErrorMessage: String = "", - lastError: String = "" + lastError: String = "", + lastErrorTime: Long = -1L ) { } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala index 1af65716d3003..f1504b09c9873 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala @@ -62,6 +62,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false ssc.sparkContext.hadoopConfiguration, receiverInputStreamIds, ssc.scheduler.clock, + ssc.isCheckpointPresent, Option(ssc.checkpointDir) ) private val listenerBus = ssc.scheduler.listenerBus @@ -154,10 +155,16 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false private def deregisterReceiver(streamId: Int, message: String, error: String) { val newReceiverInfo = receiverInfo.get(streamId) match { case Some(oldInfo) => - oldInfo.copy(endpoint = null, active = false, lastErrorMessage = message, lastError = error) + val lastErrorTime = + if (error == null || error == "") -1 else ssc.scheduler.clock.getTimeMillis() + oldInfo.copy(endpoint = null, active = false, lastErrorMessage = message, + lastError = error, lastErrorTime = lastErrorTime) case None => logWarning("No prior receiver info") - ReceiverInfo(streamId, "", null, false, "", lastErrorMessage = message, lastError = error) + val lastErrorTime = + if (error == null || error == "") -1 else ssc.scheduler.clock.getTimeMillis() + ReceiverInfo(streamId, "", null, false, "", lastErrorMessage = message, + lastError = error, lastErrorTime = lastErrorTime) } receiverInfo -= streamId listenerBus.post(StreamingListenerReceiverStopped(newReceiverInfo)) @@ -181,7 +188,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false oldInfo.copy(lastErrorMessage = message, lastError = error) case None => logWarning("No prior receiver info") - ReceiverInfo(streamId, "", null, false, "", lastErrorMessage = message, lastError = error) + ReceiverInfo(streamId, "", null, false, "", lastErrorMessage = message, + lastError = error, lastErrorTime = ssc.scheduler.clock.getTimeMillis()) } receiverInfo(streamId) = newReceiverInfo listenerBus.post(StreamingListenerReceiverError(receiverInfo(streamId))) @@ -222,7 +230,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false class ReceiverLauncher { @transient val env = ssc.env @volatile @transient private var running = false - @transient val thread = new Thread() { + @transient val thread = new Thread() { override def 
run() { try { SparkEnv.set(env) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala index e219e27785533..f702bd5bc9466 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/AllBatchesTable.scala @@ -17,29 +17,36 @@ package org.apache.spark.streaming.ui +import java.text.SimpleDateFormat +import java.util.Date + import scala.xml.Node -import org.apache.spark.ui.UIUtils +import org.apache.spark.ui.{UIUtils => SparkUIUtils} -private[ui] abstract class BatchTableBase(tableId: String) { +private[ui] abstract class BatchTableBase(tableId: String, batchInterval: Long) { protected def columns: Seq[Node] = { Batch Time Input Size - Scheduling Delay - Processing Time + Scheduling Delay + {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "top")} + + Processing Time + {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "top")} } protected def baseRow(batch: BatchUIData): Seq[Node] = { val batchTime = batch.batchTime.milliseconds - val formattedBatchTime = UIUtils.formatDate(batch.batchTime.milliseconds) + val formattedBatchTime = UIUtils.formatBatchTime(batchTime, batchInterval) val eventCount = batch.numRecords val schedulingDelay = batch.schedulingDelay - val formattedSchedulingDelay = schedulingDelay.map(UIUtils.formatDuration).getOrElse("-") + val formattedSchedulingDelay = schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-") val processingTime = batch.processingDelay - val formattedProcessingTime = processingTime.map(UIUtils.formatDuration).getOrElse("-") + val formattedProcessingTime = processingTime.map(SparkUIUtils.formatDuration).getOrElse("-") + val batchTimeId = s"batch-$batchTime" - + {formattedBatchTime} @@ -76,7 +83,8 @@ private[ui] abstract class BatchTableBase(tableId: String) { private[ui] class ActiveBatchTable( runningBatches: Seq[BatchUIData], - waitingBatches: Seq[BatchUIData]) extends BatchTableBase("active-batches-table") { + waitingBatches: Seq[BatchUIData], + batchInterval: Long) extends BatchTableBase("active-batches-table", batchInterval) { override protected def columns: Seq[Node] = super.columns ++ Status @@ -96,10 +104,12 @@ private[ui] class ActiveBatchTable( } } -private[ui] class CompletedBatchTable(batches: Seq[BatchUIData]) - extends BatchTableBase("completed-batches-table") { +private[ui] class CompletedBatchTable(batches: Seq[BatchUIData], batchInterval: Long) + extends BatchTableBase("completed-batches-table", batchInterval) { - override protected def columns: Seq[Node] = super.columns ++ Total Delay + override protected def columns: Seq[Node] = super.columns ++ + Total Delay + {SparkUIUtils.tooltip("Total time taken to handle a batch", "top")} override protected def renderRows: Seq[Node] = { batches.flatMap(batch => {completedBatchRow(batch)}) @@ -107,7 +117,7 @@ private[ui] class CompletedBatchTable(batches: Seq[BatchUIData]) private def completedBatchRow(batch: BatchUIData): Seq[Node] = { val totalDelay = batch.totalDelay - val formattedTotalDelay = totalDelay.map(UIUtils.formatDuration).getOrElse("-") + val formattedTotalDelay = totalDelay.map(SparkUIUtils.formatDuration).getOrElse("-") baseRow(batch) ++ {formattedTotalDelay} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala index 
2da9a29e2529e..f75067669abe5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala @@ -17,17 +17,20 @@ package org.apache.spark.streaming.ui +import java.text.SimpleDateFormat +import java.util.Date import javax.servlet.http.HttpServletRequest -import scala.xml.{NodeSeq, Node} +import scala.xml.{NodeSeq, Node, Text} import org.apache.commons.lang3.StringEscapeUtils import org.apache.spark.streaming.Time -import org.apache.spark.ui.{UIUtils, WebUIPage} +import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage} import org.apache.spark.streaming.ui.StreamingJobProgressListener.{SparkJobId, OutputOpId} import org.apache.spark.ui.jobs.UIData.JobUIData +private case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobUIData: Option[JobUIData]) private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { private val streamingListener = parent.listener @@ -44,25 +47,33 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { Error } + private def generateJobRow( + outputOpId: OutputOpId, + outputOpDescription: Seq[Node], + formattedOutputOpDuration: String, + numSparkJobRowsInOutputOp: Int, + isFirstRow: Boolean, + sparkJob: SparkJobIdWithUIData): Seq[Node] = { + if (sparkJob.jobUIData.isDefined) { + generateNormalJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration, + numSparkJobRowsInOutputOp, isFirstRow, sparkJob.jobUIData.get) + } else { + generateDroppedJobRow(outputOpId, outputOpDescription, formattedOutputOpDuration, + numSparkJobRowsInOutputOp, isFirstRow, sparkJob.sparkJobId) + } + } + /** * Generate a row for a Spark Job. Because duplicated output op infos needs to be collapsed into * one cell, we use "rowspan" for the first row of a output op. */ - def generateJobRow( + private def generateNormalJobRow( outputOpId: OutputOpId, + outputOpDescription: Seq[Node], formattedOutputOpDuration: String, numSparkJobRowsInOutputOp: Int, isFirstRow: Boolean, sparkJob: JobUIData): Seq[Node] = { - val lastStageInfo = Option(sparkJob.stageIds) - .filter(_.nonEmpty) - .flatMap { ids => sparkListener.stageIdToInfo.get(ids.max) } - val lastStageData = lastStageInfo.flatMap { s => - sparkListener.stageIdToData.get((s.stageId, s.attemptId)) - } - - val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)") - val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("") val duration: Option[Long] = { sparkJob.submissionTime.map { start => val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) @@ -73,8 +84,8 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { sparkJob.stageIds.sorted.reverse.flatMap(sparkListener.stageIdToInfo.get). dropWhile(_.failureReason == None).take(1). // get the first info that contains failure flatMap(info => info.failureReason).headOption.getOrElse("") - val formattedDuration = duration.map(d => UIUtils.formatDuration(d)).getOrElse("-") - val detailUrl = s"${UIUtils.prependBaseUri(parent.basePath)}/jobs/job?id=${sparkJob.jobId}" + val formattedDuration = duration.map(d => SparkUIUtils.formatDuration(d)).getOrElse("-") + val detailUrl = s"${SparkUIUtils.prependBaseUri(parent.basePath)}/jobs/job?id=${sparkJob.jobId}" // In the first row, output op id and its information needs to be shown. In other rows, these // cells will be taken up due to "rowspan". 
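As an aside, not part of the patch itself: the "rowspan" grouping that generateJobRow/generateNormalJobRow relies on can be illustrated with a few lines of self-contained Scala XML. All names and values below are made up for illustration; only the pattern mirrors the code in BatchPage.scala — the first row of a group carries the shared cells with rowspan equal to the group size, and later rows omit them. The sketch assumes scala-xml is on the classpath, as it is for the streaming module.

    import scala.xml.Node

    object RowspanSketch {
      // Hypothetical data: one output-op description shared by several Spark job ids.
      def groupRows(description: String, jobIds: Seq[Int]): Seq[Node] = {
        jobIds.zipWithIndex.flatMap { case (jobId, idx) =>
          // Only the first row of the group renders the shared cell; "rowspan" makes it
          // span the remaining rows, which therefore emit no such cell of their own.
          val sharedCells: Seq[Node] =
            if (idx == 0) <td rowspan={jobIds.size.toString}>{description}</td> else Nil
          <tr>{sharedCells}<td>{jobId}</td></tr>
        }
      }

      def main(args: Array[String]): Unit = {
        groupRows("foreachRDD at Example.scala:42", Seq(0, 1, 2)).foreach(println)
      }
    }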
@@ -83,9 +94,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { if (isFirstRow) { {outputOpId.toString} - - {lastStageDescription} - {lastStageName} + {outputOpDescription} {formattedOutputOpDuration} } else { @@ -110,7 +119,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { { - UIUtils.makeProgressBar( + SparkUIUtils.makeProgressBar( started = sparkJob.numActiveTasks, completed = sparkJob.numCompletedTasks, failed = sparkJob.numFailedTasks, @@ -122,27 +131,96 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { } - private def generateOutputOpIdRow( - outputOpId: OutputOpId, sparkJobs: Seq[JobUIData]): Seq[Node] = { - val sparkjobDurations = sparkJobs.map(sparkJob => { - sparkJob.submissionTime.map { start => - val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) - end - start + /** + * If a job is dropped by sparkListener due to exceeding the limitation, we only show the job id + * with "-" cells. + */ + private def generateDroppedJobRow( + outputOpId: OutputOpId, + outputOpDescription: Seq[Node], + formattedOutputOpDuration: String, + numSparkJobRowsInOutputOp: Int, + isFirstRow: Boolean, + jobId: Int): Seq[Node] = { + // In the first row, output op id and its information needs to be shown. In other rows, these + // cells will be taken up due to "rowspan". + // scalastyle:off + val prefixCells = + if (isFirstRow) { + {outputOpId.toString} + {outputOpDescription} + {formattedOutputOpDuration} + } else { + Nil } - }) + // scalastyle:on + + + {prefixCells} + + {jobId.toString} + + + - + + - + + - + + - + + } + + private def generateOutputOpIdRow( + outputOpId: OutputOpId, sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = { + // We don't count the durations of dropped jobs + val sparkJobDurations = sparkJobs.filter(_.jobUIData.nonEmpty).map(_.jobUIData.get). + map(sparkJob => { + sparkJob.submissionTime.map { start => + val end = sparkJob.completionTime.getOrElse(System.currentTimeMillis()) + end - start + } + }) val formattedOutputOpDuration = - if (sparkjobDurations.exists(_ == None)) { - // If any job does not finish, set "formattedOutputOpDuration" to "-" + if (sparkJobDurations.isEmpty || sparkJobDurations.exists(_ == None)) { + // If no job or any job does not finish, set "formattedOutputOpDuration" to "-" "-" } else { - UIUtils.formatDuration(sparkjobDurations.flatMap(x => x).sum) + SparkUIUtils.formatDuration(sparkJobDurations.flatMap(x => x).sum) } - generateJobRow(outputOpId, formattedOutputOpDuration, sparkJobs.size, true, sparkJobs.head) ++ + + val description = generateOutputOpDescription(sparkJobs) + + generateJobRow( + outputOpId, description, formattedOutputOpDuration, sparkJobs.size, true, sparkJobs.head) ++ sparkJobs.tail.map { sparkJob => - generateJobRow(outputOpId, formattedOutputOpDuration, sparkJobs.size, false, sparkJob) + generateJobRow( + outputOpId, description, formattedOutputOpDuration, sparkJobs.size, false, sparkJob) }.flatMap(x => x) } + private def generateOutputOpDescription(sparkJobs: Seq[SparkJobIdWithUIData]): Seq[Node] = { + val lastStageInfo = + sparkJobs.flatMap(_.jobUIData).headOption. 
// Get the first JobUIData + flatMap { sparkJob => // For the first job, get the latest Stage info + if (sparkJob.stageIds.isEmpty) { + None + } else { + sparkListener.stageIdToInfo.get(sparkJob.stageIds.max) + } + } + val lastStageData = lastStageInfo.flatMap { s => + sparkListener.stageIdToData.get((s.stageId, s.attemptId)) + } + + val lastStageName = lastStageInfo.map(_.name).getOrElse("(Unknown Stage Name)") + val lastStageDescription = lastStageData.flatMap(_.description).getOrElse("") + + + {lastStageDescription} + ++ Text(lastStageName) + } + private def failureReasonCell(failureReason: String): Seq[Node] = { val isMultiline = failureReason.indexOf('\n') >= 0 // Display the first line by default @@ -187,10 +265,10 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { (outputOpId, outputOpIdAndSparkJobIds.map(_.sparkJobId).sorted) } sparkListener.synchronized { - val outputOpIdWithJobs: Seq[(OutputOpId, Seq[JobUIData])] = + val outputOpIdWithJobs: Seq[(OutputOpId, Seq[SparkJobIdWithUIData])] = outputOpIdToSparkJobIds.map { case (outputOpId, sparkJobIds) => - // Filter out spark Job ids that don't exist in sparkListener - (outputOpId, sparkJobIds.flatMap(getJobData)) + (outputOpId, + sparkJobIds.map(sparkJobId => SparkJobIdWithUIData(sparkJobId, getJobData(sparkJobId)))) } @@ -200,7 +278,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { { outputOpIdWithJobs.map { - case (outputOpId, jobs) => generateOutputOpIdRow(outputOpId, jobs) + case (outputOpId, sparkJobIds) => generateOutputOpIdRow(outputOpId, sparkJobIds) } } @@ -212,24 +290,25 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { val batchTime = Option(request.getParameter("id")).map(id => Time(id.toLong)).getOrElse { throw new IllegalArgumentException(s"Missing id parameter") } - val formattedBatchTime = UIUtils.formatDate(batchTime.milliseconds) + val formattedBatchTime = + UIUtils.formatBatchTime(batchTime.milliseconds, streamingListener.batchDuration) val batchUIData = streamingListener.getBatchUIData(batchTime).getOrElse { throw new IllegalArgumentException(s"Batch $formattedBatchTime does not exist") } val formattedSchedulingDelay = - batchUIData.schedulingDelay.map(UIUtils.formatDuration).getOrElse("-") + batchUIData.schedulingDelay.map(SparkUIUtils.formatDuration).getOrElse("-") val formattedProcessingTime = - batchUIData.processingDelay.map(UIUtils.formatDuration).getOrElse("-") - val formattedTotalDelay = batchUIData.totalDelay.map(UIUtils.formatDuration).getOrElse("-") + batchUIData.processingDelay.map(SparkUIUtils.formatDuration).getOrElse("-") + val formattedTotalDelay = batchUIData.totalDelay.map(SparkUIUtils.formatDuration).getOrElse("-") val summary: NodeSeq =
  • Batch Duration: - {UIUtils.formatDuration(streamingListener.batchDuration)} + {SparkUIUtils.formatDuration(streamingListener.batchDuration)}
  • Input data size: @@ -259,6 +338,6 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") { val content = summary ++ jobTable - UIUtils.headerSparkPage(s"Details of batch at $formattedBatchTime", content, parent) + SparkUIUtils.headerSparkPage(s"Details of batch at $formattedBatchTime", content, parent) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchUIData.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchUIData.scala index 99e10d2b0be12..a5514dfd71c9f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchUIData.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchUIData.scala @@ -26,7 +26,7 @@ private[ui] case class OutputOpIdAndSparkJobId(outputOpId: OutputOpId, sparkJobI private[ui] case class BatchUIData( val batchTime: Time, - val receiverNumRecords: Map[Int, Long], + val streamIdToNumRecords: Map[Int, Long], val submissionTime: Long, val processingStartTime: Option[Long], val processingEndTime: Option[Long], @@ -58,7 +58,7 @@ private[ui] case class BatchUIData( /** * The number of recorders received by the receivers in this batch. */ - def numRecords: Long = receiverNumRecords.map(_._2).sum + def numRecords: Long = streamIdToNumRecords.values.sum } private[ui] object BatchUIData { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala index d2729fa70d6d2..68e8ce98945e0 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala @@ -29,7 +29,6 @@ import org.apache.spark.streaming.scheduler._ import org.apache.spark.streaming.scheduler.StreamingListenerReceiverStarted import org.apache.spark.streaming.scheduler.StreamingListenerBatchStarted import org.apache.spark.streaming.scheduler.StreamingListenerBatchSubmitted -import org.apache.spark.util.Distribution private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) @@ -38,7 +37,7 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) private val waitingBatchUIData = new HashMap[Time, BatchUIData] private val runningBatchUIData = new HashMap[Time, BatchUIData] private val completedBatchUIData = new Queue[BatchUIData] - private val batchUIDataLimit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 100) + private val batchUIDataLimit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 1000) private var totalCompletedBatches = 0L private var totalReceivedRecords = 0L private var totalProcessedRecords = 0L @@ -145,7 +144,9 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) } } - def numReceivers: Int = ssc.graph.getReceiverInputStreams().size + def numReceivers: Int = synchronized { + receiverInfos.size + } def numTotalCompletedBatches: Long = synchronized { totalCompletedBatches @@ -175,39 +176,42 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) completedBatchUIData.toSeq } - def processingDelayDistribution: Option[Distribution] = synchronized { - extractDistribution(_.processingDelay) + def streamName(streamId: Int): Option[String] = { + ssc.graph.getInputStreamName(streamId) } - def schedulingDelayDistribution: Option[Distribution] = synchronized { - extractDistribution(_.schedulingDelay) - } + /** + * Return all InputDStream Ids + */ + def 
streamIds: Seq[Int] = ssc.graph.getInputStreams().map(_.id) - def totalDelayDistribution: Option[Distribution] = synchronized { - extractDistribution(_.totalDelay) - } - - def receivedRecordsDistributions: Map[Int, Option[Distribution]] = synchronized { - val latestBatchInfos = retainedBatches.reverse.take(batchUIDataLimit) - val latestReceiverNumRecords = latestBatchInfos.map(_.receiverNumRecords) - val streamIds = ssc.graph.getInputStreams().map(_.id) - streamIds.map { id => - val recordsOfParticularReceiver = - latestReceiverNumRecords.map(v => v.getOrElse(id, 0L).toDouble * 1000 / batchDuration) - val distribution = Distribution(recordsOfParticularReceiver) - (id, distribution) + /** + * Return all of the event rates for each InputDStream in each batch. The key of the return value + * is the stream id, and the value is a sequence of batch time with its event rate. + */ + def receivedEventRateWithBatchTime: Map[Int, Seq[(Long, Double)]] = synchronized { + val _retainedBatches = retainedBatches + val latestBatches = _retainedBatches.map { batchUIData => + (batchUIData.batchTime.milliseconds, batchUIData.streamIdToNumRecords) + } + streamIds.map { streamId => + val eventRates = latestBatches.map { + case (batchTime, streamIdToNumRecords) => + val numRecords = streamIdToNumRecords.getOrElse(streamId, 0L) + (batchTime, numRecords * 1000.0 / batchDuration) + } + (streamId, eventRates) }.toMap } def lastReceivedBatchRecords: Map[Int, Long] = synchronized { - val lastReceiverNumRecords = lastReceivedBatch.map(_.receiverNumRecords) - val streamIds = ssc.graph.getInputStreams().map(_.id) - lastReceiverNumRecords.map { receiverNumRecords => - streamIds.map { id => - (id, receiverNumRecords.getOrElse(id, 0L)) + val lastReceivedBlockInfoOption = lastReceivedBatch.map(_.streamIdToNumRecords) + lastReceivedBlockInfoOption.map { lastReceivedBlockInfo => + streamIds.map { streamId => + (streamId, lastReceivedBlockInfo.getOrElse(streamId, 0L)) }.toMap }.getOrElse { - streamIds.map(id => (id, 0L)).toMap + streamIds.map(streamId => (streamId, 0L)).toMap } } @@ -215,10 +219,6 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) receiverInfos.get(receiverId) } - def receiverIds(): Iterable[Int] = synchronized { - receiverInfos.keys - } - def lastCompletedBatch: Option[BatchUIData] = synchronized { completedBatchUIData.sortBy(_.batchTime)(Time.ordering).lastOption } @@ -227,15 +227,11 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) retainedBatches.lastOption } - private def retainedBatches: Seq[BatchUIData] = { + def retainedBatches: Seq[BatchUIData] = synchronized { (waitingBatchUIData.values.toSeq ++ runningBatchUIData.values.toSeq ++ completedBatchUIData).sortBy(_.batchTime)(Time.ordering) } - private def extractDistribution(getMetric: BatchUIData => Option[Long]): Option[Distribution] = { - Distribution(completedBatchUIData.flatMap(getMetric(_)).map(_.toDouble)) - } - def getBatchUIData(batchTime: Time): Option[BatchUIData] = synchronized { val batchUIData = waitingBatchUIData.get(batchTime).orElse { runningBatchUIData.get(batchTime).orElse { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala index db37ae815bdf5..4ee7a486e370b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala @@ -17,181 +17,453 @@ package 
org.apache.spark.streaming.ui -import java.util.Calendar +import java.text.SimpleDateFormat +import java.util.Date +import java.util.concurrent.TimeUnit import javax.servlet.http.HttpServletRequest -import scala.xml.Node +import scala.collection.mutable.ArrayBuffer +import scala.xml.{Node, Unparsed} import org.apache.spark.Logging import org.apache.spark.ui._ -import org.apache.spark.ui.UIUtils._ -import org.apache.spark.util.Distribution +import org.apache.spark.ui.{UIUtils => SparkUIUtils} + +/** + * A helper class to generate JavaScript and HTML for both timeline and histogram graphs. + * + * @param timelineDivId the timeline `id` used in the html `div` tag + * @param histogramDivId the timeline `id` used in the html `div` tag + * @param data the data for the graph + * @param minX the min value of X axis + * @param maxX the max value of X axis + * @param minY the min value of Y axis + * @param maxY the max value of Y axis + * @param unitY the unit of Y axis + * @param batchInterval if `batchInterval` is not None, we will draw a line for `batchInterval` in + * the graph + */ +private[ui] class GraphUIData( + timelineDivId: String, + histogramDivId: String, + data: Seq[(Long, Double)], + minX: Long, + maxX: Long, + minY: Double, + maxY: Double, + unitY: String, + batchInterval: Option[Double] = None) { + + private var dataJavaScriptName: String = _ + + def generateDataJs(jsCollector: JsCollector): Unit = { + val jsForData = data.map { case (x, y) => + s"""{"x": $x, "y": $y}""" + }.mkString("[", ",", "]") + dataJavaScriptName = jsCollector.nextVariableName + jsCollector.addPreparedStatement(s"var $dataJavaScriptName = $jsForData;") + } + + def generateTimelineHtml(jsCollector: JsCollector): Seq[Node] = { + jsCollector.addPreparedStatement(s"registerTimeline($minY, $maxY);") + if (batchInterval.isDefined) { + jsCollector.addStatement( + "drawTimeline(" + + s"'#$timelineDivId', $dataJavaScriptName, $minX, $maxX, $minY, $maxY, '$unitY'," + + s" ${batchInterval.get}" + + ");") + } else { + jsCollector.addStatement( + s"drawTimeline('#$timelineDivId', $dataJavaScriptName, $minX, $maxX, $minY, $maxY," + + s" '$unitY');") + } +
    + } + + def generateHistogramHtml(jsCollector: JsCollector): Seq[Node] = { + val histogramData = s"$dataJavaScriptName.map(function(d) { return d.y; })" + jsCollector.addPreparedStatement(s"registerHistogram($histogramData, $minY, $maxY);") + if (batchInterval.isDefined) { + jsCollector.addStatement( + "drawHistogram(" + + s"'#$histogramDivId', $histogramData, $minY, $maxY, '$unitY', ${batchInterval.get}" + + ");") + } else { + jsCollector.addStatement( + s"drawHistogram('#$histogramDivId', $histogramData, $minY, $maxY, '$unitY');") + } +
    + } +} + +/** + * A helper class for "scheduling delay", "processing time" and "total delay" to generate data that + * will be used in the timeline and histogram graphs. + * + * @param data (batchTime, milliseconds). "milliseconds" is something like "processing time". + */ +private[ui] class MillisecondsStatUIData(data: Seq[(Long, Long)]) { + + /** + * Converting the original data as per `unit`. + */ + def timelineData(unit: TimeUnit): Seq[(Long, Double)] = + data.map(x => x._1 -> UIUtils.convertToTimeUnit(x._2, unit)) + + /** + * Converting the original data as per `unit`. + */ + def histogramData(unit: TimeUnit): Seq[Double] = + data.map(x => UIUtils.convertToTimeUnit(x._2, unit)) + + val avg: Option[Long] = if (data.isEmpty) None else Some(data.map(_._2).sum / data.size) + + val formattedAvg: String = StreamingPage.formatDurationOption(avg) + + val max: Option[Long] = if (data.isEmpty) None else Some(data.map(_._2).max) +} + +/** + * A helper class for "input rate" to generate data that will be used in the timeline and histogram + * graphs. + * + * @param data (batchTime, event-rate). + */ +private[ui] class EventRateUIData(val data: Seq[(Long, Double)]) { + + val avg: Option[Double] = if (data.isEmpty) None else Some(data.map(_._2).sum / data.size) + + val formattedAvg: String = avg.map(_.formatted("%.2f")).getOrElse("-") + + val max: Option[Double] = if (data.isEmpty) None else Some(data.map(_._2).max) +} /** Page for Spark Web UI that shows statistics of a streaming job */ private[ui] class StreamingPage(parent: StreamingTab) extends WebUIPage("") with Logging { + import StreamingPage._ + private val listener = parent.listener private val startTime = System.currentTimeMillis() - private val emptyCell = "-" /** Render the page */ def render(request: HttpServletRequest): Seq[Node] = { - val content = listener.synchronized { - generateBasicStats() ++

    ++ -

    Statistics over last {listener.retainedCompletedBatches.size} processed batches

    ++ - generateReceiverStats() ++ - generateBatchStatsTable() ++ - generateBatchListTables() - } - UIUtils.headerSparkPage("Streaming", content, parent, Some(5000)) + val resources = generateLoadResources() + val basicInfo = generateBasicInfo() + val content = resources ++ + basicInfo ++ + listener.synchronized { + generateStatTable() ++ + generateBatchListTables() + } + SparkUIUtils.headerSparkPage("Streaming Statistics", content, parent, Some(5000)) } - /** Generate basic stats of the streaming program */ - private def generateBasicStats(): Seq[Node] = { - val timeSinceStart = System.currentTimeMillis() - startTime + /** + * Generate html that will load css/js files for StreamingPage + */ + private def generateLoadResources(): Seq[Node] = { // scalastyle:off -
      -
    • - Started at: {UIUtils.formatDate(startTime)} -
    • -
    • - Time since start: {formatDurationVerbose(timeSinceStart)} -
    • -
    • - Network receivers: {listener.numReceivers} -
    • -
    • - Batch interval: {formatDurationVerbose(listener.batchDuration)} -
    • -
    • - Completed batches: {listener.numTotalCompletedBatches} -
    • -
    • - Active batches: {listener.numUnprocessedBatches} -
    • -
    • - Received events: {listener.numTotalReceivedRecords} -
    • -
    • - Processed events: {listener.numTotalProcessedRecords} -
    • -
    + + + // scalastyle:on } - /** Generate stats of data received by the receivers in the streaming program */ - private def generateReceiverStats(): Seq[Node] = { - val receivedRecordDistributions = listener.receivedRecordsDistributions - val lastBatchReceivedRecord = listener.lastReceivedBatchRecords - val table = if (receivedRecordDistributions.size > 0) { - val headerRow = Seq( - "Receiver", - "Status", - "Location", - "Events in last batch\n[" + formatDate(Calendar.getInstance().getTime()) + "]", - "Minimum rate\n[events/sec]", - "Median rate\n[events/sec]", - "Maximum rate\n[events/sec]", - "Last Error" - ) - val dataRows = listener.receiverIds().map { receiverId => - val receiverInfo = listener.receiverInfo(receiverId) - val receiverName = receiverInfo.map(_.name).getOrElse(s"Receiver-$receiverId") - val receiverActive = receiverInfo.map { info => - if (info.active) "ACTIVE" else "INACTIVE" - }.getOrElse(emptyCell) - val receiverLocation = receiverInfo.map(_.location).getOrElse(emptyCell) - val receiverLastBatchRecords = formatNumber(lastBatchReceivedRecord(receiverId)) - val receivedRecordStats = receivedRecordDistributions(receiverId).map { d => - d.getQuantiles(Seq(0.0, 0.5, 1.0)).map(r => formatNumber(r.toLong)) - }.getOrElse { - Seq(emptyCell, emptyCell, emptyCell, emptyCell, emptyCell) - } - val receiverLastError = listener.receiverInfo(receiverId).map { info => - val msg = s"${info.lastErrorMessage} - ${info.lastError}" - if (msg.size > 100) msg.take(97) + "..." else msg - }.getOrElse(emptyCell) - Seq(receiverName, receiverActive, receiverLocation, receiverLastBatchRecords) ++ - receivedRecordStats ++ Seq(receiverLastError) - }.toSeq - Some(listingTable(headerRow, dataRows)) - } else { - None - } + /** Generate basic information of the streaming program */ + private def generateBasicInfo(): Seq[Node] = { + val timeSinceStart = System.currentTimeMillis() - startTime +
    Running batches of + + {SparkUIUtils.formatDurationVerbose(listener.batchDuration)} + + for + + {SparkUIUtils.formatDurationVerbose(timeSinceStart)} + + since + + {SparkUIUtils.formatDate(startTime)} + + ({listener.numTotalCompletedBatches} + completed batches, {listener.numTotalReceivedRecords} records) +
    +
    + } - val content = -
    Receiver Statistics
    ++ -
    {table.getOrElse("No receivers")}
    + /** + * Generate a global "timeFormat" dictionary in the JavaScript to store the time and its formatted + * string. Because we cannot specify a timezone in JavaScript, to make sure the server and client + * use the same timezone, we use the "timeFormat" dictionary to format all time values used in the + * graphs. + * + * @param times all time values that will be used in the graphs. + */ + private def generateTimeMap(times: Seq[Long]): Seq[Node] = { + val js = "var timeFormat = {};\n" + times.map { time => + val formattedTime = + UIUtils.formatBatchTime(time, listener.batchDuration, showYYYYMMSS = false) + s"timeFormat[$time] = '$formattedTime';" + }.mkString("\n") - content + } - /** Generate stats of batch jobs of the streaming program */ - private def generateBatchStatsTable(): Seq[Node] = { - val numBatches = listener.retainedCompletedBatches.size - val lastCompletedBatch = listener.lastCompletedBatch - val table = if (numBatches > 0) { - val processingDelayQuantilesRow = { - Seq( - "Processing Time", - formatDurationOption(lastCompletedBatch.flatMap(_.processingDelay)) - ) ++ getQuantiles(listener.processingDelayDistribution) - } - val schedulingDelayQuantilesRow = { - Seq( - "Scheduling Delay", - formatDurationOption(lastCompletedBatch.flatMap(_.schedulingDelay)) - ) ++ getQuantiles(listener.schedulingDelayDistribution) - } - val totalDelayQuantilesRow = { - Seq( - "Total Delay", - formatDurationOption(lastCompletedBatch.flatMap(_.totalDelay)) - ) ++ getQuantiles(listener.totalDelayDistribution) - } - val headerRow = Seq("Metric", "Last batch", "Minimum", "25th percentile", - "Median", "75th percentile", "Maximum") - val dataRows: Seq[Seq[String]] = Seq( - processingDelayQuantilesRow, - schedulingDelayQuantilesRow, - totalDelayQuantilesRow - ) - Some(listingTable(headerRow, dataRows)) - } else { - None - } + private def generateStatTable(): Seq[Node] = { + val batches = listener.retainedBatches - val content = -
    Batch Processing Statistics
    ++ -
    -
      - {table.getOrElse("No statistics have been generated yet.")} -
    -
    + val batchTimes = batches.map(_.batchTime.milliseconds) + val minBatchTime = if (batchTimes.isEmpty) startTime else batchTimes.min + val maxBatchTime = if (batchTimes.isEmpty) startTime else batchTimes.max - content - } + val eventRateForAllStreams = new EventRateUIData(batches.map { batchInfo => + (batchInfo.batchTime.milliseconds, batchInfo.numRecords * 1000.0 / listener.batchDuration) + }) + val schedulingDelay = new MillisecondsStatUIData(batches.flatMap { batchInfo => + batchInfo.schedulingDelay.map(batchInfo.batchTime.milliseconds -> _) + }) + val processingTime = new MillisecondsStatUIData(batches.flatMap { batchInfo => + batchInfo.processingDelay.map(batchInfo.batchTime.milliseconds -> _) + }) + val totalDelay = new MillisecondsStatUIData(batches.flatMap { batchInfo => + batchInfo.totalDelay.map(batchInfo.batchTime.milliseconds -> _) + }) - /** - * Returns a human-readable string representing a duration such as "5 second 35 ms" - */ - private def formatDurationOption(msOption: Option[Long]): String = { - msOption.map(formatDurationVerbose).getOrElse(emptyCell) + // Use the max value of "schedulingDelay", "processingTime", and "totalDelay" to make the + // Y axis ranges same. + val _maxTime = + (for (m1 <- schedulingDelay.max; m2 <- processingTime.max; m3 <- totalDelay.max) yield + m1 max m2 max m3).getOrElse(0L) + // Should start at 0 + val minTime = 0L + val (maxTime, normalizedUnit) = UIUtils.normalizeDuration(_maxTime) + val formattedUnit = UIUtils.shortTimeUnitString(normalizedUnit) + + // Use the max input rate for all InputDStreams' graphs to make the Y axis ranges same. + // If it's not an integral number, just use its ceil integral number. + val maxEventRate = eventRateForAllStreams.max.map(_.ceil.toLong).getOrElse(0L) + val minEventRate = 0L + + val batchInterval = UIUtils.convertToTimeUnit(listener.batchDuration, normalizedUnit) + + val jsCollector = new JsCollector + + val graphUIDataForEventRateOfAllStreams = + new GraphUIData( + "all-stream-events-timeline", + "all-stream-events-histogram", + eventRateForAllStreams.data, + minBatchTime, + maxBatchTime, + minEventRate, + maxEventRate, + "events/sec") + graphUIDataForEventRateOfAllStreams.generateDataJs(jsCollector) + + val graphUIDataForSchedulingDelay = + new GraphUIData( + "scheduling-delay-timeline", + "scheduling-delay-histogram", + schedulingDelay.timelineData(normalizedUnit), + minBatchTime, + maxBatchTime, + minTime, + maxTime, + formattedUnit) + graphUIDataForSchedulingDelay.generateDataJs(jsCollector) + + val graphUIDataForProcessingTime = + new GraphUIData( + "processing-time-timeline", + "processing-time-histogram", + processingTime.timelineData(normalizedUnit), + minBatchTime, + maxBatchTime, + minTime, + maxTime, + formattedUnit, Some(batchInterval)) + graphUIDataForProcessingTime.generateDataJs(jsCollector) + + val graphUIDataForTotalDelay = + new GraphUIData( + "total-delay-timeline", + "total-delay-histogram", + totalDelay.timelineData(normalizedUnit), + minBatchTime, + maxBatchTime, + minTime, + maxTime, + formattedUnit) + graphUIDataForTotalDelay.generateDataJs(jsCollector) + + // It's false before the user registers the first InputDStream + val hasStream = listener.streamIds.nonEmpty + + val numCompletedBatches = listener.retainedCompletedBatches.size + val numActiveBatches = batchTimes.length - numCompletedBatches + val table = + // scalastyle:off +
+ + + + + + + + + + + + + {if (hasStream) { + + + + }} + + + + + + + + + + + + + + + + +
Timelines (Last {batchTimes.length} batches, {numActiveBatches} active, {numCompletedBatches} completed)Histograms
+
+
+ { + if (hasStream) { + + + + Input Rate + + + } else { + Input Rate + } + } +
+
Avg: {eventRateForAllStreams.formattedAvg} events/sec
+
+
{graphUIDataForEventRateOfAllStreams.generateTimelineHtml(jsCollector)}{graphUIDataForEventRateOfAllStreams.generateHistogramHtml(jsCollector)}
+
+
Scheduling Delay {SparkUIUtils.tooltip("Time taken by Streaming scheduler to submit jobs of a batch", "right")}
+
Avg: {schedulingDelay.formattedAvg}
+
+
{graphUIDataForSchedulingDelay.generateTimelineHtml(jsCollector)}{graphUIDataForSchedulingDelay.generateHistogramHtml(jsCollector)}
+
+
Processing Time {SparkUIUtils.tooltip("Time taken to process all jobs of a batch", "right")}
+
Avg: {processingTime.formattedAvg}
+
+
{graphUIDataForProcessingTime.generateTimelineHtml(jsCollector)}{graphUIDataForProcessingTime.generateHistogramHtml(jsCollector)}
+
+
Total Delay {SparkUIUtils.tooltip("Total time taken to handle a batch", "right")}
+
Avg: {totalDelay.formattedAvg}
+
+
{graphUIDataForTotalDelay.generateTimelineHtml(jsCollector)}{graphUIDataForTotalDelay.generateHistogramHtml(jsCollector)}
+ // scalastyle:on + + generateTimeMap(batchTimes) ++ table ++ jsCollector.toHtml } - /** Get quantiles for any time distribution */ - private def getQuantiles(timeDistributionOption: Option[Distribution]) = { - timeDistributionOption.get.getQuantiles().map { ms => formatDurationVerbose(ms.toLong) } + private def generateInputDStreamsTable( + jsCollector: JsCollector, + minX: Long, + maxX: Long, + minY: Double, + maxY: Double): Seq[Node] = { + val content = listener.receivedEventRateWithBatchTime.map { case (streamId, eventRates) => + generateInputDStreamRow(jsCollector, streamId, eventRates, minX, maxX, minY, maxY) + }.foldLeft[Seq[Node]](Nil)(_ ++ _) + + // scalastyle:off + + + + + + + + + + + + {content} + +
Status
Location
Last Error Time
Last Error Message
+ // scalastyle:on } - /** Generate HTML table from string data */ - private def listingTable(headers: Seq[String], data: Seq[Seq[String]]) = { - def generateDataRow(data: Seq[String]): Seq[Node] = { - {data.map(d => {d})} - } - UIUtils.listingTable(headers, generateDataRow, data, fixedWidth = true) + private def generateInputDStreamRow( + jsCollector: JsCollector, + streamId: Int, + eventRates: Seq[(Long, Double)], + minX: Long, + maxX: Long, + minY: Double, + maxY: Double): Seq[Node] = { + // If this is a ReceiverInputDStream, we need to show the receiver info. Or we only need the + // InputDStream name. + val receiverInfo = listener.receiverInfo(streamId) + val receiverName = receiverInfo.map(_.name). + orElse(listener.streamName(streamId)).getOrElse(s"Stream-$streamId") + val receiverActive = receiverInfo.map { info => + if (info.active) "ACTIVE" else "INACTIVE" + }.getOrElse(emptyCell) + val receiverLocation = receiverInfo.map(_.location).getOrElse(emptyCell) + val receiverLastError = receiverInfo.map { info => + val msg = s"${info.lastErrorMessage} - ${info.lastError}" + if (msg.size > 100) msg.take(97) + "..." else msg + }.getOrElse(emptyCell) + val receiverLastErrorTime = receiverInfo.map { + r => if (r.lastErrorTime < 0) "-" else SparkUIUtils.formatDate(r.lastErrorTime) + }.getOrElse(emptyCell) + val receivedRecords = new EventRateUIData(eventRates) + + val graphUIDataForEventRate = + new GraphUIData( + s"stream-$streamId-events-timeline", + s"stream-$streamId-events-histogram", + receivedRecords.data, + minX, + maxX, + minY, + maxY, + "events/sec") + graphUIDataForEventRate.generateDataJs(jsCollector) + + + +
+
{receiverName}
+
Avg: {receivedRecords.formattedAvg} events/sec
+
+ + {receiverActive} + {receiverLocation} + {receiverLastErrorTime} +
{receiverLastError}
+ + + + {graphUIDataForEventRate.generateTimelineHtml(jsCollector)} + + {graphUIDataForEventRate.generateHistogramHtml(jsCollector)} + } private def generateBatchListTables(): Seq[Node] = { @@ -202,17 +474,81 @@ private[ui] class StreamingPage(parent: StreamingTab) val activeBatchesContent = {

Active Batches ({runningBatches.size + waitingBatches.size})

++ - new ActiveBatchTable(runningBatches, waitingBatches).toNodeSeq + new ActiveBatchTable(runningBatches, waitingBatches, listener.batchDuration).toNodeSeq } val completedBatchesContent = {

Completed Batches (last {completedBatches.size} out of {listener.numTotalCompletedBatches})

++ - new CompletedBatchTable(completedBatches).toNodeSeq + new CompletedBatchTable(completedBatches, listener.batchDuration).toNodeSeq } activeBatchesContent ++ completedBatchesContent } } +private[ui] object StreamingPage { + val BLACK_RIGHT_TRIANGLE_HTML = "▶" + val BLACK_DOWN_TRIANGLE_HTML = "▼" + + val emptyCell = "-" + + /** + * Returns a human-readable string representing a duration such as "5 second 35 ms" + */ + def formatDurationOption(msOption: Option[Long]): String = { + msOption.map(SparkUIUtils.formatDurationVerbose).getOrElse(emptyCell) + } + +} + +/** + * A helper class that allows the user to add JavaScript statements which will be executed when the + * DOM has finished loading. + */ +private[ui] class JsCollector { + + private var variableId = 0 + + /** + * Return the next unused JavaScript variable name + */ + def nextVariableName: String = { + variableId += 1 + "v" + variableId + } + + /** + * JavaScript statements that will execute before `statements` + */ + private val preparedStatements = ArrayBuffer[String]() + + /** + * JavaScript statements that will execute after `preparedStatements` + */ + private val statements = ArrayBuffer[String]() + + def addPreparedStatement(js: String): Unit = { + preparedStatements += js + } + + def addStatement(js: String): Unit = { + statements += js + } + + /** + * Generate a html snippet that will execute all scripts when the DOM has finished loading. + */ + def toHtml: Seq[Node] = { + val js = + s""" + |$$(document).ready(function(){ + | ${preparedStatements.mkString("\n")} + | ${statements.mkString("\n")} + |});""".stripMargin + + + } +} + diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala index e4039639adbad..e0c0f57212f55 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala @@ -17,9 +17,11 @@ package org.apache.spark.streaming.ui +import org.eclipse.jetty.servlet.ServletContextHandler + import org.apache.spark.{Logging, SparkException} import org.apache.spark.streaming.StreamingContext -import org.apache.spark.ui.{SparkUI, SparkUITab} +import org.apache.spark.ui.{JettyUtils, SparkUI, SparkUITab} import StreamingTab._ @@ -30,6 +32,8 @@ import StreamingTab._ private[spark] class StreamingTab(val ssc: StreamingContext) extends SparkUITab(getSparkUI(ssc), "streaming") with Logging { + private val STATIC_RESOURCE_DIR = "org/apache/spark/streaming/ui/static" + val parent = getSparkUI(ssc) val listener = ssc.progressListener @@ -37,10 +41,19 @@ private[spark] class StreamingTab(val ssc: StreamingContext) ssc.sc.addSparkListener(listener) attachPage(new StreamingPage(this)) attachPage(new BatchPage(this)) - parent.attachTab(this) + + var staticHandler: ServletContextHandler = null + + def attach() { + getSparkUI(ssc).attachTab(this) + staticHandler = JettyUtils.createStaticHandler(STATIC_RESOURCE_DIR, "/static/streaming") + getSparkUI(ssc).attachHandler(staticHandler) + } def detach() { getSparkUI(ssc).detachTab(this) + getSparkUI(ssc).detachHandler(staticHandler) + staticHandler = null } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala new file mode 100644 index 0000000000000..86cfb1fa47370 --- /dev/null +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala @@ -0,0 +1,127 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming.ui + +import java.text.SimpleDateFormat +import java.util.TimeZone +import java.util.concurrent.TimeUnit + +private[streaming] object UIUtils { + + /** + * Return the short string for a `TimeUnit`. + */ + def shortTimeUnitString(unit: TimeUnit): String = unit match { + case TimeUnit.NANOSECONDS => "ns" + case TimeUnit.MICROSECONDS => "us" + case TimeUnit.MILLISECONDS => "ms" + case TimeUnit.SECONDS => "sec" + case TimeUnit.MINUTES => "min" + case TimeUnit.HOURS => "hrs" + case TimeUnit.DAYS => "days" + } + + /** + * Find the best `TimeUnit` for converting milliseconds to a friendly string. Return the value + * after converting, also with its TimeUnit. + */ + def normalizeDuration(milliseconds: Long): (Double, TimeUnit) = { + if (milliseconds < 1000) { + return (milliseconds, TimeUnit.MILLISECONDS) + } + val seconds = milliseconds.toDouble / 1000 + if (seconds < 60) { + return (seconds, TimeUnit.SECONDS) + } + val minutes = seconds / 60 + if (minutes < 60) { + return (minutes, TimeUnit.MINUTES) + } + val hours = minutes / 60 + if (hours < 24) { + return (hours, TimeUnit.HOURS) + } + val days = hours / 24 + (days, TimeUnit.DAYS) + } + + /** + * Convert `milliseconds` to the specified `unit`. We cannot use `TimeUnit.convert` because it + * will discard the fractional part. + */ + def convertToTimeUnit(milliseconds: Long, unit: TimeUnit): Double = unit match { + case TimeUnit.NANOSECONDS => milliseconds * 1000 * 1000 + case TimeUnit.MICROSECONDS => milliseconds * 1000 + case TimeUnit.MILLISECONDS => milliseconds + case TimeUnit.SECONDS => milliseconds / 1000.0 + case TimeUnit.MINUTES => milliseconds / 1000.0 / 60.0 + case TimeUnit.HOURS => milliseconds / 1000.0 / 60.0 / 60.0 + case TimeUnit.DAYS => milliseconds / 1000.0 / 60.0 / 60.0 / 24.0 + } + + // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use. + private val batchTimeFormat = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + } + + private val batchTimeFormatWithMilliseconds = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS") + } + + /** + * If `batchInterval` is less than 1 second, format `batchTime` with milliseconds. Otherwise, + * format `batchTime` without milliseconds. + * + * @param batchTime the batch time to be formatted + * @param batchInterval the batch interval + * @param showYYYYMMSS if showing the `yyyy/MM/dd` part. 
If it's false, the return value wll be + * only `HH:mm:ss` or `HH:mm:ss.SSS` depending on `batchInterval` + * @param timezone only for test + */ + def formatBatchTime( + batchTime: Long, + batchInterval: Long, + showYYYYMMSS: Boolean = true, + timezone: TimeZone = null): String = { + val oldTimezones = + (batchTimeFormat.get.getTimeZone, batchTimeFormatWithMilliseconds.get.getTimeZone) + if (timezone != null) { + batchTimeFormat.get.setTimeZone(timezone) + batchTimeFormatWithMilliseconds.get.setTimeZone(timezone) + } + try { + val formattedBatchTime = + if (batchInterval < 1000) { + batchTimeFormatWithMilliseconds.get.format(batchTime) + } else { + // If batchInterval >= 1 second, don't show milliseconds + batchTimeFormat.get.format(batchTime) + } + if (showYYYYMMSS) { + formattedBatchTime + } else { + formattedBatchTime.substring(formattedBatchTime.indexOf(' ') + 1) + } + } finally { + if (timezone != null) { + batchTimeFormat.get.setTimeZone(oldTimezones._1) + batchTimeFormatWithMilliseconds.get.setTimeZone(oldTimezones._2) + } + } + } +} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 9985fedc35141..fe6328b1ce727 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -26,7 +26,7 @@ import scala.language.postfixOps import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.util.ThreadUtils +import org.apache.spark.util.{CompletionIterator, ThreadUtils} import org.apache.spark.{Logging, SparkConf} /** @@ -124,7 +124,8 @@ private[streaming] class FileBasedWriteAheadLog( logFilesToRead.iterator.map { file => logDebug(s"Creating log reader with $file") - new FileBasedWriteAheadLogReader(file, hadoopConf) + val reader = new FileBasedWriteAheadLogReader(file, hadoopConf) + CompletionIterator[ByteBuffer, Iterator[ByteBuffer]](reader, reader.close _) } flatMap { x => x } } @@ -199,7 +200,7 @@ private[streaming] class FileBasedWriteAheadLog( /** Initialize the log directory or recover existing logs inside the directory */ private def initializeOrRecover(): Unit = synchronized { val logDirectoryPath = new Path(logDirectory) - val fileSystem = HdfsUtils.getFileSystemForPath(logDirectoryPath, hadoopConf) + val fileSystem = HdfsUtils.getFileSystemForPath(logDirectoryPath, hadoopConf) if (fileSystem.exists(logDirectoryPath) && fileSystem.getFileStatus(logDirectoryPath).isDir) { val logFileInfo = logFilesTologInfo(fileSystem.listStatus(logDirectoryPath).map { _.getPath }) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index 858ba3c9eb4e5..f60688f173c44 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -27,7 +27,7 @@ private[streaming] object HdfsUtils { // If the file exists and we have append support, append instead of creating a new file val stream: FSDataOutputStream = { if (dfs.isFile(dfsPath)) { - if (conf.getBoolean("hdfs.append.support", false)) { + if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) { dfs.append(dfsPath) } else { throw new IllegalStateException("File exists and there is no append support!") 
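As a quick aside, not part of the patch: the new streaming UIUtils helpers added above are small enough that a short, self-contained usage sketch shows how they are meant to compose — normalizeDuration picks a display unit for the largest delay, convertToTimeUnit rescales other values into that unit so the delay graphs can share one Y axis, and formatBatchTime renders a batch timestamp with or without milliseconds depending on the batch interval. The object name and the numbers are illustrative; because UIUtils is private[streaming], the sketch assumes it is compiled somewhere under the org.apache.spark.streaming package (for example in the streaming module's test sources).

    package org.apache.spark.streaming.ui

    object UIUtilsSketch {
      def main(args: Array[String]): Unit = {
        // Pick a friendly unit for a 95 s maximum delay: yields roughly (1.58, TimeUnit.MINUTES).
        val (scaledMax, unit) = UIUtils.normalizeDuration(95000L)
        println(s"max delay: $scaledMax ${UIUtils.shortTimeUnitString(unit)}")

        // Rescale another duration into the same unit (30 s -> 0.5 minutes) so all
        // delay graphs can share the same Y axis range.
        println(UIUtils.convertToTimeUnit(30000L, unit))

        // Batch interval is 2000 ms (>= 1 second), so the formatted time drops milliseconds.
        println(UIUtils.formatBatchTime(1431341385000L, batchInterval = 2000L))
      }
    }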
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala index 4d968f8bfa7a8..408936653c790 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextHelper.scala @@ -27,7 +27,7 @@ object RawTextHelper { * Splits lines and counts the words. */ def splitAndCountPartitions(iter: Iterator[String]): Iterator[(String, Long)] = { - val map = new OpenHashMap[String,Long] + val map = new OpenHashMap[String, Long] var i = 0 var j = 0 while (iter.hasNext) { @@ -98,7 +98,7 @@ object RawTextHelper { * before real workload starts. */ def warmUp(sc: SparkContext) { - for(i <- 0 to 1) { + for (i <- 0 to 1) { sc.parallelize(1 to 200000, 1000) .map(_ % 1331).map(_.toString) .mapPartitions(splitAndCountPartitions).reduceByKey(_ + _, 10) diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java index b1adf881dd0f5..1077b1b2cb7e3 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaAPISuite.java @@ -70,6 +70,20 @@ public void testInitialization() { Assert.assertNotNull(ssc.sparkContext()); } + @SuppressWarnings("unchecked") + @Test + public void testContextState() { + List> inputData = Arrays.asList(Arrays.asList(1, 2, 3, 4)); + Assert.assertTrue(ssc.getState() == StreamingContextState.INITIALIZED); + JavaDStream stream = JavaTestUtils.attachTestInputStream(ssc, inputData, 1); + JavaTestUtils.attachTestOutputStream(stream); + Assert.assertTrue(ssc.getState() == StreamingContextState.INITIALIZED); + ssc.start(); + Assert.assertTrue(ssc.getState() == StreamingContextState.ACTIVE); + ssc.stop(); + Assert.assertTrue(ssc.getState() == StreamingContextState.STOPPED); + } + @SuppressWarnings("unchecked") @Test public void testCount() { @@ -1752,29 +1766,10 @@ public JavaStreamingContext call() { Assert.assertTrue("old context not recovered", !newContextCreated.get()); ssc.stop(); - // Function to create JavaStreamingContext using existing JavaSparkContext - // without any output operations (used to detect the new context) - Function creatingFunc2 = - new Function() { - public JavaStreamingContext call(JavaSparkContext context) { - newContextCreated.set(true); - return new JavaStreamingContext(context, Seconds.apply(1)); - } - }; - - JavaSparkContext sc = new JavaSparkContext(conf); newContextCreated.set(false); - ssc = JavaStreamingContext.getOrCreate(emptyDir.getAbsolutePath(), creatingFunc2, sc); - Assert.assertTrue("new context not created", newContextCreated.get()); - ssc.stop(false); - - newContextCreated.set(false); - ssc = JavaStreamingContext.getOrCreate(corruptedCheckpointDir, creatingFunc2, sc, true); - Assert.assertTrue("new context not created", newContextCreated.get()); - ssc.stop(false); - - newContextCreated.set(false); - ssc = JavaStreamingContext.getOrCreate(checkpointDir, creatingFunc2, sc); + JavaSparkContext sc = new JavaSparkContext(conf); + ssc = JavaStreamingContext.getOrCreate(checkpointDir, creatingFunc, + new org.apache.hadoop.conf.Configuration()); Assert.assertTrue("old context not recovered", !newContextCreated.get()); ssc.stop(); } diff --git a/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala index 
c0ea0491c313d..bb80bff6dc2e6 100644 --- a/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala +++ b/streaming/src/test/java/org/apache/spark/streaming/JavaTestUtils.scala @@ -70,6 +70,7 @@ trait JavaTestBase extends TestSuiteBase { ssc: JavaStreamingContext, numBatches: Int, numExpectedOutput: Int): JList[JList[V]] = { implicit val cm: ClassTag[V] = implicitly[ClassTag[AnyRef]].asInstanceOf[ClassTag[V]] + ssc.getState() val res = runStreams[V](ssc.ssc, numBatches, numExpectedOutput) val out = new ArrayList[JList[V]]() res.map(entry => out.append(new ArrayList[V](entry))) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala index 87bc20f79c3cd..08faeaa58f419 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/BasicOperationsSuite.scala @@ -255,7 +255,7 @@ class BasicOperationsSuite extends TestSuiteBase { Seq( ) ) val operation = (s1: DStream[String], s2: DStream[String]) => { - s1.map(x => (x,1)).cogroup(s2.map(x => (x, "x"))).mapValues(x => (x._1.toSeq, x._2.toSeq)) + s1.map(x => (x, 1)).cogroup(s2.map(x => (x, "x"))).mapValues(x => (x._1.toSeq, x._2.toSeq)) } testOperation(inputData1, inputData2, operation, outputData, true) } @@ -427,9 +427,9 @@ class BasicOperationsSuite extends TestSuiteBase { test("updateStateByKey - object lifecycle") { val inputData = Seq( - Seq("a","b"), + Seq("a", "b"), null, - Seq("a","c","a"), + Seq("a", "c", "a"), Seq("c"), null, null @@ -557,6 +557,9 @@ class BasicOperationsSuite extends TestSuiteBase { withTestServer(new TestServer()) { testServer => withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => testServer.start() + + val batchCounter = new BatchCounter(ssc) + // Set up the streaming context and input streams val networkStream = ssc.socketTextStream("localhost", testServer.port, StorageLevel.MEMORY_AND_DISK) @@ -587,7 +590,11 @@ class BasicOperationsSuite extends TestSuiteBase { for (i <- 0 until input.size) { testServer.send(input(i).toString + "\n") Thread.sleep(200) + val numCompletedBatches = batchCounter.getNumCompletedBatches clock.advance(batchDuration.milliseconds) + if (!batchCounter.waitUntilBatchesCompleted(numCompletedBatches + 1, 5000)) { + fail("Batch took more than 5 seconds to complete") + } collectRddInfo() } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala new file mode 100644 index 0000000000000..6a1dd6949b204 --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamClosureSuite.scala @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import java.io.NotSerializableException + +import org.scalatest.{BeforeAndAfterAll, FunSuite} + +import org.apache.spark.{HashPartitioner, SparkContext, SparkException} +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.dstream.DStream +import org.apache.spark.util.ReturnStatementInClosureException + +/** + * Test that closures passed to DStream operations are actually cleaned. + */ +class DStreamClosureSuite extends FunSuite with BeforeAndAfterAll { + private var ssc: StreamingContext = null + + override def beforeAll(): Unit = { + val sc = new SparkContext("local", "test") + ssc = new StreamingContext(sc, Seconds(1)) + } + + override def afterAll(): Unit = { + ssc.stop(stopSparkContext = true) + ssc = null + } + + test("user provided closures are actually cleaned") { + val dstream = new DummyInputDStream(ssc) + val pairDstream = dstream.map { i => (i, i) } + // DStream + testMap(dstream) + testFlatMap(dstream) + testFilter(dstream) + testMapPartitions(dstream) + testReduce(dstream) + testForeach(dstream) + testForeachRDD(dstream) + testTransform(dstream) + testTransformWith(dstream) + testReduceByWindow(dstream) + // PairDStreamFunctions + testReduceByKey(pairDstream) + testCombineByKey(pairDstream) + testReduceByKeyAndWindow(pairDstream) + testUpdateStateByKey(pairDstream) + testMapValues(pairDstream) + testFlatMapValues(pairDstream) + // StreamingContext + testTransform2(ssc, dstream) + } + + /** + * Verify that the expected exception is thrown. + * + * We use return statements as an indication that a closure is actually being cleaned. + * We expect closure cleaner to find the return statements in the user provided closures. + */ + private def expectCorrectException(body: => Unit): Unit = { + try { + body + } catch { + case rse: ReturnStatementInClosureException => // Success! 
+ case e @ (_: NotSerializableException | _: SparkException) => + throw new TestException( + s"Expected ReturnStatementInClosureException, but got $e.\n" + + "This means the closure provided by user is not actually cleaned.") + } + } + + // DStream operations + private def testMap(ds: DStream[Int]): Unit = expectCorrectException { + ds.map { _ => return; 1 } + } + private def testFlatMap(ds: DStream[Int]): Unit = expectCorrectException { + ds.flatMap { _ => return; Seq.empty } + } + private def testFilter(ds: DStream[Int]): Unit = expectCorrectException { + ds.filter { _ => return; true } + } + private def testMapPartitions(ds: DStream[Int]): Unit = expectCorrectException { + ds.mapPartitions { _ => return; Seq.empty.toIterator } + } + private def testReduce(ds: DStream[Int]): Unit = expectCorrectException { + ds.reduce { case (_, _) => return; 1 } + } + private def testForeach(ds: DStream[Int]): Unit = { + val foreachF1 = (rdd: RDD[Int], t: Time) => return + val foreachF2 = (rdd: RDD[Int]) => return + expectCorrectException { ds.foreach(foreachF1) } + expectCorrectException { ds.foreach(foreachF2) } + } + private def testForeachRDD(ds: DStream[Int]): Unit = { + val foreachRDDF1 = (rdd: RDD[Int], t: Time) => return + val foreachRDDF2 = (rdd: RDD[Int]) => return + expectCorrectException { ds.foreachRDD(foreachRDDF1) } + expectCorrectException { ds.foreachRDD(foreachRDDF2) } + } + private def testTransform(ds: DStream[Int]): Unit = { + val transformF1 = (rdd: RDD[Int]) => { return; rdd } + val transformF2 = (rdd: RDD[Int], time: Time) => { return; rdd } + expectCorrectException { ds.transform(transformF1) } + expectCorrectException { ds.transform(transformF2) } + } + private def testTransformWith(ds: DStream[Int]): Unit = { + val transformF1 = (rdd1: RDD[Int], rdd2: RDD[Int]) => { return; rdd1 } + val transformF2 = (rdd1: RDD[Int], rdd2: RDD[Int], time: Time) => { return; rdd2 } + expectCorrectException { ds.transformWith(ds, transformF1) } + expectCorrectException { ds.transformWith(ds, transformF2) } + } + private def testReduceByWindow(ds: DStream[Int]): Unit = { + val reduceF = (_: Int, _: Int) => { return; 1 } + expectCorrectException { ds.reduceByWindow(reduceF, Seconds(1), Seconds(2)) } + expectCorrectException { ds.reduceByWindow(reduceF, reduceF, Seconds(1), Seconds(2)) } + } + + // PairDStreamFunctions operations + private def testReduceByKey(ds: DStream[(Int, Int)]): Unit = { + val reduceF = (_: Int, _: Int) => { return; 1 } + expectCorrectException { ds.reduceByKey(reduceF) } + expectCorrectException { ds.reduceByKey(reduceF, 5) } + expectCorrectException { ds.reduceByKey(reduceF, new HashPartitioner(5)) } + } + private def testCombineByKey(ds: DStream[(Int, Int)]): Unit = { + expectCorrectException { + ds.combineByKey[Int]( + { _: Int => return; 1 }, + { case (_: Int, _: Int) => return; 1 }, + { case (_: Int, _: Int) => return; 1 }, + new HashPartitioner(5) + ) + } + } + private def testReduceByKeyAndWindow(ds: DStream[(Int, Int)]): Unit = { + val reduceF = (_: Int, _: Int) => { return; 1 } + val filterF = (_: (Int, Int)) => { return; false } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, Seconds(1)) } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, Seconds(1), Seconds(2)) } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, Seconds(1), Seconds(2), 5) } + expectCorrectException { + ds.reduceByKeyAndWindow(reduceF, Seconds(1), Seconds(2), new HashPartitioner(5)) + } + expectCorrectException { ds.reduceByKeyAndWindow(reduceF, reduceF, Seconds(2)) 
} + expectCorrectException { + ds.reduceByKeyAndWindow( + reduceF, reduceF, Seconds(2), Seconds(3), new HashPartitioner(5), filterF) + } + } + private def testUpdateStateByKey(ds: DStream[(Int, Int)]): Unit = { + val updateF1 = (_: Seq[Int], _: Option[Int]) => { return; Some(1) } + val updateF2 = (_: Iterator[(Int, Seq[Int], Option[Int])]) => { return; Seq((1, 1)).toIterator } + val initialRDD = ds.ssc.sparkContext.emptyRDD[Int].map { i => (i, i) } + expectCorrectException { ds.updateStateByKey(updateF1) } + expectCorrectException { ds.updateStateByKey(updateF1, 5) } + expectCorrectException { ds.updateStateByKey(updateF1, new HashPartitioner(5)) } + expectCorrectException { + ds.updateStateByKey(updateF1, new HashPartitioner(5), initialRDD) + } + expectCorrectException { + ds.updateStateByKey(updateF2, new HashPartitioner(5), true) + } + expectCorrectException { + ds.updateStateByKey(updateF2, new HashPartitioner(5), true, initialRDD) + } + } + private def testMapValues(ds: DStream[(Int, Int)]): Unit = expectCorrectException { + ds.mapValues { _ => return; 1 } + } + private def testFlatMapValues(ds: DStream[(Int, Int)]): Unit = expectCorrectException { + ds.flatMapValues { _ => return; Seq.empty } + } + + // StreamingContext operations + private def testTransform2(ssc: StreamingContext, ds: DStream[Int]): Unit = { + val transformF = (rdds: Seq[RDD[_]], time: Time) => { return; ssc.sparkContext.emptyRDD[Int] } + expectCorrectException { ssc.transform(Seq(ds), transformF) } + } + +} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala new file mode 100644 index 0000000000000..e3fb2ef130859 --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/DStreamScopeSuite.scala @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming + +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} + +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDDOperationScope +import org.apache.spark.streaming.dstream.DStream +import org.apache.spark.streaming.ui.UIUtils + +/** + * Tests whether scope information is passed from DStream operations to RDDs correctly. 
+ */ +class DStreamScopeSuite extends FunSuite with BeforeAndAfter with BeforeAndAfterAll { + private var ssc: StreamingContext = null + private val batchDuration: Duration = Seconds(1) + + override def beforeAll(): Unit = { + ssc = new StreamingContext(new SparkContext("local", "test"), batchDuration) + } + + override def afterAll(): Unit = { + ssc.stop(stopSparkContext = true) + } + + before { assertPropertiesNotSet() } + after { assertPropertiesNotSet() } + + test("dstream without scope") { + val dummyStream = new DummyDStream(ssc) + dummyStream.initialize(Time(0)) + + // This DStream is not instantiated in any scope, so all RDDs + // created by this stream should similarly not have a scope + assert(dummyStream.baseScope === None) + assert(dummyStream.getOrCompute(Time(1000)).get.scope === None) + assert(dummyStream.getOrCompute(Time(2000)).get.scope === None) + assert(dummyStream.getOrCompute(Time(3000)).get.scope === None) + } + + test("input dstream without scope") { + val inputStream = new DummyInputDStream(ssc) + inputStream.initialize(Time(0)) + + val baseScope = inputStream.baseScope.map(RDDOperationScope.fromJson) + val scope1 = inputStream.getOrCompute(Time(1000)).get.scope + val scope2 = inputStream.getOrCompute(Time(2000)).get.scope + val scope3 = inputStream.getOrCompute(Time(3000)).get.scope + + // This DStream is not instantiated in any scope, so all RDDs + assertDefined(baseScope, scope1, scope2, scope3) + assert(baseScope.get.name.startsWith("dummy stream")) + assertScopeCorrect(baseScope.get, scope1.get, 1000) + assertScopeCorrect(baseScope.get, scope2.get, 2000) + assertScopeCorrect(baseScope.get, scope3.get, 3000) + } + + test("scoping simple operations") { + val inputStream = new DummyInputDStream(ssc) + val mappedStream = inputStream.map { i => i + 1 } + val filteredStream = mappedStream.filter { i => i % 2 == 0 } + filteredStream.initialize(Time(0)) + + val mappedScopeBase = mappedStream.baseScope.map(RDDOperationScope.fromJson) + val mappedScope1 = mappedStream.getOrCompute(Time(1000)).get.scope + val mappedScope2 = mappedStream.getOrCompute(Time(2000)).get.scope + val mappedScope3 = mappedStream.getOrCompute(Time(3000)).get.scope + val filteredScopeBase = filteredStream.baseScope.map(RDDOperationScope.fromJson) + val filteredScope1 = filteredStream.getOrCompute(Time(1000)).get.scope + val filteredScope2 = filteredStream.getOrCompute(Time(2000)).get.scope + val filteredScope3 = filteredStream.getOrCompute(Time(3000)).get.scope + + // These streams are defined in their respective scopes "map" and "filter", so all + // RDDs created by these streams should inherit the IDs and names of their parent + // DStream's base scopes + assertDefined(mappedScopeBase, mappedScope1, mappedScope2, mappedScope3) + assertDefined(filteredScopeBase, filteredScope1, filteredScope2, filteredScope3) + assert(mappedScopeBase.get.name === "map") + assert(filteredScopeBase.get.name === "filter") + assertScopeCorrect(mappedScopeBase.get, mappedScope1.get, 1000) + assertScopeCorrect(mappedScopeBase.get, mappedScope2.get, 2000) + assertScopeCorrect(mappedScopeBase.get, mappedScope3.get, 3000) + assertScopeCorrect(filteredScopeBase.get, filteredScope1.get, 1000) + assertScopeCorrect(filteredScopeBase.get, filteredScope2.get, 2000) + assertScopeCorrect(filteredScopeBase.get, filteredScope3.get, 3000) + } + + test("scoping nested operations") { + val inputStream = new DummyInputDStream(ssc) + val countStream = inputStream.countByWindow(Seconds(10), Seconds(1)) + countStream.initialize(Time(0)) + 
+ val countScopeBase = countStream.baseScope.map(RDDOperationScope.fromJson) + val countScope1 = countStream.getOrCompute(Time(1000)).get.scope + val countScope2 = countStream.getOrCompute(Time(2000)).get.scope + val countScope3 = countStream.getOrCompute(Time(3000)).get.scope + + // Assert that all children RDDs inherit the DStream operation name correctly + assertDefined(countScopeBase, countScope1, countScope2, countScope3) + assert(countScopeBase.get.name === "countByWindow") + assertScopeCorrect(countScopeBase.get, countScope1.get, 1000) + assertScopeCorrect(countScopeBase.get, countScope2.get, 2000) + assertScopeCorrect(countScopeBase.get, countScope3.get, 3000) + + // All streams except the input stream should share the same scopes as `countStream` + def testStream(stream: DStream[_]): Unit = { + if (stream != inputStream) { + val myScopeBase = stream.baseScope.map(RDDOperationScope.fromJson) + val myScope1 = stream.getOrCompute(Time(1000)).get.scope + val myScope2 = stream.getOrCompute(Time(2000)).get.scope + val myScope3 = stream.getOrCompute(Time(3000)).get.scope + assertDefined(myScopeBase, myScope1, myScope2, myScope3) + assert(myScopeBase === countScopeBase) + assert(myScope1 === countScope1) + assert(myScope2 === countScope2) + assert(myScope3 === countScope3) + // Climb upwards to test the parent streams + stream.dependencies.foreach(testStream) + } + } + testStream(countStream) + } + + /** Assert that the RDD operation scope properties are not set in our SparkContext. */ + private def assertPropertiesNotSet(): Unit = { + assert(ssc != null) + assert(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_KEY) == null) + assert(ssc.sc.getLocalProperty(SparkContext.RDD_SCOPE_NO_OVERRIDE_KEY) == null) + } + + /** Assert that the given RDD scope inherits the name and ID of the base scope correctly. */ + private def assertScopeCorrect( + baseScope: RDDOperationScope, + rddScope: RDDOperationScope, + batchTime: Long): Unit = { + assertScopeCorrect(baseScope.id, baseScope.name, rddScope, batchTime) + } + + /** Assert that the given RDD scope inherits the base name and ID correctly. */ + private def assertScopeCorrect( + baseScopeId: String, + baseScopeName: String, + rddScope: RDDOperationScope, + batchTime: Long): Unit = { + val formattedBatchTime = UIUtils.formatBatchTime( + batchTime, ssc.graph.batchDuration.milliseconds, showYYYYMMSS = false) + assert(rddScope.id === s"${baseScopeId}_$batchTime") + assert(rddScope.name.replaceAll("\\n", " ") === s"$baseScopeName @ $formattedBatchTime") + } + + /** Assert that all the specified options are defined. 
*/ + private def assertDefined[T](options: Option[T]*): Unit = { + options.zipWithIndex.foreach { case (o, i) => assert(o.isDefined, s"Option $i was empty!") } + } + +} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala index eb136758249d5..b74d67c63a788 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/InputStreamsSuite.scala @@ -18,9 +18,9 @@ package org.apache.spark.streaming import java.io.{File, BufferedWriter, OutputStreamWriter} -import java.net.{SocketException, ServerSocket} +import java.net.{Socket, SocketException, ServerSocket} import java.nio.charset.Charset -import java.util.concurrent.{Executors, TimeUnit, ArrayBlockingQueue} +import java.util.concurrent.{CountDownLatch, Executors, TimeUnit, ArrayBlockingQueue} import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer, SynchronizedQueue} @@ -36,58 +36,103 @@ import org.scalatest.concurrent.Eventually._ import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.scheduler.{StreamingListenerBatchCompleted, StreamingListener} import org.apache.spark.util.{ManualClock, Utils} import org.apache.spark.streaming.dstream.{InputDStream, ReceiverInputDStream} +import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD import org.apache.spark.streaming.receiver.Receiver class InputStreamsSuite extends TestSuiteBase with BeforeAndAfter { test("socket input stream") { - // Start the server - val testServer = new TestServer() - testServer.start() + withTestServer(new TestServer()) { testServer => + // Start the server + testServer.start() - // Set up the streaming context and input streams - val ssc = new StreamingContext(conf, batchDuration) - val networkStream = ssc.socketTextStream( - "localhost", testServer.port, StorageLevel.MEMORY_AND_DISK) - val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]] - val outputStream = new TestOutputStream(networkStream, outputBuffer) - def output: ArrayBuffer[String] = outputBuffer.flatMap(x => x) - outputStream.register() - ssc.start() + // Set up the streaming context and input streams + withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => + ssc.addStreamingListener(ssc.progressListener) - // Feed data to the server to send to the network receiver - val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] - val input = Seq(1, 2, 3, 4, 5) - val expectedOutput = input.map(_.toString) - Thread.sleep(1000) - for (i <- 0 until input.size) { - testServer.send(input(i).toString + "\n") - Thread.sleep(500) - clock.advance(batchDuration.milliseconds) + val input = Seq(1, 2, 3, 4, 5) + // Use "batchCount" to make sure we check the result after all batches finish + val batchCounter = new BatchCounter(ssc) + val networkStream = ssc.socketTextStream( + "localhost", testServer.port, StorageLevel.MEMORY_AND_DISK) + val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]] + val outputStream = new TestOutputStream(networkStream, outputBuffer) + outputStream.register() + ssc.start() + + // Feed data to the server to send to the network receiver + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + val expectedOutput = input.map(_.toString) + for (i <- 0 until input.size) { + 
testServer.send(input(i).toString + "\n") + Thread.sleep(500) + clock.advance(batchDuration.milliseconds) + } + // Make sure we finish all batches before "stop" + if (!batchCounter.waitUntilBatchesCompleted(input.size, 30000)) { + fail("Timeout: cannot finish all batches in 30 seconds") + } + + // Verify all "InputInfo"s have been reported + assert(ssc.progressListener.numTotalReceivedRecords === input.size) + assert(ssc.progressListener.numTotalProcessedRecords === input.size) + + logInfo("Stopping server") + testServer.stop() + logInfo("Stopping context") + ssc.stop() + + // Verify whether data received was as expected + logInfo("--------------------------------") + logInfo("output.size = " + outputBuffer.size) + logInfo("output") + outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("expected output.size = " + expectedOutput.size) + logInfo("expected output") + expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) + logInfo("--------------------------------") + + // Verify whether all the elements received are as expected + // (whether the elements were received one in each interval is not verified) + val output: ArrayBuffer[String] = outputBuffer.flatMap(x => x) + assert(output.size === expectedOutput.size) + for (i <- 0 until output.size) { + assert(output(i) === expectedOutput(i)) + } + } } - Thread.sleep(1000) - logInfo("Stopping server") - testServer.stop() - logInfo("Stopping context") - ssc.stop() + } - // Verify whether data received was as expected - logInfo("--------------------------------") - logInfo("output.size = " + outputBuffer.size) - logInfo("output") - outputBuffer.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("expected output.size = " + expectedOutput.size) - logInfo("expected output") - expectedOutput.foreach(x => logInfo("[" + x.mkString(",") + "]")) - logInfo("--------------------------------") + test("socket input stream - no block in a batch") { + withTestServer(new TestServer()) { testServer => + testServer.start() - // Verify whether all the elements received are as expected - // (whether the elements were received one in each interval is not verified) - assert(output.size === expectedOutput.size) - for (i <- 0 until output.size) { - assert(output(i) === expectedOutput(i)) + withStreamingContext(new StreamingContext(conf, batchDuration)) { ssc => + ssc.addStreamingListener(ssc.progressListener) + + val batchCounter = new BatchCounter(ssc) + val networkStream = ssc.socketTextStream( + "localhost", testServer.port, StorageLevel.MEMORY_AND_DISK) + val outputBuffer = new ArrayBuffer[Seq[String]] with SynchronizedBuffer[Seq[String]] + val outputStream = new TestOutputStream(networkStream, outputBuffer) + outputStream.register() + ssc.start() + + val clock = ssc.scheduler.clock.asInstanceOf[ManualClock] + clock.advance(batchDuration.milliseconds) + + // Make sure the first batch is finished + if (!batchCounter.waitUntilBatchesCompleted(1, 30000)) { + fail("Timeout: cannot finish all batches in 30 seconds") + } + + networkStream.generatedRDDs.foreach { case (_, rdd) => + assert(!rdd.isInstanceOf[WriteAheadLogBackedBlockRDD[_]]) + } + } } } @@ -368,31 +413,45 @@ class TestServer(portToBind: Int = 0) extends Logging { val serverSocket = new ServerSocket(portToBind) + private val startLatch = new CountDownLatch(1) + val servingThread = new Thread() { override def run() { try { - while(true) { + while (true) { logInfo("Accepting connections on port " + port) val clientSocket = serverSocket.accept() - logInfo("New 
connection") - try { - clientSocket.setTcpNoDelay(true) - val outputStream = new BufferedWriter( - new OutputStreamWriter(clientSocket.getOutputStream)) - - while(clientSocket.isConnected) { - val msg = queue.poll(100, TimeUnit.MILLISECONDS) - if (msg != null) { - outputStream.write(msg) - outputStream.flush() - logInfo("Message '" + msg + "' sent") + if (startLatch.getCount == 1) { + // The first connection is a test connection to implement "waitForStart", so skip it + // and send a signal + if (!clientSocket.isClosed) { + clientSocket.close() + } + startLatch.countDown() + } else { + // Real connections + logInfo("New connection") + try { + clientSocket.setTcpNoDelay(true) + val outputStream = new BufferedWriter( + new OutputStreamWriter(clientSocket.getOutputStream)) + + while (clientSocket.isConnected) { + val msg = queue.poll(100, TimeUnit.MILLISECONDS) + if (msg != null) { + outputStream.write(msg) + outputStream.flush() + logInfo("Message '" + msg + "' sent") + } + } + } catch { + case e: SocketException => logError("TestServer error", e) + } finally { + logInfo("Connection closed") + if (!clientSocket.isClosed) { + clientSocket.close() } } - } catch { - case e: SocketException => logError("TestServer error", e) - } finally { - logInfo("Connection closed") - if (!clientSocket.isClosed) clientSocket.close() } } } catch { @@ -404,7 +463,29 @@ class TestServer(portToBind: Int = 0) extends Logging { } } - def start() { servingThread.start() } + def start(): Unit = { + servingThread.start() + if (!waitForStart(10000)) { + stop() + throw new AssertionError("Timeout: TestServer cannot start in 10 seconds") + } + } + + /** + * Wait until the server starts. Return true if the server starts in "millis" milliseconds. + * Otherwise, return false to indicate it's timeout. + */ + private def waitForStart(millis: Long): Boolean = { + // We will create a test connection to the server so that we can make sure it has started. 
+ val socket = new Socket("localhost", port) + try { + startLatch.await(millis, TimeUnit.MILLISECONDS) + } finally { + if (!socket.isClosed) { + socket.close() + } + } + } def send(msg: String) { queue.put(msg) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala index 8317fb9720416..b1af8d5eaacfb 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala @@ -67,15 +67,20 @@ class ReceivedBlockTrackerSuite // Verify added blocks are unallocated blocks receivedBlockTracker.getUnallocatedBlocks(streamId) shouldEqual blockInfos + receivedBlockTracker.hasUnallocatedReceivedBlocks should be (true) + // Allocate the blocks to a batch and verify that all of them have been allocated receivedBlockTracker.allocateBlocksToBatch(1) receivedBlockTracker.getBlocksOfBatchAndStream(1, streamId) shouldEqual blockInfos + receivedBlockTracker.getBlocksOfBatch(1) shouldEqual Map(streamId -> blockInfos) receivedBlockTracker.getUnallocatedBlocks(streamId) shouldBe empty + receivedBlockTracker.hasUnallocatedReceivedBlocks should be (false) // Allocate no blocks to another batch receivedBlockTracker.allocateBlocksToBatch(2) receivedBlockTracker.getBlocksOfBatchAndStream(2, streamId) shouldBe empty + receivedBlockTracker.getBlocksOfBatch(2) shouldEqual Map(streamId -> Seq.empty) // Verify that older batches have no operation on batch allocation, // will return the same blocks as previously allocated. @@ -88,7 +93,7 @@ class ReceivedBlockTrackerSuite receivedBlockTracker.getUnallocatedBlocks(streamId) shouldEqual blockInfos } - test("block addition, block to batch allocation and clean up with write ahead log") { + test("recovery and cleanup with write ahead logs") { val manualClock = new ManualClock // Set the time increment level to twice the rotation interval so that every increment creates // a new log file @@ -114,9 +119,7 @@ class ReceivedBlockTrackerSuite } // Set WAL configuration - conf.set("spark.streaming.receiver.writeAheadLog.enable", "true") conf.set("spark.streaming.driver.writeAheadLog.rollingIntervalSecs", "1") - require(WriteAheadLogUtils.enableReceiverLog(conf)) require(WriteAheadLogUtils.getRollingIntervalSecs(conf, isDriver = true) === 1) // Start tracker and add blocks @@ -131,15 +134,27 @@ class ReceivedBlockTrackerSuite getWrittenLogData() shouldEqual expectedWrittenData1 getWriteAheadLogFiles() should have size 1 - // Restart tracker and verify recovered list of unallocated blocks incrementTime() - val tracker2 = createTracker(clock = manualClock) - tracker2.getUnallocatedBlocks(streamId).toList shouldEqual blockInfos1 + + // Recovery without recovery from WAL and verify list of unallocated blocks is empty + val tracker1_ = createTracker(clock = manualClock, recoverFromWriteAheadLog = false) + tracker1_.getUnallocatedBlocks(streamId) shouldBe empty + tracker1_.hasUnallocatedReceivedBlocks should be (false) + + // Restart tracker and verify recovered list of unallocated blocks + val tracker2 = createTracker(clock = manualClock, recoverFromWriteAheadLog = true) + val unallocatedBlocks = tracker2.getUnallocatedBlocks(streamId).toList + unallocatedBlocks shouldEqual blockInfos1 + unallocatedBlocks.foreach { block => + block.isBlockIdValid() should be (false) + } + // Allocate blocks to batch and verify whether the unallocated blocks got 
allocated val batchTime1 = manualClock.getTimeMillis() tracker2.allocateBlocksToBatch(batchTime1) tracker2.getBlocksOfBatchAndStream(batchTime1, streamId) shouldEqual blockInfos1 + tracker2.getBlocksOfBatch(batchTime1) shouldEqual Map(streamId -> blockInfos1) // Add more blocks and allocate to another batch incrementTime() @@ -157,7 +172,7 @@ class ReceivedBlockTrackerSuite // Restart tracker and verify recovered state incrementTime() - val tracker3 = createTracker(clock = manualClock) + val tracker3 = createTracker(clock = manualClock, recoverFromWriteAheadLog = true) tracker3.getBlocksOfBatchAndStream(batchTime1, streamId) shouldEqual blockInfos1 tracker3.getBlocksOfBatchAndStream(batchTime2, streamId) shouldEqual blockInfos2 tracker3.getUnallocatedBlocks(streamId) shouldBe empty @@ -180,28 +195,16 @@ class ReceivedBlockTrackerSuite // Restart tracker and verify recovered state, specifically whether info about the first // batch has been removed, but not the second batch incrementTime() - val tracker4 = createTracker(clock = manualClock) + val tracker4 = createTracker(clock = manualClock, recoverFromWriteAheadLog = true) tracker4.getUnallocatedBlocks(streamId) shouldBe empty tracker4.getBlocksOfBatchAndStream(batchTime1, streamId) shouldBe empty // should be cleaned tracker4.getBlocksOfBatchAndStream(batchTime2, streamId) shouldEqual blockInfos2 } - test("enabling write ahead log but not setting checkpoint dir") { - conf.set("spark.streaming.receiver.writeAheadLog.enable", "true") - intercept[SparkException] { - createTracker(setCheckpointDir = false) - } - } - - test("setting checkpoint dir but not enabling write ahead log") { - // When WAL config is not set, log manager should not be enabled - val tracker1 = createTracker(setCheckpointDir = true) + test("disable write ahead log when checkpoint directory is not set") { + // When checkpoint is disabled, then the write ahead log is disabled + val tracker1 = createTracker(setCheckpointDir = false) tracker1.isWriteAheadLogEnabled should be (false) - - // When WAL is explicitly disabled, log manager should not be enabled - conf.set("spark.streaming.receiver.writeAheadLog.enable", "false") - val tracker2 = createTracker(setCheckpointDir = true) - tracker2.isWriteAheadLogEnabled should be(false) } /** @@ -210,16 +213,18 @@ class ReceivedBlockTrackerSuite */ def createTracker( setCheckpointDir: Boolean = true, + recoverFromWriteAheadLog: Boolean = false, clock: Clock = new SystemClock): ReceivedBlockTracker = { val cpDirOption = if (setCheckpointDir) Some(checkpointDirectory.toString) else None - val tracker = new ReceivedBlockTracker(conf, hadoopConf, Seq(streamId), clock, cpDirOption) + val tracker = new ReceivedBlockTracker( + conf, hadoopConf, Seq(streamId), clock, recoverFromWriteAheadLog, cpDirOption) allReceivedBlockTrackers += tracker tracker } /** Generate blocks infos using random ids */ def generateBlockInfos(): Seq[ReceivedBlockInfo] = { - List.fill(5)(ReceivedBlockInfo(streamId, 0, + List.fill(5)(ReceivedBlockInfo(streamId, 0, None, BlockManagerBasedStoreResult(StreamBlockId(streamId, math.abs(Random.nextInt))))) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala index 393a360cfe150..5d7127627eea5 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverSuite.scala @@ -256,8 +256,8 @@ class ReceiverSuite extends TestSuiteBase with 
Timeouts with Serializable { } withStreamingContext(new StreamingContext(sparkConf, batchDuration)) { ssc => - val receiver1 = ssc.sparkContext.clean(new FakeReceiver(sendData = true)) - val receiver2 = ssc.sparkContext.clean(new FakeReceiver(sendData = true)) + val receiver1 = new FakeReceiver(sendData = true) + val receiver2 = new FakeReceiver(sendData = true) val receiverStream1 = ssc.receiverStream(receiver1) val receiverStream2 = ssc.receiverStream(receiver2) receiverStream1.register() diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 5207b7109e69b..317c7251d9e01 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -17,21 +17,21 @@ package org.apache.spark.streaming -import java.io.File +import java.io.{File, NotSerializableException} import java.util.concurrent.atomic.AtomicInteger import org.apache.commons.io.FileUtils -import org.scalatest.{Assertions, BeforeAndAfter, FunSuite} -import org.scalatest.concurrent.Timeouts import org.scalatest.concurrent.Eventually._ +import org.scalatest.concurrent.Timeouts import org.scalatest.exceptions.TestFailedDueToTimeoutException import org.scalatest.time.SpanSugar._ +import org.scalatest.{Assertions, BeforeAndAfter, FunSuite} -import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException} class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts with Logging { @@ -41,6 +41,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w val batchDuration = Milliseconds(500) val sparkHome = "someDir" val envPair = "key" -> "value" + val conf = new SparkConf().setMaster(master).setAppName(appName) var sc: SparkContext = null var ssc: StreamingContext = null @@ -109,24 +110,70 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(ssc.conf.getTimeAsSeconds("spark.cleaner.ttl", "-1") === 10) } + test("state matching") { + import StreamingContextState._ + assert(INITIALIZED === INITIALIZED) + assert(INITIALIZED != ACTIVE) + } + test("start and stop state check") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() - assert(ssc.state === ssc.StreamingContextState.Initialized) + assert(ssc.getState() === StreamingContextState.INITIALIZED) ssc.start() - assert(ssc.state === ssc.StreamingContextState.Started) + assert(ssc.getState() === StreamingContextState.ACTIVE) ssc.stop() - assert(ssc.state === ssc.StreamingContextState.Stopped) + assert(ssc.getState() === StreamingContextState.STOPPED) + + // Make sure that the SparkContext is also stopped by default + intercept[Exception] { + ssc.sparkContext.makeRDD(1 to 10) + } + } + + test("start with non-seriazable DStream checkpoints") { + val checkpointDir = Utils.createTempDir() + ssc = new StreamingContext(conf, batchDuration) + ssc.checkpoint(checkpointDir.getAbsolutePath) + addInputStream(ssc).foreachRDD { rdd => + // Refer to this.appName from inside closure so that this closure refers to + // the instance of StreamingContextSuite, and is therefore not serializable + 
rdd.count() + appName + } + + // Test whether start() fails early when checkpointing is enabled + val exception = intercept[NotSerializableException] { + ssc.start() + } + assert(exception.getMessage().contains("DStreams with their functions are not serializable")) + assert(ssc.getState() !== StreamingContextState.ACTIVE) + assert(StreamingContext.getActive().isEmpty) } + test("start failure should stop internal components") { + ssc = new StreamingContext(conf, batchDuration) + val inputStream = addInputStream(ssc) + val updateFunc = (values: Seq[Int], state: Option[Int]) => { + Some(values.sum + state.getOrElse(0)) + } + inputStream.map(x => (x, 1)).updateStateByKey[Int](updateFunc) + // Require that the start fails because checkpoint directory was not set + intercept[Exception] { + ssc.start() + } + assert(ssc.getState() === StreamingContextState.STOPPED) + assert(ssc.scheduler.isStarted === false) + } + + test("start multiple times") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.start() - intercept[SparkException] { - ssc.start() - } + assert(ssc.getState() === StreamingContextState.ACTIVE) + ssc.start() + assert(ssc.getState() === StreamingContextState.ACTIVE) } test("stop multiple times") { @@ -134,13 +181,16 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w addInputStream(ssc).register() ssc.start() ssc.stop() + assert(ssc.getState() === StreamingContextState.STOPPED) ssc.stop() + assert(ssc.getState() === StreamingContextState.STOPPED) } test("stop before start") { ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.stop() // stop before start should not throw exception + assert(ssc.getState() === StreamingContextState.STOPPED) } test("start after stop") { @@ -148,22 +198,34 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w ssc = new StreamingContext(master, appName, batchDuration) addInputStream(ssc).register() ssc.stop() - intercept[SparkException] { + intercept[IllegalStateException] { ssc.start() // start after stop should throw exception } + assert(ssc.getState() === StreamingContextState.STOPPED) } test("stop only streaming context") { - ssc = new StreamingContext(master, appName, batchDuration) + val conf = new SparkConf().setMaster(master).setAppName(appName) + + // Explicitly do not stop SparkContext + ssc = new StreamingContext(conf, batchDuration) sc = ssc.sparkContext addInputStream(ssc).register() ssc.start() ssc.stop(stopSparkContext = false) + assert(ssc.getState() === StreamingContextState.STOPPED) assert(sc.makeRDD(1 to 100).collect().size === 100) - ssc = new StreamingContext(sc, batchDuration) + sc.stop() + + // Implicitly do not stop SparkContext + conf.set("spark.streaming.stopSparkContextByDefault", "false") + ssc = new StreamingContext(conf, batchDuration) + sc = ssc.sparkContext addInputStream(ssc).register() ssc.start() ssc.stop() + assert(sc.makeRDD(1 to 100).collect().size === 100) + sc.stop() } test("stop(stopSparkContext=true) after stop(stopSparkContext=false)") { @@ -364,23 +426,23 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w assert(newContextCreated, "new context not created") } - val corrutedCheckpointPath = createCorruptedCheckpoint() + val corruptedCheckpointPath = createCorruptedCheckpoint() // getOrCreate should throw exception with fake checkpoint file and createOnError = false intercept[Exception] { - ssc = 
StreamingContext.getOrCreate(corrutedCheckpointPath, creatingFunction _) + ssc = StreamingContext.getOrCreate(corruptedCheckpointPath, creatingFunction _) } // getOrCreate should throw exception with fake checkpoint file intercept[Exception] { ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, createOnError = false) + corruptedCheckpointPath, creatingFunction _, createOnError = false) } // getOrCreate should create new context with fake checkpoint file and createOnError = true testGetOrCreate { ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, createOnError = true) + corruptedCheckpointPath, creatingFunction _, createOnError = true) assert(ssc != null, "no context created") assert(newContextCreated, "new context not created") } @@ -392,27 +454,39 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w ssc = StreamingContext.getOrCreate(checkpointPath, creatingFunction _) assert(ssc != null, "no context created") assert(!newContextCreated, "old context not recovered") - assert(ssc.conf.get("someKey") === "someValue") + assert(ssc.conf.get("someKey") === "someValue", "checkpointed config not recovered") + } + + // getOrCreate should recover StreamingContext with existing SparkContext + testGetOrCreate { + sc = new SparkContext(conf) + ssc = StreamingContext.getOrCreate(checkpointPath, creatingFunction _) + assert(ssc != null, "no context created") + assert(!newContextCreated, "old context not recovered") + assert(!ssc.conf.contains("someKey"), "checkpointed config unexpectedly recovered") } } - test("getOrCreate with existing SparkContext") { - val conf = new SparkConf().setMaster(master).setAppName(appName) + test("getActive and getActiveOrCreate") { + require(StreamingContext.getActive().isEmpty, "context exists from before") sc = new SparkContext(conf) - // Function to create StreamingContext that has a config to identify it to be new context var newContextCreated = false - def creatingFunction(sparkContext: SparkContext): StreamingContext = { + + def creatingFunc(): StreamingContext = { newContextCreated = true - new StreamingContext(sparkContext, batchDuration) + val newSsc = new StreamingContext(sc, batchDuration) + val input = addInputStream(newSsc) + input.foreachRDD { rdd => rdd.count } + newSsc } - // Call ssc.stop(stopSparkContext = false) after a body of cody - def testGetOrCreate(body: => Unit): Unit = { + def testGetActiveOrCreate(body: => Unit): Unit = { newContextCreated = false try { body } finally { + if (ssc != null) { ssc.stop(stopSparkContext = false) } @@ -420,55 +494,177 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with Timeouts w } } + // getActiveOrCreate should create new context and getActive should return it only + // after starting the context + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate(creatingFunc _) + assert(ssc != null, "no context created") + assert(newContextCreated === true, "new context not created") + assert(StreamingContext.getActive().isEmpty, + "new initialized context returned before starting") + ssc.start() + assert(StreamingContext.getActive() === Some(ssc), + "active context not returned") + assert(StreamingContext.getActiveOrCreate(creatingFunc _) === ssc, + "active context not returned") + ssc.stop() + assert(StreamingContext.getActive().isEmpty, + "inactive context returned") + assert(StreamingContext.getActiveOrCreate(creatingFunc _) !== ssc, + "inactive context returned") + } + + // getActiveOrCreate and 
getActive should return independently created context after activating + testGetActiveOrCreate { + ssc = creatingFunc() // Create + assert(StreamingContext.getActive().isEmpty, + "new initialized context returned before starting") + ssc.start() + assert(StreamingContext.getActive() === Some(ssc), + "active context not returned") + assert(StreamingContext.getActiveOrCreate(creatingFunc _) === ssc, + "active context not returned") + ssc.stop() + assert(StreamingContext.getActive().isEmpty, + "inactive context returned") + } + } + + test("getActiveOrCreate with checkpoint") { + // Function to create StreamingContext that has a config to identify it to be new context + var newContextCreated = false + def creatingFunction(): StreamingContext = { + newContextCreated = true + new StreamingContext(conf, batchDuration) + } + + // Call ssc.stop after a body of code + def testGetActiveOrCreate(body: => Unit): Unit = { + require(StreamingContext.getActive().isEmpty) // no active context + newContextCreated = false + try { + body + } finally { + if (ssc != null) { + ssc.stop() + } + ssc = null + } + } + val emptyPath = Utils.createTempDir().getAbsolutePath() + val corruptedCheckpointPath = createCorruptedCheckpoint() + val checkpointPath = createValidCheckpoint() - // getOrCreate should create new context with empty path - testGetOrCreate { - ssc = StreamingContext.getOrCreate(emptyPath, creatingFunction _, sc, createOnError = true) + // getActiveOrCreate should return the current active context if there is one + testGetActiveOrCreate { + ssc = new StreamingContext( + conf.clone.set("spark.streaming.clock", "org.apache.spark.util.ManualClock"), batchDuration) + addInputStream(ssc).register() + ssc.start() + val returnedSsc = StreamingContext.getActiveOrCreate(checkpointPath, creatingFunction _) + assert(!newContextCreated, "new context created instead of returning") + assert(returnedSsc.eq(ssc), "returned context is not the activated context") + } + + // getActiveOrCreate should create new context with empty path + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate(emptyPath, creatingFunction _) assert(ssc != null, "no context created") assert(newContextCreated, "new context not created") - assert(ssc.sparkContext === sc, "new StreamingContext does not use existing SparkContext") } - val corrutedCheckpointPath = createCorruptedCheckpoint() - - // getOrCreate should throw exception with fake checkpoint file and createOnError = false + // getActiveOrCreate should throw exception with fake checkpoint file and createOnError = false intercept[Exception] { - ssc = StreamingContext.getOrCreate(corrutedCheckpointPath, creatingFunction _, sc) + ssc = StreamingContext.getOrCreate(corruptedCheckpointPath, creatingFunction _) } - // getOrCreate should throw exception with fake checkpoint file + // getActiveOrCreate should throw exception with fake checkpoint file intercept[Exception] { - ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, sc, createOnError = false) + ssc = StreamingContext.getActiveOrCreate( + corruptedCheckpointPath, creatingFunction _, createOnError = false) } - // getOrCreate should create new context with fake checkpoint file and createOnError = true - testGetOrCreate { - ssc = StreamingContext.getOrCreate( - corrutedCheckpointPath, creatingFunction _, sc, createOnError = true) + // getActiveOrCreate should create new context with fake + // checkpoint file and createOnError = true + testGetActiveOrCreate { + ssc = 
StreamingContext.getActiveOrCreate( + corruptedCheckpointPath, creatingFunction _, createOnError = true) assert(ssc != null, "no context created") assert(newContextCreated, "new context not created") - assert(ssc.sparkContext === sc, "new StreamingContext does not use existing SparkContext") } - val checkpointPath = createValidCheckpoint() - - // StreamingContext.getOrCreate should recover context with checkpoint path - testGetOrCreate { - ssc = StreamingContext.getOrCreate(checkpointPath, creatingFunction _, sc) + // getActiveOrCreate should recover context with checkpoint path, and recover old configuration + testGetActiveOrCreate { + ssc = StreamingContext.getActiveOrCreate(checkpointPath, creatingFunction _) assert(ssc != null, "no context created") assert(!newContextCreated, "old context not recovered") - assert(ssc.sparkContext === sc, "new StreamingContext does not use existing SparkContext") - assert(!ssc.conf.contains("someKey"), - "recovered StreamingContext unexpectedly has old config") + assert(ssc.conf.get("someKey") === "someValue") + } + } + + test("multiple streaming contexts") { + sc = new SparkContext( + conf.clone.set("spark.streaming.clock", "org.apache.spark.util.ManualClock")) + ssc = new StreamingContext(sc, Seconds(1)) + val input = addInputStream(ssc) + input.foreachRDD { rdd => rdd.count } + ssc.start() + + // Creating another streaming context should not create errors + val anotherSsc = new StreamingContext(sc, Seconds(10)) + val anotherInput = addInputStream(anotherSsc) + anotherInput.foreachRDD { rdd => rdd.count } + + val exception = intercept[IllegalStateException] { + anotherSsc.start() } + assert(exception.getMessage.contains("StreamingContext"), "Did not get the right exception") } test("DStream and generated RDD creation sites") { testPackage.test() } + test("throw exception on using active or stopped context") { + val conf = new SparkConf() + .setMaster(master) + .setAppName(appName) + .set("spark.streaming.clock", "org.apache.spark.util.ManualClock") + ssc = new StreamingContext(conf, batchDuration) + require(ssc.getState() === StreamingContextState.INITIALIZED) + val input = addInputStream(ssc) + val transformed = input.map { x => x} + transformed.foreachRDD { rdd => rdd.count } + + def testForException(clue: String, expectedErrorMsg: String)(body: => Unit): Unit = { + withClue(clue) { + val ex = intercept[IllegalStateException] { + body + } + assert(ex.getMessage.toLowerCase().contains(expectedErrorMsg)) + } + } + + ssc.start() + require(ssc.getState() === StreamingContextState.ACTIVE) + testForException("no error on adding input after start", "start") { + addInputStream(ssc) } + testForException("no error on adding transformation after start", "start") { + input.map { x => x * 2 } } + testForException("no error on adding output operation after start", "start") { + transformed.foreachRDD { rdd => rdd.collect() } } + + ssc.stop() + require(ssc.getState() === StreamingContextState.STOPPED) + testForException("no error on adding input after stop", "stop") { + addInputStream(ssc) } + testForException("no error on adding transformation after stop", "stop") { + input.map { x => x * 2 } } + testForException("no error on adding output operation after stop", "stop") { + transformed.foreachRDD { rdd => rdd.collect() } } + } + def addInputStream(s: StreamingContext): DStream[Int] = { val input = (1 to 100).map(i => 1 to i) val inputStream = new TestInputStream(s, input, 1) @@ -478,9 +674,7 @@ class StreamingContextSuite extends FunSuite with BeforeAndAfter with 
Timeouts w def createValidCheckpoint(): String = { val testDirectory = Utils.createTempDir().getAbsolutePath() val checkpointDirectory = Utils.createTempDir().getAbsolutePath() - val conf = new SparkConf().setMaster(master).setAppName(appName) - conf.set("someKey", "someValue") - ssc = new StreamingContext(conf, batchDuration) + ssc = new StreamingContext(conf.clone.set("someKey", "someValue"), batchDuration) ssc.checkpoint(checkpointDirectory) ssc.textFileStream(testDirectory).foreachRDD { rdd => rdd.count() } ssc.start() @@ -554,7 +748,9 @@ class SlowTestReceiver(totalRecords: Int, recordsPerSecond: Int) def onStop() { // Simulate slow receiver by waiting for all records to be produced - while(!SlowTestReceiver.receivedAllRecords) Thread.sleep(100) + while (!SlowTestReceiver.receivedAllRecords) { + Thread.sleep(100) + } // no clean to be done, the receiving thread should stop on it own } } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala index 312cce408cfe7..1dc8960d60528 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingListenerSuite.scala @@ -133,8 +133,10 @@ class StreamingListenerSuite extends TestSuiteBase with Matchers { /** Check if a sequence of numbers is in increasing order */ def isInIncreasingOrder(seq: Seq[Long]): Boolean = { - for(i <- 1 until seq.size) { - if (seq(i - 1) > seq(i)) return false + for (i <- 1 until seq.size) { + if (seq(i - 1) > seq(i)) { + return false + } } true } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index 2ba86aeaf9d51..554cd30223f44 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -35,6 +35,24 @@ import org.apache.spark.streaming.dstream.{DStream, InputDStream, ForEachDStream import org.apache.spark.streaming.scheduler._ import org.apache.spark.util.{ManualClock, Utils} +/** + * A dummy stream that does absolutely nothing. + */ +private[streaming] class DummyDStream(ssc: StreamingContext) extends DStream[Int](ssc) { + override def dependencies: List[DStream[Int]] = List.empty + override def slideDuration: Duration = Seconds(1) + override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) +} + +/** + * A dummy input stream that does absolutely nothing. + */ +private[streaming] class DummyInputDStream(ssc: StreamingContext) extends InputDStream[Int](ssc) { + override def start(): Unit = { } + override def stop(): Unit = { } + override def compute(time: Time): Option[RDD[Int]] = Some(ssc.sc.emptyRDD[Int]) +} + /** * This is a input stream just for the testsuites. This is equivalent to a checkpointable, * replayable, reliable message queue like Kafka. 
It requires a sequence as input, and @@ -73,9 +91,11 @@ class TestInputStream[T: ClassTag](ssc_ : StreamingContext, input: Seq[Seq[T]], * * The buffer contains a sequence of RDD's, each containing a sequence of items */ -class TestOutputStream[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) - extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { +class TestOutputStream[T: ClassTag]( + parent: DStream[T], + val output: SynchronizedBuffer[Seq[T]] = + new ArrayBuffer[Seq[T]] with SynchronizedBuffer[Seq[T]] + ) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output += collected }) { @@ -95,8 +115,10 @@ class TestOutputStream[T: ClassTag](parent: DStream[T], * The buffer contains a sequence of RDD's, each containing a sequence of partitions, each * containing a sequence of items. */ -class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], - val output: ArrayBuffer[Seq[Seq[T]]] = ArrayBuffer[Seq[Seq[T]]]()) +class TestOutputStreamWithPartitions[T: ClassTag]( + parent: DStream[T], + val output: SynchronizedBuffer[Seq[Seq[T]]] = + new ArrayBuffer[Seq[Seq[T]]] with SynchronizedBuffer[Seq[Seq[T]]]) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.glom().collect().map(_.toSeq) output += collected @@ -108,10 +130,6 @@ class TestOutputStreamWithPartitions[T: ClassTag](parent: DStream[T], ois.defaultReadObject() output.clear() } - - def toTestOutputStream: TestOutputStream[T] = { - new TestOutputStream[T](this.parent, this.output.map(_.flatten)) - } } /** @@ -146,6 +164,40 @@ class BatchCounter(ssc: StreamingContext) { def getNumStartedBatches: Int = this.synchronized { numStartedBatches } + + /** + * Wait until `expectedNumCompletedBatches` batches are completed, or timeout. Return true if + * `expectedNumCompletedBatches` batches are completed. Otherwise, return false to indicate it's + * timeout. + * + * @param expectedNumCompletedBatches the `expectedNumCompletedBatches` batches to wait + * @param timeout the maximum time to wait in milliseconds. + */ + def waitUntilBatchesCompleted(expectedNumCompletedBatches: Int, timeout: Long): Boolean = + waitUntilConditionBecomeTrue(numCompletedBatches >= expectedNumCompletedBatches, timeout) + + /** + * Wait until `expectedNumStartedBatches` batches are completed, or timeout. Return true if + * `expectedNumStartedBatches` batches are completed. Otherwise, return false to indicate it's + * timeout. + * + * @param expectedNumStartedBatches the `expectedNumStartedBatches` batches to wait + * @param timeout the maximum time to wait in milliseconds. 
+ */ + def waitUntilBatchesStarted(expectedNumStartedBatches: Int, timeout: Long): Boolean = + waitUntilConditionBecomeTrue(numStartedBatches >= expectedNumStartedBatches, timeout) + + private def waitUntilConditionBecomeTrue(condition: => Boolean, timeout: Long): Boolean = { + synchronized { + var now = System.currentTimeMillis() + val timeoutTick = now + timeout + while (!condition && timeoutTick > now) { + wait(timeoutTick - now) + now = System.currentTimeMillis() + } + condition + } + } } /** @@ -391,12 +443,21 @@ trait TestSuiteBase extends FunSuite with BeforeAndAfter with Logging { logInfo("--------------------------------") // Match the output with the expected output - assert(output.size === expectedOutput.size, "Number of outputs do not match") for (i <- 0 until output.size) { if (useSet) { - assert(output(i).toSet === expectedOutput(i).toSet) + assert( + output(i).toSet === expectedOutput(i).toSet, + s"Set comparison failed\n" + + s"Expected output (${expectedOutput.size} items):\n${expectedOutput.mkString("\n")}\n" + + s"Generated output (${output.size} items): ${output.mkString("\n")}" + ) } else { - assert(output(i).toList === expectedOutput(i).toList) + assert( + output(i).toList === expectedOutput(i).toList, + s"Ordered list comparison failed\n" + + s"Expected output (${expectedOutput.size} items):\n${expectedOutput.mkString("\n")}\n" + + s"Generated output (${output.size} items): ${output.mkString("\n")}" + ) } } logInfo("Output verified successfully") diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala index 5579ac364346c..e6a01656f479d 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TimeSuite.scala @@ -69,6 +69,9 @@ class TimeSuite extends TestSuiteBase { assert(new Time(1200).floor(new Duration(200)) == new Time(1200)) assert(new Time(199).floor(new Duration(200)) == new Time(0)) assert(new Time(1).floor(new Duration(1)) == new Time(1)) + assert(new Time(1350).floor(new Duration(200), new Time(50)) == new Time(1250)) + assert(new Time(1350).floor(new Duration(200), new Time(150)) == new Time(1350)) + assert(new Time(1350).floor(new Duration(200), new Time(200)) == new Time(1200)) } test("isMultipleOf") { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala index 8de43baabc21d..43368de8fc402 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/UISeleniumSuite.scala @@ -28,9 +28,6 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ - - - /** * Selenium tests for the Spark Web UI. 
*/ @@ -94,19 +91,34 @@ class UISeleniumSuite eventually(timeout(10 seconds), interval(50 milliseconds)) { // check whether streaming page exists go to (sparkUI.appUIAddress.stripSuffix("/") + "/streaming") - val statisticText = findAll(cssSelector("li strong")).map(_.text).toSeq - statisticText should contain("Network receivers:") - statisticText should contain("Batch interval:") - + val h3Text = findAll(cssSelector("h3")).map(_.text).toSeq + h3Text should contain("Streaming Statistics") + + // Check stat table + val statTableHeaders = findAll(cssSelector("#stat-table th")).map(_.text).toSeq + statTableHeaders.exists( + _.matches("Timelines \\(Last \\d+ batches, \\d+ active, \\d+ completed\\)")) should be + (true) + statTableHeaders should contain ("Histograms") + + val statTableCells = findAll(cssSelector("#stat-table td")).map(_.text).toSeq + statTableCells.exists(_.contains("Input Rate")) should be (true) + statTableCells.exists(_.contains("Scheduling Delay")) should be (true) + statTableCells.exists(_.contains("Processing Time")) should be (true) + statTableCells.exists(_.contains("Total Delay")) should be (true) + + // Check batch tables val h4Text = findAll(cssSelector("h4")).map(_.text).toSeq h4Text.exists(_.matches("Active Batches \\(\\d+\\)")) should be (true) h4Text.exists(_.matches("Completed Batches \\(last \\d+ out of \\d+\\)")) should be (true) findAll(cssSelector("""#active-batches-table th""")).map(_.text).toSeq should be { - List("Batch Time", "Input Size", "Scheduling Delay", "Processing Time", "Status") + List("Batch Time", "Input Size", "Scheduling Delay (?)", "Processing Time (?)", + "Status") } findAll(cssSelector("""#completed-batches-table th""")).map(_.text).toSeq should be { - List("Batch Time", "Input Size", "Scheduling Delay", "Processing Time", "Total Delay") + List("Batch Time", "Input Size", "Scheduling Delay (?)", "Processing Time (?)", + "Total Delay (?)") } val batchLinks = @@ -176,11 +188,9 @@ class UISeleniumSuite eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (sparkUI.appUIAddress.stripSuffix("/") + "/streaming") - val statisticText = findAll(cssSelector("li strong")).map(_.text).toSeq - statisticText should not contain ("Network receivers:") - statisticText should not contain ("Batch interval:") + val h3Text = findAll(cssSelector("h3")).map(_.text).toSeq + h3Text should not contain("Streaming Statistics") } } } } - diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala index 8b300d8dd3fbe..6859b65c7165f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDDSuite.scala @@ -26,7 +26,7 @@ import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import org.apache.spark.storage.{BlockId, BlockManager, StorageLevel, StreamBlockId} import org.apache.spark.streaming.util.{FileBasedWriteAheadLogSegment, FileBasedWriteAheadLogWriter} import org.apache.spark.util.Utils -import org.apache.spark.{SparkConf, SparkContext} +import org.apache.spark.{SparkConf, SparkContext, SparkException} class WriteAheadLogBackedBlockRDDSuite extends FunSuite with BeforeAndAfterAll with BeforeAndAfterEach { @@ -60,24 +60,35 @@ class WriteAheadLogBackedBlockRDDSuite System.clearProperty("spark.driver.port") } - test("Read data available in block manager and write 
ahead log") { - testRDD(5, 5) + test("Read data available in both block manager and write ahead log") { + testRDD(numPartitions = 5, numPartitionsInBM = 5, numPartitionsInWAL = 5) } test("Read data available only in block manager, not in write ahead log") { - testRDD(5, 0) + testRDD(numPartitions = 5, numPartitionsInBM = 5, numPartitionsInWAL = 0) } test("Read data available only in write ahead log, not in block manager") { - testRDD(0, 5) + testRDD(numPartitions = 5, numPartitionsInBM = 0, numPartitionsInWAL = 5) } - test("Read data available only in write ahead log, and test storing in block manager") { - testRDD(0, 5, testStoreInBM = true) + test("Read data with partially available in block manager, and rest in write ahead log") { + testRDD(numPartitions = 5, numPartitionsInBM = 3, numPartitionsInWAL = 2) } - test("Read data with partially available in block manager, and rest in write ahead log") { - testRDD(3, 2) + test("Test isBlockValid skips block fetching from BlockManager") { + testRDD( + numPartitions = 5, numPartitionsInBM = 5, numPartitionsInWAL = 0, testIsBlockValid = true) + } + + test("Test whether RDD is valid after removing blocks from block manager") { + testRDD( + numPartitions = 5, numPartitionsInBM = 5, numPartitionsInWAL = 5, testBlockRemove = true) + } + + test("Test storing of blocks recovered from write ahead log back into block manager") { + testRDD( + numPartitions = 5, numPartitionsInBM = 0, numPartitionsInWAL = 5, testStoreInBM = true) } /** @@ -85,23 +96,52 @@ class WriteAheadLogBackedBlockRDDSuite * and the rest to a write ahead log, and then reading reading it all back using the RDD. * It can also test if the partitions that were read from the log were again stored in * block manager. - * @param numPartitionsInBM Number of partitions to write to the Block Manager - * @param numPartitionsInWAL Number of partitions to write to the Write Ahead Log - * @param testStoreInBM Test whether blocks read from log are stored back into block manager + * + * + * + * @param numPartitions Number of partitions in RDD + * @param numPartitionsInBM Number of partitions to write to the BlockManager. + * Partitions 0 to (numPartitionsInBM-1) will be written to BlockManager + * @param numPartitionsInWAL Number of partitions to write to the Write Ahead Log. 
+ * Partitions (numPartitions - 1 - numPartitionsInWAL) to + * (numPartitions - 1) will be written to WAL + * @param testIsBlockValid Test whether setting isBlockValid to false skips block fetching + * @param testBlockRemove Test whether calling rdd.removeBlock() makes the RDD still usable with + * reads falling back to the WAL + * @param testStoreInBM Test whether blocks read from log are stored back into block manager + * + * Example with numPartitions = 5, numPartitionsInBM = 3, and numPartitionsInWAL = 4 + * + * numPartitionsInBM = 3 + * |------------------| + * | | + * 0 1 2 3 4 + * | | + * |-------------------------| + * numPartitionsInWAL = 4 */ private def testRDD( - numPartitionsInBM: Int, numPartitionsInWAL: Int, testStoreInBM: Boolean = false) { - val numBlocks = numPartitionsInBM + numPartitionsInWAL - val data = Seq.fill(numBlocks, 10)(scala.util.Random.nextString(50)) + numPartitions: Int, + numPartitionsInBM: Int, + numPartitionsInWAL: Int, + testIsBlockValid: Boolean = false, + testBlockRemove: Boolean = false, + testStoreInBM: Boolean = false + ) { + require(numPartitionsInBM <= numPartitions, + "Can't put more partitions in BlockManager than that in RDD") + require(numPartitionsInWAL <= numPartitions, + "Can't put more partitions in write ahead log than that in RDD") + val data = Seq.fill(numPartitions, 10)(scala.util.Random.nextString(50)) // Put the necessary blocks in the block manager - val blockIds = Array.fill(numBlocks)(StreamBlockId(Random.nextInt(), Random.nextInt())) + val blockIds = Array.fill(numPartitions)(StreamBlockId(Random.nextInt(), Random.nextInt())) data.zip(blockIds).take(numPartitionsInBM).foreach { case(block, blockId) => blockManager.putIterator(blockId, block.iterator, StorageLevel.MEMORY_ONLY_SER) } - // Generate write ahead log file segments - val recordHandles = generateFakeRecordHandles(numPartitionsInBM) ++ + // Generate write ahead log record handles + val recordHandles = generateFakeRecordHandles(numPartitions - numPartitionsInWAL) ++ generateWALRecordHandles(data.takeRight(numPartitionsInWAL), blockIds.takeRight(numPartitionsInWAL)) @@ -111,7 +151,7 @@ class WriteAheadLogBackedBlockRDDSuite "Expected blocks not in BlockManager" ) require( - blockIds.takeRight(numPartitionsInWAL).forall(blockManager.get(_).isEmpty), + blockIds.takeRight(numPartitions - numPartitionsInBM).forall(blockManager.get(_).isEmpty), "Unexpected blocks in BlockManager" ) @@ -122,19 +162,42 @@ class WriteAheadLogBackedBlockRDDSuite "Expected blocks not in write ahead log" ) require( - recordHandles.take(numPartitionsInBM).forall(s => + recordHandles.take(numPartitions - numPartitionsInWAL).forall(s => !new File(s.path.stripPrefix("file://")).exists()), "Unexpected blocks in write ahead log" ) // Create the RDD and verify whether the returned data is correct val rdd = new WriteAheadLogBackedBlockRDD[String](sparkContext, blockIds.toArray, - recordHandles.toArray, storeInBlockManager = false, StorageLevel.MEMORY_ONLY) + recordHandles.toArray, storeInBlockManager = false) assert(rdd.collect() === data.flatten) + // Verify that the block fetching is skipped when isBlockValid is set to false. + // This is done by using a RDD whose data is only in memory but is set to skip block fetching + // Using that RDD will throw exception, as it skips block fetching even if the blocks are in + // in BlockManager. 
+ if (testIsBlockValid) { + require(numPartitionsInBM === numPartitions, "All partitions must be in BlockManager") + require(numPartitionsInWAL === 0, "No partitions must be in WAL") + val rdd2 = new WriteAheadLogBackedBlockRDD[String](sparkContext, blockIds.toArray, + recordHandles.toArray, isBlockIdValid = Array.fill(blockIds.length)(false)) + intercept[SparkException] { + rdd2.collect() + } + } + + // Verify that the RDD is not invalid after the blocks are removed and can still read data + // from write ahead log + if (testBlockRemove) { + require(numPartitions === numPartitionsInWAL, "All partitions must be in WAL for this test") + require(numPartitionsInBM > 0, "Some partitions must be in BlockManager for this test") + rdd.removeBlocks() + assert(rdd.collect() === data.flatten) + } + if (testStoreInBM) { val rdd2 = new WriteAheadLogBackedBlockRDD[String](sparkContext, blockIds.toArray, - recordHandles.toArray, storeInBlockManager = true, StorageLevel.MEMORY_ONLY) + recordHandles.toArray, storeInBlockManager = true, storageLevel = StorageLevel.MEMORY_ONLY) assert(rdd2.collect() === data.flatten) assert( blockIds.forall(blockManager.get(_).nonEmpty), diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala index e874536e63518..c9175d61b1f49 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ui/StreamingJobProgressListenerSuite.scala @@ -64,7 +64,7 @@ class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { listener.numTotalReceivedRecords should be (0) // onBatchStarted - val batchInfoStarted = BatchInfo(Time(1000), streamIdToNumRecords, 1000, Some(2000), None) + val batchInfoStarted = BatchInfo(Time(1000), streamIdToNumRecords, 1000, Some(2000), None) listener.onBatchStarted(StreamingListenerBatchStarted(batchInfoStarted)) listener.waitingBatches should be (Nil) listener.runningBatches should be (List(BatchUIData(batchInfoStarted))) @@ -94,7 +94,7 @@ class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { batchUIData.get.schedulingDelay should be (batchInfoStarted.schedulingDelay) batchUIData.get.processingDelay should be (batchInfoStarted.processingDelay) batchUIData.get.totalDelay should be (batchInfoStarted.totalDelay) - batchUIData.get.receiverNumRecords should be (Map(0 -> 300L, 1 -> 300L)) + batchUIData.get.streamIdToNumRecords should be (Map(0 -> 300L, 1 -> 300L)) batchUIData.get.numRecords should be(600) batchUIData.get.outputOpIdSparkJobIdPairs should be Seq(OutputOpIdAndSparkJobId(0, 0), @@ -138,7 +138,7 @@ class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { test("Remove the old completed batches when exceeding the limit") { val ssc = setupStreams(input, operation) - val limit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 100) + val limit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 1000) val listener = new StreamingJobProgressListener(ssc) val streamIdToNumRecords = Map(0 -> 300L, 1 -> 300L) @@ -155,7 +155,7 @@ class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { test("out-of-order onJobStart and onBatchXXX") { val ssc = setupStreams(input, operation) - val limit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 100) + val limit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 1000) val 
listener = new StreamingJobProgressListener(ssc) // fulfill completedBatchInfos @@ -182,7 +182,7 @@ class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { batchUIData.get.schedulingDelay should be (batchInfoSubmitted.schedulingDelay) batchUIData.get.processingDelay should be (batchInfoSubmitted.processingDelay) batchUIData.get.totalDelay should be (batchInfoSubmitted.totalDelay) - batchUIData.get.receiverNumRecords should be (Map.empty) + batchUIData.get.streamIdToNumRecords should be (Map.empty) batchUIData.get.numRecords should be (0) batchUIData.get.outputOpIdSparkJobIdPairs should be (Seq(OutputOpIdAndSparkJobId(0, 0))) @@ -203,4 +203,48 @@ class StreamingJobProgressListenerSuite extends TestSuiteBase with Matchers { (listener.waitingBatches.size + listener.runningBatches.size + listener.retainedCompletedBatches.size + 10) } + + test("detect memory leak") { + val ssc = setupStreams(input, operation) + val listener = new StreamingJobProgressListener(ssc) + + val limit = ssc.conf.getInt("spark.streaming.ui.retainedBatches", 1000) + + for (_ <- 0 until 2 * limit) { + val streamIdToNumRecords = Map(0 -> 300L, 1 -> 300L) + + // onBatchSubmitted + val batchInfoSubmitted = BatchInfo(Time(1000), streamIdToNumRecords, 1000, None, None) + listener.onBatchSubmitted(StreamingListenerBatchSubmitted(batchInfoSubmitted)) + + // onBatchStarted + val batchInfoStarted = BatchInfo(Time(1000), streamIdToNumRecords, 1000, Some(2000), None) + listener.onBatchStarted(StreamingListenerBatchStarted(batchInfoStarted)) + + // onJobStart + val jobStart1 = createJobStart(Time(1000), outputOpId = 0, jobId = 0) + listener.onJobStart(jobStart1) + + val jobStart2 = createJobStart(Time(1000), outputOpId = 0, jobId = 1) + listener.onJobStart(jobStart2) + + val jobStart3 = createJobStart(Time(1000), outputOpId = 1, jobId = 0) + listener.onJobStart(jobStart3) + + val jobStart4 = createJobStart(Time(1000), outputOpId = 1, jobId = 1) + listener.onJobStart(jobStart4) + + // onBatchCompleted + val batchInfoCompleted = BatchInfo(Time(1000), streamIdToNumRecords, 1000, Some(2000), None) + listener.onBatchCompleted(StreamingListenerBatchCompleted(batchInfoCompleted)) + } + + listener.waitingBatches.size should be (0) + listener.runningBatches.size should be (0) + listener.retainedCompletedBatches.size should be (limit) + listener.batchTimeToOutputOpIdSparkJobIdPair.size() should be <= + (listener.waitingBatches.size + listener.runningBatches.size + + listener.retainedCompletedBatches.size + 10) + } + } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala new file mode 100644 index 0000000000000..e9ab917ab845c --- /dev/null +++ b/streaming/src/test/scala/org/apache/spark/streaming/ui/UIUtilsSuite.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.streaming.ui + +import java.util.TimeZone +import java.util.concurrent.TimeUnit + +import org.scalatest.FunSuite +import org.scalatest.Matchers + +class UIUtilsSuite extends FunSuite with Matchers{ + + test("shortTimeUnitString") { + assert("ns" === UIUtils.shortTimeUnitString(TimeUnit.NANOSECONDS)) + assert("us" === UIUtils.shortTimeUnitString(TimeUnit.MICROSECONDS)) + assert("ms" === UIUtils.shortTimeUnitString(TimeUnit.MILLISECONDS)) + assert("sec" === UIUtils.shortTimeUnitString(TimeUnit.SECONDS)) + assert("min" === UIUtils.shortTimeUnitString(TimeUnit.MINUTES)) + assert("hrs" === UIUtils.shortTimeUnitString(TimeUnit.HOURS)) + assert("days" === UIUtils.shortTimeUnitString(TimeUnit.DAYS)) + } + + test("normalizeDuration") { + verifyNormalizedTime(900, TimeUnit.MILLISECONDS, 900) + verifyNormalizedTime(1.0, TimeUnit.SECONDS, 1000) + verifyNormalizedTime(1.0, TimeUnit.MINUTES, 60 * 1000) + verifyNormalizedTime(1.0, TimeUnit.HOURS, 60 * 60 * 1000) + verifyNormalizedTime(1.0, TimeUnit.DAYS, 24 * 60 * 60 * 1000) + } + + private def verifyNormalizedTime( + expectedTime: Double, expectedUnit: TimeUnit, input: Long): Unit = { + val (time, unit) = UIUtils.normalizeDuration(input) + time should be (expectedTime +- 1E-6) + unit should be (expectedUnit) + } + + test("convertToTimeUnit") { + verifyConvertToTimeUnit(60.0 * 1000 * 1000 * 1000, 60 * 1000, TimeUnit.NANOSECONDS) + verifyConvertToTimeUnit(60.0 * 1000 * 1000, 60 * 1000, TimeUnit.MICROSECONDS) + verifyConvertToTimeUnit(60 * 1000, 60 * 1000, TimeUnit.MILLISECONDS) + verifyConvertToTimeUnit(60, 60 * 1000, TimeUnit.SECONDS) + verifyConvertToTimeUnit(1, 60 * 1000, TimeUnit.MINUTES) + verifyConvertToTimeUnit(1.0 / 60, 60 * 1000, TimeUnit.HOURS) + verifyConvertToTimeUnit(1.0 / 60 / 24, 60 * 1000, TimeUnit.DAYS) + } + + private def verifyConvertToTimeUnit( + expectedTime: Double, milliseconds: Long, unit: TimeUnit): Unit = { + val convertedTime = UIUtils.convertToTimeUnit(milliseconds, unit) + convertedTime should be (expectedTime +- 1E-6) + } + + test("formatBatchTime") { + val tzForTest = TimeZone.getTimeZone("America/Los_Angeles") + val batchTime = 1431637480452L // Thu May 14 14:04:40 PDT 2015 + assert("2015/05/14 14:04:40" === UIUtils.formatBatchTime(batchTime, 1000, timezone = tzForTest)) + assert("2015/05/14 14:04:40.452" === + UIUtils.formatBatchTime(batchTime, 999, timezone = tzForTest)) + assert("14:04:40" === UIUtils.formatBatchTime(batchTime, 1000, false, timezone = tzForTest)) + assert("14:04:40.452" === UIUtils.formatBatchTime(batchTime, 999, false, timezone = tzForTest)) + } +} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala index 79098bcf4861c..616903b64983c 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.{SparkConf, SparkException} class WriteAheadLogSuite extends FunSuite with 
BeforeAndAfter { import WriteAheadLogSuite._ - + val hadoopConf = new Configuration() var tempDir: File = null var testDir: String = null @@ -359,7 +359,7 @@ object WriteAheadLogSuite { ): FileBasedWriteAheadLog = { if (manualClock.getTimeMillis() < 100000) manualClock.setTime(10000) val wal = new FileBasedWriteAheadLog(new SparkConf(), logDirectory, hadoopConf, 1, 1) - + // Ensure that 500 does not get sorted after 2000, so put a high base value. data.foreach { item => manualClock.advance(500) diff --git a/tools/pom.xml b/tools/pom.xml index 1c6f3e83a1819..2672c03555925 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml diff --git a/tox.ini b/tox.ini index b568029a204cc..76e3f42cde62d 100644 --- a/tox.ini +++ b/tox.ini @@ -15,4 +15,4 @@ [pep8] max-line-length=100 -exclude=cloudpickle.py,heapq3.py +exclude=cloudpickle.py,heapq3.py,shared.py diff --git a/unsafe/pom.xml b/unsafe/pom.xml index 5b0733206b2bc..b5000e4be016b 100644 --- a/unsafe/pom.xml +++ b/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -42,6 +42,10 @@ com.google.code.findbugs jsr305 + + com.google.guava + guava + @@ -61,6 +65,11 @@ junit-interface test + + org.mockito + mockito-all + test + target/scala-${scala.binary.version}/classes diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java index f72e07fce92fd..28e23da108ebe 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSet.java @@ -102,4 +102,12 @@ public boolean isSet(int index) { public int nextSetBit(int fromIndex) { return BitSetMethods.nextSetBit(baseObject, baseOffset, fromIndex, numWords); } + + /** + * Returns {@code true} if any bit is set. + */ + public boolean anySet() { + return BitSetMethods.anySet(baseObject, baseOffset, numWords); + } + } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java index f30626d8f4317..0987191c1c636 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/bitset/BitSetMethods.java @@ -70,9 +70,10 @@ public static boolean isSet(Object baseObject, long baseOffset, int index) { /** * Returns {@code true} if any bit is set. 
*/ - public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInBytes) { - for (int i = 0; i <= bitSetWidthInBytes; i++) { - if (PlatformDependent.UNSAFE.getByte(baseObject, baseOffset + i) != 0) { + public static boolean anySet(Object baseObject, long baseOffset, long bitSetWidthInWords) { + long addr = baseOffset; + for (int i = 0; i < bitSetWidthInWords; i++, addr += WORD_SIZE) { + if (PlatformDependent.UNSAFE.getLong(baseObject, addr) != 0) { return true; } } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 4e5ebc402be35..0b4d8d286f5f9 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -23,6 +23,8 @@ import java.util.LinkedList; import java.util.List; +import com.google.common.annotations.VisibleForTesting; + import org.apache.spark.unsafe.*; import org.apache.spark.unsafe.array.ByteArrayMethods; import org.apache.spark.unsafe.array.LongArray; @@ -36,9 +38,8 @@ * This is backed by a power-of-2-sized hash table, using quadratic probing with triangular numbers, * which is guaranteed to exhaust the space. *

- * The map can support up to 2^31 keys because we use 32 bit MurmurHash. If the key cardinality is - * higher than this, you should probably be using sorting instead of hashing for better cache - * locality. + * The map can support up to 2^29 keys. If the key cardinality is higher than this, you should + * probably be using sorting instead of hashing for better cache locality. *
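As a quick cross-check of the 2^29 limit quoted above (an illustrative sketch only, not part of the patch): the hash table keeps two long entries per key in a single Java long[], and the largest power-of-2 length such an array can have is 1 << 30, so the key capacity tops out at half of that.

  // Standalone Scala arithmetic mirroring the reasoning above.
  val maxArrayEntries = 1 << 30            // largest power-of-2 length for a Java long[]
  val entriesPerKey = 2                    // full key address plus hashcode, one long each
  val maxCapacity = maxArrayEntries / entriesPerKey
  assert(maxCapacity == (1 << 29))         // 536,870,912 keys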

* This class is not thread safe. */ @@ -48,6 +49,11 @@ public final class BytesToBytesMap { private static final HashMapGrowthStrategy growthStrategy = HashMapGrowthStrategy.DOUBLING; + /** + * Special record length that is placed after the last record in a data page. + */ + private static final int END_OF_PAGE_MARKER = -1; + private final TaskMemoryManager memoryManager; /** @@ -64,7 +70,7 @@ public final class BytesToBytesMap { /** * Offset into `currentDataPage` that points to the location where new data can be inserted into - * the page. + * the page. This does not incorporate the page's base offset. */ private long pageCursor = 0; @@ -74,6 +80,15 @@ public final class BytesToBytesMap { */ private static final long PAGE_SIZE_BYTES = 1L << 26; // 64 megabytes + /** + * The maximum number of keys that BytesToBytesMap supports. The hash table has to be + * power-of-2-sized and its backing Java array can contain at most (1 << 30) elements, since + * that's the largest power-of-2 that's less than Integer.MAX_VALUE. We need two long array + * entries per key, giving us a maximum capacity of (1 << 29). + */ + @VisibleForTesting + static final int MAX_CAPACITY = (1 << 29); + // This choice of page table size and page size means that we can address up to 500 gigabytes // of memory. @@ -143,6 +158,13 @@ public BytesToBytesMap( this.loadFactor = loadFactor; this.loc = new Location(); this.enablePerfMetrics = enablePerfMetrics; + if (initialCapacity <= 0) { + throw new IllegalArgumentException("Initial capacity must be greater than 0"); + } + if (initialCapacity > MAX_CAPACITY) { + throw new IllegalArgumentException( + "Initial capacity " + initialCapacity + " exceeds maximum capacity of " + MAX_CAPACITY); + } allocate(initialCapacity); } @@ -162,6 +184,55 @@ public BytesToBytesMap( */ public int size() { return size; } + private static final class BytesToBytesMapIterator implements Iterator { + + private final int numRecords; + private final Iterator dataPagesIterator; + private final Location loc; + + private int currentRecordNumber = 0; + private Object pageBaseObject; + private long offsetInPage; + + BytesToBytesMapIterator(int numRecords, Iterator dataPagesIterator, Location loc) { + this.numRecords = numRecords; + this.dataPagesIterator = dataPagesIterator; + this.loc = loc; + if (dataPagesIterator.hasNext()) { + advanceToNextPage(); + } + } + + private void advanceToNextPage() { + final MemoryBlock currentPage = dataPagesIterator.next(); + pageBaseObject = currentPage.getBaseObject(); + offsetInPage = currentPage.getBaseOffset(); + } + + @Override + public boolean hasNext() { + return currentRecordNumber != numRecords; + } + + @Override + public Location next() { + int keyLength = (int) PlatformDependent.UNSAFE.getLong(pageBaseObject, offsetInPage); + if (keyLength == END_OF_PAGE_MARKER) { + advanceToNextPage(); + keyLength = (int) PlatformDependent.UNSAFE.getLong(pageBaseObject, offsetInPage); + } + loc.with(pageBaseObject, offsetInPage); + offsetInPage += 8 + 8 + keyLength + loc.getValueLength(); + currentRecordNumber++; + return loc; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + /** * Returns an iterator for iterating over the entries of this map. * @@ -171,27 +242,7 @@ public BytesToBytesMap( * `lookup()`, the behavior of the returned iterator is undefined. 
*/ public Iterator iterator() { - return new Iterator() { - - private int nextPos = bitset.nextSetBit(0); - - @Override - public boolean hasNext() { - return nextPos != -1; - } - - @Override - public Location next() { - final int pos = nextPos; - nextPos = bitset.nextSetBit(nextPos + 1); - return loc.with(pos, 0, true); - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + return new BytesToBytesMapIterator(size, dataPages.iterator(), loc); } /** @@ -268,8 +319,11 @@ public final class Location { private int valueLength; private void updateAddressesAndSizes(long fullKeyAddress) { - final Object page = memoryManager.getPage(fullKeyAddress); - final long keyOffsetInPage = memoryManager.getOffsetInPage(fullKeyAddress); + updateAddressesAndSizes( + memoryManager.getPage(fullKeyAddress), memoryManager.getOffsetInPage(fullKeyAddress)); + } + + private void updateAddressesAndSizes(Object page, long keyOffsetInPage) { long position = keyOffsetInPage; keyLength = (int) PlatformDependent.UNSAFE.getLong(page, position); position += 8; // word used to store the key size @@ -291,6 +345,12 @@ Location with(int pos, int keyHashcode, boolean isDefined) { return this; } + Location with(Object page, long keyOffsetInPage) { + this.isDefined = true; + updateAddressesAndSizes(page, keyOffsetInPage); + return this; + } + /** * Returns true if the key is defined at this position, and false otherwise. */ @@ -345,15 +405,17 @@ public int getValueLength() { *

* It is only valid to call this method immediately after calling `lookup()` using the same key. *

+ * The key and value must be word-aligned (that is, their sizes must be multiples of 8). + *

* After calling this method, calls to `get[Key|Value]Address()` and `get[Key|Value]Length` * will return information on the data stored by this `putNewKey` call. *

* As an example usage, here's the proper way to store a new key: *

*

-     *   Location loc = map.lookup(keyBaseOffset, keyBaseObject, keyLengthInBytes);
+     *   Location loc = map.lookup(keyBaseObject, keyBaseOffset, keyLengthInBytes);
      *   if (!loc.isDefined()) {
-     *     loc.putNewKey(keyBaseOffset, keyBaseObject, keyLengthInBytes, ...)
+     *     loc.putNewKey(keyBaseObject, keyBaseOffset, keyLengthInBytes, ...)
      *   }
      * 
*

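To make the corrected calling convention above concrete, here is a minimal Scala sketch of the same lookup-then-put pattern, using on-heap long arrays so the word-alignment requirement holds by construction (the helper name putIfAbsent is illustrative, not part of the API):

  import org.apache.spark.unsafe.PlatformDependent
  import org.apache.spark.unsafe.map.BytesToBytesMap

  def putIfAbsent(map: BytesToBytesMap, key: Array[Long], value: Array[Long]): Unit = {
    val keyLengthInBytes = key.length * 8        // a multiple of 8 by construction
    val valueLengthInBytes = value.length * 8
    // Argument order is (baseObject, baseOffset, lengthInBytes), as fixed in the doc example.
    val loc = map.lookup(key, PlatformDependent.LONG_ARRAY_OFFSET, keyLengthInBytes)
    if (!loc.isDefined()) {
      loc.putNewKey(
        key, PlatformDependent.LONG_ARRAY_OFFSET, keyLengthInBytes,
        value, PlatformDependent.LONG_ARRAY_OFFSET, valueLengthInBytes)
    }
  }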
@@ -367,20 +429,29 @@ public void putNewKey( long valueBaseOffset, int valueLengthBytes) { assert (!isDefined) : "Can only set value once for a key"; - isDefined = true; assert (keyLengthBytes % 8 == 0); assert (valueLengthBytes % 8 == 0); + if (size == MAX_CAPACITY) { + throw new IllegalStateException("BytesToBytesMap has reached maximum capacity"); + } // Here, we'll copy the data into our data pages. Because we only store a relative offset from // the key address instead of storing the absolute address of the value, the key and value // must be stored in the same memory page. // (8 byte key length) (key) (8 byte value length) (value) final long requiredSize = 8 + keyLengthBytes + 8 + valueLengthBytes; - assert(requiredSize <= PAGE_SIZE_BYTES); + assert (requiredSize <= PAGE_SIZE_BYTES - 8); // Reserve 8 bytes for the end-of-page marker. size++; bitset.set(pos); - // If there's not enough space in the current page, allocate a new page: - if (currentDataPage == null || PAGE_SIZE_BYTES - pageCursor < requiredSize) { + // If there's not enough space in the current page, allocate a new page (8 bytes are reserved + // for the end-of-page marker). + if (currentDataPage == null || PAGE_SIZE_BYTES - 8 - pageCursor < requiredSize) { + if (currentDataPage != null) { + // There wasn't enough space in the current page, so write an end-of-page marker: + final Object pageBaseObject = currentDataPage.getBaseObject(); + final long lengthOffsetInPage = currentDataPage.getBaseOffset() + pageCursor; + PlatformDependent.UNSAFE.putLong(pageBaseObject, lengthOffsetInPage, END_OF_PAGE_MARKER); + } MemoryBlock newPage = memoryManager.allocatePage(PAGE_SIZE_BYTES); dataPages.add(newPage); pageCursor = 0; @@ -414,7 +485,7 @@ public void putNewKey( longArray.set(pos * 2 + 1, keyHashcode); updateAddressesAndSizes(storedKeyAddress); isDefined = true; - if (size > growthThreshold) { + if (size > growthThreshold && longArray.size() < MAX_CAPACITY) { growAndRehash(); } } @@ -427,8 +498,11 @@ public void putNewKey( * @param capacity the new map capacity */ private void allocate(int capacity) { - capacity = Math.max((int) Math.min(Integer.MAX_VALUE, nextPowerOf2(capacity)), 64); - longArray = new LongArray(memoryManager.allocate(capacity * 8 * 2)); + assert (capacity >= 0); + // The capacity needs to be divisible by 64 so that our bit set can be sized properly + capacity = Math.max((int) Math.min(MAX_CAPACITY, nextPowerOf2(capacity)), 64); + assert (capacity <= MAX_CAPACITY); + longArray = new LongArray(memoryManager.allocate(capacity * 8L * 2)); bitset = new BitSet(MemoryBlock.fromLongArray(new long[capacity / 64])); this.growthThreshold = (int) (capacity * loadFactor); @@ -494,10 +568,16 @@ public long getNumHashCollisions() { return numHashCollisions; } + @VisibleForTesting + int getNumDataPages() { + return dataPages.size(); + } + /** * Grows the size of the hash table and re-hash everything. 
*/ - private void growAndRehash() { + @VisibleForTesting + void growAndRehash() { long resizeStartTime = -1; if (enablePerfMetrics) { resizeStartTime = System.nanoTime(); @@ -508,7 +588,7 @@ private void growAndRehash() { final int oldCapacity = (int) oldBitSet.capacity(); // Allocate the new data structures - allocate(Math.min(Integer.MAX_VALUE, growthStrategy.nextCapacity(oldCapacity))); + allocate(Math.min(growthStrategy.nextCapacity(oldCapacity), MAX_CAPACITY)); // Re-mask (we don't recompute the hashcode because we stored all 32 bits of it) for (int pos = oldBitSet.nextSetBit(0); pos >= 0; pos = oldBitSet.nextSetBit(pos + 1)) { diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java b/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java index 7c321baffe82d..20654e4eeaa02 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/map/HashMapGrowthStrategy.java @@ -32,7 +32,9 @@ public interface HashMapGrowthStrategy { class Doubling implements HashMapGrowthStrategy { @Override public int nextCapacity(int currentCapacity) { - return currentCapacity * 2; + assert (currentCapacity > 0); + // Guard against overflow + return (currentCapacity * 2 > 0) ? (currentCapacity * 2) : Integer.MAX_VALUE; } } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java index 62c29c8cc1e4d..cbbe8594627a5 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/memory/ExecutorMemoryManager.java @@ -17,6 +17,12 @@ package org.apache.spark.unsafe.memory; +import java.lang.ref.WeakReference; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.Map; +import javax.annotation.concurrent.GuardedBy; + /** * Manages memory for an executor. Individual operators / tasks allocate memory through * {@link TaskMemoryManager} objects, which obtain their memory from ExecutorMemoryManager. @@ -33,6 +39,12 @@ public class ExecutorMemoryManager { */ final boolean inHeap; + @GuardedBy("this") + private final Map>> bufferPoolsBySize = + new HashMap>>(); + + private static final int POOLING_THRESHOLD_BYTES = 1024 * 1024; + /** * Construct a new ExecutorMemoryManager. * @@ -43,16 +55,57 @@ public ExecutorMemoryManager(MemoryAllocator allocator) { this.allocator = allocator; } + /** + * Returns true if allocations of the given size should go through the pooling mechanism and + * false otherwise. + */ + private boolean shouldPool(long size) { + // Very small allocations are less likely to benefit from pooling. + // At some point, we should explore supporting pooling for off-heap memory, but for now we'll + // ignore that case in the interest of simplicity. + return size >= POOLING_THRESHOLD_BYTES && allocator instanceof HeapMemoryAllocator; + } + /** * Allocates a contiguous block of memory. Note that the allocated memory is not guaranteed * to be zeroed out (call `zero()` on the result if this is necessary). 
*/ MemoryBlock allocate(long size) throws OutOfMemoryError { - return allocator.allocate(size); + if (shouldPool(size)) { + synchronized (this) { + final LinkedList> pool = bufferPoolsBySize.get(size); + if (pool != null) { + while (!pool.isEmpty()) { + final WeakReference blockReference = pool.pop(); + final MemoryBlock memory = blockReference.get(); + if (memory != null) { + assert (memory.size() == size); + return memory; + } + } + bufferPoolsBySize.remove(size); + } + } + return allocator.allocate(size); + } else { + return allocator.allocate(size); + } } void free(MemoryBlock memory) { - allocator.free(memory); + final long size = memory.size(); + if (shouldPool(size)) { + synchronized (this) { + LinkedList> pool = bufferPoolsBySize.get(size); + if (pool == null) { + pool = new LinkedList>(); + bufferPoolsBySize.put(size, pool); + } + pool.add(new WeakReference(memory)); + } + } else { + allocator.free(memory); + } } } diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java b/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java index 9224988e6ad69..10881969dbc78 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/memory/TaskMemoryManager.java @@ -19,6 +19,7 @@ import java.util.*; +import com.google.common.annotations.VisibleForTesting; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,14 +44,22 @@ * maximum size of a long[] array, allowing us to address 8192 * 2^32 * 8 bytes, which is * approximately 35 terabytes of memory. */ -public final class TaskMemoryManager { +public class TaskMemoryManager { private final Logger logger = LoggerFactory.getLogger(TaskMemoryManager.class); - /** - * The number of entries in the page table. - */ - private static final int PAGE_TABLE_SIZE = 1 << 13; + /** The number of bits used to address the page table. */ + private static final int PAGE_NUMBER_BITS = 13; + + /** The number of bits used to encode offsets in data pages. */ + @VisibleForTesting + static final int OFFSET_BITS = 64 - PAGE_NUMBER_BITS; // 51 + + /** The number of entries in the page table. */ + private static final int PAGE_TABLE_SIZE = 1 << PAGE_NUMBER_BITS; + + /** Maximum supported data page size */ + private static final long MAXIMUM_PAGE_SIZE = (1L << OFFSET_BITS); /** Bit mask for the lower 51 bits of a long. */ private static final long MASK_LONG_LOWER_51_BITS = 0x7FFFFFFFFFFFFL; @@ -101,11 +110,9 @@ public TaskMemoryManager(ExecutorMemoryManager executorMemoryManager) { * intended for allocating large blocks of memory that will be shared between operators. 
*/ public MemoryBlock allocatePage(long size) { - if (logger.isTraceEnabled()) { - logger.trace("Allocating {} byte page", size); - } - if (size >= (1L << 51)) { - throw new IllegalArgumentException("Cannot allocate a page with more than 2^51 bytes"); + if (size > MAXIMUM_PAGE_SIZE) { + throw new IllegalArgumentException( + "Cannot allocate a page with more than " + MAXIMUM_PAGE_SIZE + " bytes"); } final int pageNumber; @@ -120,8 +127,8 @@ public MemoryBlock allocatePage(long size) { final MemoryBlock page = executorMemoryManager.allocate(size); page.pageNumber = pageNumber; pageTable[pageNumber] = page; - if (logger.isDebugEnabled()) { - logger.debug("Allocate page number {} ({} bytes)", pageNumber, size); + if (logger.isTraceEnabled()) { + logger.trace("Allocate page number {} ({} bytes)", pageNumber, size); } return page; } @@ -130,9 +137,6 @@ public MemoryBlock allocatePage(long size) { * Free a block of memory allocated via {@link TaskMemoryManager#allocatePage(long)}. */ public void freePage(MemoryBlock page) { - if (logger.isTraceEnabled()) { - logger.trace("Freeing page number {} ({} bytes)", page.pageNumber, page.size()); - } assert (page.pageNumber != -1) : "Called freePage() on memory that wasn't allocated with allocatePage()"; executorMemoryManager.free(page); @@ -140,8 +144,8 @@ public void freePage(MemoryBlock page) { allocatedPages.clear(page.pageNumber); } pageTable[page.pageNumber] = null; - if (logger.isDebugEnabled()) { - logger.debug("Freed page number {} ({} bytes)", page.pageNumber, page.size()); + if (logger.isTraceEnabled()) { + logger.trace("Freed page number {} ({} bytes)", page.pageNumber, page.size()); } } @@ -173,14 +177,36 @@ public void free(MemoryBlock memory) { /** * Given a memory page and offset within that page, encode this address into a 64-bit long. * This address will remain valid as long as the corresponding page has not been freed. + * + * @param page a data page allocated by {@link TaskMemoryManager#allocate(long)}. + * @param offsetInPage an offset in this page which incorporates the base offset. In other words, + * this should be the value that you would pass as the base offset into an + * UNSAFE call (e.g. page.baseOffset() + something). + * @return an encoded page address. */ public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) { - if (inHeap) { - assert (page.pageNumber != -1) : "encodePageNumberAndOffset called with invalid page"; - return (((long) page.pageNumber) << 51) | (offsetInPage & MASK_LONG_LOWER_51_BITS); - } else { - return offsetInPage; + if (!inHeap) { + // In off-heap mode, an offset is an absolute address that may require a full 64 bits to + // encode. Due to our page size limitation, though, we can convert this into an offset that's + // relative to the page's base offset; this relative offset will fit in 51 bits. 
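To make the packing described in this comment easier to follow, here is a self-contained sketch of the same 13-bit page number / 51-bit offset arithmetic, with the constants inlined purely for illustration:

  object PageAddressSketch {
    val pageNumberBits = 13
    val offsetBits = 64 - pageNumberBits                // 51
    val lowerOffsetMask = (1L << offsetBits) - 1        // mask for the lower 51 bits

    def encode(pageNumber: Int, offsetInPage: Long): Long =
      (pageNumber.toLong << offsetBits) | (offsetInPage & lowerOffsetMask)

    def decodePageNumber(address: Long): Int = (address >>> offsetBits).toInt
    def decodeOffset(address: Long): Long = address & lowerOffsetMask
  }
  // Round trip: encode(3, 1024) decodes back to page 3 and offset 1024.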
+ offsetInPage -= page.getBaseOffset(); } + return encodePageNumberAndOffset(page.pageNumber, offsetInPage); + } + + @VisibleForTesting + public static long encodePageNumberAndOffset(int pageNumber, long offsetInPage) { + assert (pageNumber != -1) : "encodePageNumberAndOffset called with invalid page"; + return (((long) pageNumber) << OFFSET_BITS) | (offsetInPage & MASK_LONG_LOWER_51_BITS); + } + + @VisibleForTesting + public static int decodePageNumber(long pagePlusOffsetAddress) { + return (int) ((pagePlusOffsetAddress & MASK_LONG_UPPER_13_BITS) >>> OFFSET_BITS); + } + + private static long decodeOffset(long pagePlusOffsetAddress) { + return (pagePlusOffsetAddress & MASK_LONG_LOWER_51_BITS); } /** @@ -189,7 +215,7 @@ public long encodePageNumberAndOffset(MemoryBlock page, long offsetInPage) { */ public Object getPage(long pagePlusOffsetAddress) { if (inHeap) { - final int pageNumber = (int) ((pagePlusOffsetAddress & MASK_LONG_UPPER_13_BITS) >>> 51); + final int pageNumber = decodePageNumber(pagePlusOffsetAddress); assert (pageNumber >= 0 && pageNumber < PAGE_TABLE_SIZE); final Object page = pageTable[pageNumber].getBaseObject(); assert (page != null); @@ -204,10 +230,15 @@ public Object getPage(long pagePlusOffsetAddress) { * {@link TaskMemoryManager#encodePageNumberAndOffset(MemoryBlock, long)} */ public long getOffsetInPage(long pagePlusOffsetAddress) { + final long offsetInPage = decodeOffset(pagePlusOffsetAddress); if (inHeap) { - return (pagePlusOffsetAddress & MASK_LONG_LOWER_51_BITS); + return offsetInPage; } else { - return pagePlusOffsetAddress; + // In off-heap mode, an offset is an absolute address. In encodePageNumberAndOffset, we + // converted the absolute address into a relative address. Here, we invert that operation: + final int pageNumber = decodePageNumber(pagePlusOffsetAddress); + assert (pageNumber >= 0 && pageNumber < PAGE_TABLE_SIZE); + return pageTable[pageNumber].getBaseOffset() + offsetInPage; } } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java index e3a824e29b768..18393db9f382f 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/bitset/BitSetSuite.java @@ -39,6 +39,8 @@ public void basicOps() { for (int i = 0; i < bs.capacity(); i++) { Assert.assertFalse(bs.isSet(i)); } + // another form of asserting that the bit set is empty + Assert.assertFalse(bs.anySet()); // Set every bit and check it. 
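The anySet() assertion added above relies on the word-at-a-time scan now used by BitSetMethods; reduced to plain Scala over a long-backed word array, the idea is simply this (a sketch, not the actual implementation):

  // A bit set backed by 64-bit words is empty iff every word is zero.
  def anySet(words: Array[Long]): Boolean = words.exists(_ != 0L)

  // e.g. a 256-bit set is four words; setting bit 64 makes the second word non-zero,
  // matching the bs.set(64) case exercised by the test.
  val words = new Array[Long](4)
  words(1) = 1L
  assert(anySet(words))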
for (int i = 0; i < bs.capacity(); i++) { @@ -52,6 +54,11 @@ public void basicOps() { bs.unset(i); Assert.assertFalse(bs.isSet(i)); } + + // Make sure anySet() can detect any set bit + bs = createBitSet(256); + bs.set(64); + Assert.assertTrue(bs.anySet()); } @Test diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index 7a5c0622d1ffb..81315f7c94645 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -25,24 +25,40 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import static org.mockito.AdditionalMatchers.geq; +import static org.mockito.Mockito.*; import org.apache.spark.unsafe.array.ByteArrayMethods; +import org.apache.spark.unsafe.memory.*; import org.apache.spark.unsafe.PlatformDependent; import static org.apache.spark.unsafe.PlatformDependent.BYTE_ARRAY_OFFSET; -import org.apache.spark.unsafe.memory.ExecutorMemoryManager; -import org.apache.spark.unsafe.memory.MemoryAllocator; -import org.apache.spark.unsafe.memory.MemoryLocation; -import org.apache.spark.unsafe.memory.TaskMemoryManager; +import static org.apache.spark.unsafe.PlatformDependent.LONG_ARRAY_OFFSET; + public abstract class AbstractBytesToBytesMapSuite { private final Random rand = new Random(42); private TaskMemoryManager memoryManager; + private TaskMemoryManager sizeLimitedMemoryManager; @Before public void setup() { memoryManager = new TaskMemoryManager(new ExecutorMemoryManager(getMemoryAllocator())); + // Mocked memory manager for tests that check the maximum array size, since actually allocating + // such large arrays will cause us to run out of memory in our tests. 
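For readers less familiar with the Mockito pieces used here (spy, the geq matcher, and a custom Answer), the same size-limited stubbing looks roughly like this in standalone Scala; the snippet is illustrative only and mirrors the Java setup below:

  import org.mockito.AdditionalMatchers.geq
  import org.mockito.Mockito.{spy, when}
  import org.mockito.invocation.InvocationOnMock
  import org.mockito.stubbing.Answer
  import org.apache.spark.unsafe.memory.{ExecutorMemoryManager, MemoryAllocator, MemoryBlock, TaskMemoryManager}

  val real = new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP))
  val sizeLimited = spy(real)
  // Intercept any allocation of at least 1 MB; impossibly large requests fail the way the JVM would.
  when(sizeLimited.allocate(geq(1L << 20))).thenAnswer(new Answer[MemoryBlock] {
    override def answer(invocation: InvocationOnMock): MemoryBlock = {
      val requested = invocation.getArguments()(0).asInstanceOf[Long]
      if (requested / 8 > Integer.MAX_VALUE) {
        throw new OutOfMemoryError("Requested array size exceeds VM limit")
      }
      real.allocate(1L << 20)
    }
  })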
+ sizeLimitedMemoryManager = spy(memoryManager); + when(sizeLimitedMemoryManager.allocate(geq(1L << 20))).thenAnswer(new Answer() { + @Override + public MemoryBlock answer(InvocationOnMock invocation) throws Throwable { + if (((Long) invocation.getArguments()[0] / 8) > Integer.MAX_VALUE) { + throw new OutOfMemoryError("Requested array size exceeds VM limit"); + } + return memoryManager.allocate(1L << 20); + } + }); } @After @@ -101,6 +117,7 @@ public void emptyMap() { final int keyLengthInBytes = keyLengthInWords * 8; final byte[] key = getRandomByteArray(keyLengthInWords); Assert.assertFalse(map.lookup(key, BYTE_ARRAY_OFFSET, keyLengthInBytes).isDefined()); + Assert.assertFalse(map.iterator().hasNext()); } finally { map.free(); } @@ -159,7 +176,7 @@ public void setAndRetrieveAKey() { @Test public void iteratorTest() throws Exception { - final int size = 128; + final int size = 4096; BytesToBytesMap map = new BytesToBytesMap(memoryManager, size / 2); try { for (long i = 0; i < size; i++) { @@ -167,14 +184,26 @@ public void iteratorTest() throws Exception { final BytesToBytesMap.Location loc = map.lookup(value, PlatformDependent.LONG_ARRAY_OFFSET, 8); Assert.assertFalse(loc.isDefined()); - loc.putNewKey( - value, - PlatformDependent.LONG_ARRAY_OFFSET, - 8, - value, - PlatformDependent.LONG_ARRAY_OFFSET, - 8 - ); + // Ensure that we store some zero-length keys + if (i % 5 == 0) { + loc.putNewKey( + null, + PlatformDependent.LONG_ARRAY_OFFSET, + 0, + value, + PlatformDependent.LONG_ARRAY_OFFSET, + 8 + ); + } else { + loc.putNewKey( + value, + PlatformDependent.LONG_ARRAY_OFFSET, + 8, + value, + PlatformDependent.LONG_ARRAY_OFFSET, + 8 + ); + } } final java.util.BitSet valuesSeen = new java.util.BitSet(size); final Iterator iter = map.iterator(); @@ -183,11 +212,16 @@ public void iteratorTest() throws Exception { Assert.assertTrue(loc.isDefined()); final MemoryLocation keyAddress = loc.getKeyAddress(); final MemoryLocation valueAddress = loc.getValueAddress(); - final long key = PlatformDependent.UNSAFE.getLong( - keyAddress.getBaseObject(), keyAddress.getBaseOffset()); final long value = PlatformDependent.UNSAFE.getLong( valueAddress.getBaseObject(), valueAddress.getBaseOffset()); - Assert.assertEquals(key, value); + final long keyLength = loc.getKeyLength(); + if (keyLength == 0) { + Assert.assertTrue("value " + value + " was not divisible by 5", value % 5 == 0); + } else { + final long key = PlatformDependent.UNSAFE.getLong( + keyAddress.getBaseObject(), keyAddress.getBaseOffset()); + Assert.assertEquals(value, key); + } valuesSeen.set((int) value); } Assert.assertEquals(size, valuesSeen.cardinality()); @@ -196,6 +230,74 @@ public void iteratorTest() throws Exception { } } + @Test + public void iteratingOverDataPagesWithWastedSpace() throws Exception { + final int NUM_ENTRIES = 1000 * 1000; + final int KEY_LENGTH = 16; + final int VALUE_LENGTH = 40; + final BytesToBytesMap map = new BytesToBytesMap(memoryManager, NUM_ENTRIES); + // Each record will take 8 + 8 + 16 + 40 = 72 bytes of space in the data page. Our 64-megabyte + // pages won't be evenly-divisible by records of this size, which will cause us to waste some + // space at the end of the page. This is necessary in order for us to take the end-of-record + // handling branch in iterator(). 
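The 72-byte figure in the comment above falls straight out of the record layout the map writes into each data page; a small sketch of that layout and the page-boundary arithmetic, assuming the 64 MB page size and 8-byte end-of-page marker noted earlier in this patch:

  // Each record is [8-byte key length][key][8-byte value length][value]; a stored key length
  // of -1 (END_OF_PAGE_MARKER) tells the iterator to jump to the next data page.
  def recordSizeInBytes(keyLength: Int, valueLength: Int): Long = 8L + keyLength + 8L + valueLength

  val recordSize = recordSizeInBytes(16, 40)     // 72 bytes, as in the comment above
  val pageSize = 64L * 1024 * 1024               // PAGE_SIZE_BYTES
  val usable = pageSize - 8                      // 8 bytes reserved for the marker
  val wastedAtEndOfPage = usable % recordSize    // non-zero, so the marker branch is exercised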
+ try { + for (int i = 0; i < NUM_ENTRIES; i++) { + final long[] key = new long[] { i, i }; // 2 * 8 = 16 bytes + final long[] value = new long[] { i, i, i, i, i }; // 5 * 8 = 40 bytes + final BytesToBytesMap.Location loc = map.lookup( + key, + LONG_ARRAY_OFFSET, + KEY_LENGTH + ); + Assert.assertFalse(loc.isDefined()); + loc.putNewKey( + key, + LONG_ARRAY_OFFSET, + KEY_LENGTH, + value, + LONG_ARRAY_OFFSET, + VALUE_LENGTH + ); + } + Assert.assertEquals(2, map.getNumDataPages()); + + final java.util.BitSet valuesSeen = new java.util.BitSet(NUM_ENTRIES); + final Iterator iter = map.iterator(); + final long key[] = new long[KEY_LENGTH / 8]; + final long value[] = new long[VALUE_LENGTH / 8]; + while (iter.hasNext()) { + final BytesToBytesMap.Location loc = iter.next(); + Assert.assertTrue(loc.isDefined()); + Assert.assertEquals(KEY_LENGTH, loc.getKeyLength()); + Assert.assertEquals(VALUE_LENGTH, loc.getValueLength()); + PlatformDependent.copyMemory( + loc.getKeyAddress().getBaseObject(), + loc.getKeyAddress().getBaseOffset(), + key, + LONG_ARRAY_OFFSET, + KEY_LENGTH + ); + PlatformDependent.copyMemory( + loc.getValueAddress().getBaseObject(), + loc.getValueAddress().getBaseOffset(), + value, + LONG_ARRAY_OFFSET, + VALUE_LENGTH + ); + for (long j : key) { + Assert.assertEquals(key[0], j); + } + for (long j : value) { + Assert.assertEquals(key[0], j); + } + valuesSeen.set((int) key[0]); + } + Assert.assertEquals(NUM_ENTRIES, valuesSeen.cardinality()); + } finally { + map.free(); + } + } + @Test public void randomizedStressTest() { final int size = 65536; @@ -247,4 +349,35 @@ public void randomizedStressTest() { map.free(); } } + + @Test + public void initialCapacityBoundsChecking() { + try { + new BytesToBytesMap(sizeLimitedMemoryManager, 0); + Assert.fail("Expected IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException e) { + // expected exception + } + + try { + new BytesToBytesMap(sizeLimitedMemoryManager, BytesToBytesMap.MAX_CAPACITY + 1); + Assert.fail("Expected IllegalArgumentException to be thrown"); + } catch (IllegalArgumentException e) { + // expected exception + } + + // Can allocate _at_ the max capacity + BytesToBytesMap map = + new BytesToBytesMap(sizeLimitedMemoryManager, BytesToBytesMap.MAX_CAPACITY); + map.free(); + } + + @Test + public void resizingLargeMap() { + // As long as a map's capacity is below the max, we should be able to resize up to the max + BytesToBytesMap map = + new BytesToBytesMap(sizeLimitedMemoryManager, BytesToBytesMap.MAX_CAPACITY - 64); + map.growAndRehash(); + map.free(); + } } diff --git a/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java b/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java index 932882f1ca248..06fb081183659 100644 --- a/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java +++ b/unsafe/src/test/java/org/apache/spark/unsafe/memory/TaskMemoryManagerSuite.java @@ -38,4 +38,27 @@ public void leakedPageMemoryIsDetected() { Assert.assertEquals(4096, manager.cleanUpAllAllocatedMemory()); } + @Test + public void encodePageNumberAndOffsetOffHeap() { + final TaskMemoryManager manager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.UNSAFE)); + final MemoryBlock dataPage = manager.allocatePage(256); + // In off-heap mode, an offset is an absolute address that may require more than 51 bits to + // encode. 
This test exercises that corner-case: + final long offset = ((1L << TaskMemoryManager.OFFSET_BITS) + 10); + final long encodedAddress = manager.encodePageNumberAndOffset(dataPage, offset); + Assert.assertEquals(null, manager.getPage(encodedAddress)); + Assert.assertEquals(offset, manager.getOffsetInPage(encodedAddress)); + } + + @Test + public void encodePageNumberAndOffsetOnHeap() { + final TaskMemoryManager manager = + new TaskMemoryManager(new ExecutorMemoryManager(MemoryAllocator.HEAP)); + final MemoryBlock dataPage = manager.allocatePage(256); + final long encodedAddress = manager.encodePageNumberAndOffset(dataPage, 64); + Assert.assertEquals(dataPage.getBaseObject(), manager.getPage(encodedAddress)); + Assert.assertEquals(64, manager.getOffsetInPage(encodedAddress)); + } + } diff --git a/yarn/pom.xml b/yarn/pom.xml index 7c8c3613e7a05..9fab5a83ef6f9 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.10 - 1.4.0-SNAPSHOT + 1.4.0 ../pom.xml @@ -30,6 +30,7 @@ Spark Project YARN yarn + 1.9 @@ -85,7 +86,12 @@ jetty-servlet - + + + org.apache.hadoop hadoop-yarn-server-tests @@ -97,59 +103,44 @@ mockito-all test + + org.mortbay.jetty + jetty + 6.1.26 + + + org.mortbay.jetty + servlet-api + + + test + + + com.sun.jersey + jersey-core + ${jersey.version} + test + + + com.sun.jersey + jersey-json + ${jersey.version} + test + + + stax + stax-api + + + + + com.sun.jersey + jersey-server + ${jersey.version} + test + - - - - - hadoop-2.2 - - 1.9 - - - - org.mortbay.jetty - jetty - 6.1.26 - - - org.mortbay.jetty - servlet-api - - - test - - - com.sun.jersey - jersey-core - ${jersey.version} - test - - - com.sun.jersey - jersey-json - ${jersey.version} - test - - - stax - stax-api - - - - - com.sun.jersey - jersey-server - ${jersey.version} - test - - - - - + target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index e1694c1f64a9f..29752969e6152 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -285,7 +285,8 @@ private[spark] class ApplicationMaster( } private def runExecutorLauncher(securityMgr: SecurityManager): Unit = { - rpcEnv = RpcEnv.create("sparkYarnAM", Utils.localHostName, 0, sparkConf, securityMgr) + val port = sparkConf.getInt("spark.yarn.am.port", 0) + rpcEnv = RpcEnv.create("sparkYarnAM", Utils.localHostName, port, sparkConf, securityMgr) waitForSparkDriver() addAmIpFilter() registerAM(sparkConf.get("spark.driver.appUIAddress", ""), securityMgr) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 20ecaf092e3f8..234051eb7d3bb 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -17,7 +17,7 @@ package org.apache.spark.deploy.yarn -import java.io.{ByteArrayInputStream, DataInputStream, File, FileOutputStream} +import java.io.{ByteArrayInputStream, DataInputStream, File, FileOutputStream, IOException} import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException} import java.nio.ByteBuffer import java.security.PrivilegedExceptionAction @@ -91,30 +91,52 @@ private[spark] class Client( * available in the alpha API. 
*/ def submitApplication(): ApplicationId = { - // Setup the credentials before doing anything else, so we have don't have issues at any point. - setupCredentials() - yarnClient.init(yarnConf) - yarnClient.start() - - logInfo("Requesting a new application from cluster with %d NodeManagers" - .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) - - // Get a new application from our RM - val newApp = yarnClient.createApplication() - val newAppResponse = newApp.getNewApplicationResponse() - val appId = newAppResponse.getApplicationId() - - // Verify whether the cluster has enough resources for our AM - verifyClusterResources(newAppResponse) - - // Set up the appropriate contexts to launch our AM - val containerContext = createContainerLaunchContext(newAppResponse) - val appContext = createApplicationSubmissionContext(newApp, containerContext) - - // Finally, submit and monitor the application - logInfo(s"Submitting application ${appId.getId} to ResourceManager") - yarnClient.submitApplication(appContext) - appId + var appId: ApplicationId = null + try { + // Setup the credentials before doing anything else, + // so we have don't have issues at any point. + setupCredentials() + yarnClient.init(yarnConf) + yarnClient.start() + + logInfo("Requesting a new application from cluster with %d NodeManagers" + .format(yarnClient.getYarnClusterMetrics.getNumNodeManagers)) + + // Get a new application from our RM + val newApp = yarnClient.createApplication() + val newAppResponse = newApp.getNewApplicationResponse() + appId = newAppResponse.getApplicationId() + + // Verify whether the cluster has enough resources for our AM + verifyClusterResources(newAppResponse) + + // Set up the appropriate contexts to launch our AM + val containerContext = createContainerLaunchContext(newAppResponse) + val appContext = createApplicationSubmissionContext(newApp, containerContext) + + // Finally, submit and monitor the application + logInfo(s"Submitting application ${appId.getId} to ResourceManager") + yarnClient.submitApplication(appContext) + appId + } catch { + case e: Throwable => + if (appId != null) { + val appStagingDir = getAppStagingDir(appId) + try { + val preserveFiles = sparkConf.getBoolean("spark.yarn.preserve.staging.files", false) + val stagingDirPath = new Path(appStagingDir) + val fs = FileSystem.get(hadoopConf) + if (!preserveFiles && fs.exists(stagingDirPath)) { + logInfo("Deleting staging directory " + stagingDirPath) + fs.delete(stagingDirPath, true) + } + } catch { + case ioe: IOException => + logWarning("Failed to cleanup staging dir " + appStagingDir, ioe) + } + } + throw e + } } /** @@ -468,6 +490,17 @@ private[spark] class Client( env("SPARK_YARN_USER_ENV") = userEnvs } + // if spark.submit.pyArchives is in sparkConf, append pyArchives to PYTHONPATH + // that can be passed on to the ApplicationMaster and the executors. + if (sparkConf.contains("spark.submit.pyArchives")) { + var pythonPath = sparkConf.get("spark.submit.pyArchives") + if (env.contains("PYTHONPATH")) { + pythonPath = Seq(env.get("PYTHONPATH"), pythonPath).mkString(File.pathSeparator) + } + env("PYTHONPATH") = pythonPath + sparkConf.setExecutorEnv("PYTHONPATH", pythonPath) + } + // In cluster mode, if the deprecated SPARK_JAVA_OPTS is set, we need to propagate it to // executors. 
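// (A hedged aside on the spark.submit.pyArchives block above, not patch content: the archive list
// is appended to any PYTHONPATH already present in the launch environment using the platform path
// separator, roughly Seq(existingPythonPath, pyArchives).mkString(File.pathSeparator), and the
// merged value is exported to the ApplicationMaster environment and, via setExecutorEnv, to the
// executors as well.)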
But we can't just set spark.executor.extraJavaOptions, because the driver's // SparkContext will not let that set spark* system properties, which is expected behavior for @@ -1074,7 +1107,7 @@ object Client extends Logging { val hiveConf = hiveClass.getMethod("getConf").invoke(hive) val hiveConfClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf") - val hiveConfGet = (param:String) => Option(hiveConfClass + val hiveConfGet = (param: String) => Option(hiveConfClass .getMethod("get", classOf[java.lang.String]) .invoke(hiveConf, param)) @@ -1096,7 +1129,7 @@ object Client extends Logging { val hive2Token = new Token[DelegationTokenIdentifier]() hive2Token.decodeFromUrlString(tokenStr) - credentials.addToken(new Text("hive.server2.delegation.token"),hive2Token) + credentials.addToken(new Text("hive.server2.delegation.token"), hive2Token) logDebug("Added hive.Server2.delegation.token to conf.") hiveClass.getMethod("closeCurrent").invoke(null) } else { @@ -1109,9 +1142,9 @@ object Client extends Logging { logDebug("HiveMetaStore configured in localmode") } } catch { - case e:java.lang.NoSuchMethodException => { logInfo("Hive Method not found " + e); return } - case e:java.lang.ClassNotFoundException => { logInfo("Hive Class not found " + e); return } - case e:Exception => { logError("Unexpected Exception " + e) + case e: java.lang.NoSuchMethodException => { logInfo("Hive Method not found " + e); return } + case e: java.lang.ClassNotFoundException => { logInfo("Hive Class not found " + e); return } + case e: Exception => { logError("Unexpected Exception " + e) throw new RuntimeException("Unexpected exception", e) } } @@ -1141,13 +1174,13 @@ object Client extends Logging { logInfo("Added HBase security token to credentials.") } catch { - case e:java.lang.NoSuchMethodException => + case e: java.lang.NoSuchMethodException => logInfo("HBase Method not found: " + e) - case e:java.lang.ClassNotFoundException => + case e: java.lang.ClassNotFoundException => logDebug("HBase Class not found: " + e) - case e:java.lang.NoClassDefFoundError => + case e: java.lang.NoClassDefFoundError => logDebug("HBase Class not found: " + e) - case e:Exception => + case e: Exception => logError("Exception when obtaining HBase security token: " + e) } } diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala index c592ecfdfce06..3d3a966960e9f 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala @@ -43,22 +43,22 @@ private[spark] class ClientDistributedCacheManager() extends Logging { * Add a resource to the list of distributed cache resources. This list can * be sent to the ApplicationMaster and possibly the executors so that it can * be downloaded into the Hadoop distributed cache for use by this application. - * Adds the LocalResource to the localResources HashMap passed in and saves + * Adds the LocalResource to the localResources HashMap passed in and saves * the stats of the resources to they can be sent to the executors and verified. 
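   *
   * A hypothetical call (receiver name illustrative; arguments mirror the suite changes later in
   * this patch):
   * {{{
   *   distCacheMgr.addResource(fs, conf, destPath, localResources,
   *     LocalResourceType.FILE, "link", statCache, appMasterOnly = false)
   * }}}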
* * @param fs FileSystem * @param conf Configuration * @param destPath path to the resource * @param localResources localResource hashMap to insert the resource into - * @param resourceType LocalResourceType + * @param resourceType LocalResourceType * @param link link presented in the distributed cache to the destination - * @param statCache cache to store the file/directory stats + * @param statCache cache to store the file/directory stats * @param appMasterOnly Whether to only add the resource to the app master */ def addResource( fs: FileSystem, conf: Configuration, - destPath: Path, + destPath: Path, localResources: HashMap[String, LocalResource], resourceType: LocalResourceType, link: String, @@ -74,15 +74,15 @@ private[spark] class ClientDistributedCacheManager() extends Logging { amJarRsrc.setSize(destStatus.getLen()) if (link == null || link.isEmpty()) throw new Exception("You must specify a valid link name") localResources(link) = amJarRsrc - + if (!appMasterOnly) { val uri = destPath.toUri() val pathURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, link) if (resourceType == LocalResourceType.FILE) { - distCacheFiles(pathURI.toString()) = (destStatus.getLen().toString(), + distCacheFiles(pathURI.toString()) = (destStatus.getLen().toString(), destStatus.getModificationTime().toString(), visibility.name()) } else { - distCacheArchives(pathURI.toString()) = (destStatus.getLen().toString(), + distCacheArchives(pathURI.toString()) = (destStatus.getLen().toString(), destStatus.getModificationTime().toString(), visibility.name()) } } @@ -95,13 +95,13 @@ private[spark] class ClientDistributedCacheManager() extends Logging { val (keys, tupleValues) = distCacheFiles.unzip val (sizes, timeStamps, visibilities) = tupleValues.unzip3 if (keys.size > 0) { - env("SPARK_YARN_CACHE_FILES") = keys.reduceLeft[String] { (acc,n) => acc + "," + n } - env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") = - timeStamps.reduceLeft[String] { (acc,n) => acc + "," + n } - env("SPARK_YARN_CACHE_FILES_FILE_SIZES") = - sizes.reduceLeft[String] { (acc,n) => acc + "," + n } - env("SPARK_YARN_CACHE_FILES_VISIBILITIES") = - visibilities.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES") = keys.reduceLeft[String] { (acc, n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") = + timeStamps.reduceLeft[String] { (acc, n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_FILE_SIZES") = + sizes.reduceLeft[String] { (acc, n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_VISIBILITIES") = + visibilities.reduceLeft[String] { (acc, n) => acc + "," + n } } } @@ -112,13 +112,13 @@ private[spark] class ClientDistributedCacheManager() extends Logging { val (keys, tupleValues) = distCacheArchives.unzip val (sizes, timeStamps, visibilities) = tupleValues.unzip3 if (keys.size > 0) { - env("SPARK_YARN_CACHE_ARCHIVES") = keys.reduceLeft[String] { (acc,n) => acc + "," + n } - env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") = - timeStamps.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES") = keys.reduceLeft[String] { (acc, n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") = + timeStamps.reduceLeft[String] { (acc, n) => acc + "," + n } env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") = - sizes.reduceLeft[String] { (acc,n) => acc + "," + n } - env("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") = - visibilities.reduceLeft[String] { (acc,n) => acc + "," + n } + sizes.reduceLeft[String] { (acc, n) => acc + "," + n } + 
env("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") = + visibilities.reduceLeft[String] { (acc, n) => acc + "," + n } } } @@ -160,7 +160,7 @@ private[spark] class ClientDistributedCacheManager() extends Logging { def ancestorsHaveExecutePermissions( fs: FileSystem, path: Path, - statCache: Map[URI, FileStatus]): Boolean = { + statCache: Map[URI, FileStatus]): Boolean = { var current = path while (current != null) { // the subdirs in the path should have execute permissions for others @@ -197,7 +197,7 @@ private[spark] class ClientDistributedCacheManager() extends Logging { def getFileStatus(fs: FileSystem, uri: URI, statCache: Map[URI, FileStatus]): FileStatus = { val stat = statCache.get(uri) match { case Some(existstat) => existstat - case None => + case None => val newStat = fs.getFileStatus(new Path(uri)) statCache.put(uri, newStat) newStat diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index b8f42dadcb464..88d68d5556162 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -373,7 +373,9 @@ private[yarn] class YarnAllocator( // Hadoop 2.2.X added a ContainerExitStatus we should switch to use // there are some exit status' we shouldn't necessarily count against us, but for // now I think its ok as none of the containers are expected to exit - if (completedContainer.getExitStatus == -103) { // vmem limit exceeded + if (completedContainer.getExitStatus == ContainerExitStatus.PREEMPTED) { + logInfo("Container preempted: " + containerId) + } else if (completedContainer.getExitStatus == -103) { // vmem limit exceeded logWarning(memLimitExceededLogMessage( completedContainer.getDiagnostics, VMEM_EXCEEDED_PATTERN)) diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index ba91872107d0c..34ac089e9b0cf 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -139,9 +139,9 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil { } object YarnSparkHadoopUtil { - // Additional memory overhead + // Additional memory overhead // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering - // the common cases. Memory overhead tends to grow with container size. + // the common cases. Memory overhead tends to grow with container size. 
val MEMORY_OVERHEAD_FACTOR = 0.10 val MEMORY_OVERHEAD_MIN = 384 diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala index 80b57d1355a3a..24a53f9f33add 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala @@ -40,12 +40,12 @@ import scala.collection.mutable.Map class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { class MockClientDistributedCacheManager extends ClientDistributedCacheManager { - override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): + override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): LocalResourceVisibility = { LocalResourceVisibility.PRIVATE } } - + test("test getFileStatus empty") { val distMgr = new ClientDistributedCacheManager() val fs = mock[FileSystem] @@ -60,7 +60,7 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { val distMgr = new ClientDistributedCacheManager() val fs = mock[FileSystem] val uri = new URI("/tmp/testing") - val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner", + val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner", null, new Path("/tmp/testing")) when(fs.getFileStatus(new Path(uri))).thenReturn(new FileStatus()) val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus](uri -> realFileStatus) @@ -77,7 +77,7 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) - distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, "link", + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, "link", statCache, false) val resource = localResources("link") assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) @@ -100,11 +100,11 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { assert(env.get("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === None) // add another one and verify both there and order correct - val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", null, new Path("/tmp/testing2")) val destPath2 = new Path("file:///foo.invalid.com:8080/tmp/testing2") when(fs.getFileStatus(destPath2)).thenReturn(realFileStatus) - distMgr.addResource(fs, conf, destPath2, localResources, LocalResourceType.FILE, "link2", + distMgr.addResource(fs, conf, destPath2, localResources, LocalResourceType.FILE, "link2", statCache, false) val resource2 = localResources("link2") assert(resource2.getVisibility() === LocalResourceVisibility.PRIVATE) @@ -116,7 +116,7 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { val env2 = new HashMap[String, String]() distMgr.setDistFilesEnv(env2) val timestamps = env2("SPARK_YARN_CACHE_FILES_TIME_STAMPS").split(',') - val files = env2("SPARK_YARN_CACHE_FILES").split(',') + val files = env2("SPARK_YARN_CACHE_FILES").split(',') val sizes = env2("SPARK_YARN_CACHE_FILES_FILE_SIZES").split(',') val visibilities = env2("SPARK_YARN_CACHE_FILES_VISIBILITIES") .split(',') assert(files(0) === 
"file:/foo.invalid.com:8080/tmp/testing#link") @@ -140,7 +140,7 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) intercept[Exception] { - distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, null, + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, null, statCache, false) } assert(localResources.get("link") === None) @@ -154,11 +154,11 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") val localResources = HashMap[String, LocalResource]() val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() - val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", null, new Path("/tmp/testing")) when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) - distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", statCache, true) val resource = localResources("link") assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) @@ -188,11 +188,11 @@ class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") val localResources = HashMap[String, LocalResource]() val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() - val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", null, new Path("/tmp/testing")) when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) - distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", statCache, false) val resource = localResources("link") assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index 508819e242a26..6da3e82acdb14 100644 --- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -203,7 +203,7 @@ class ClientSuite extends FunSuite with Matchers with BeforeAndAfterAll { def getFieldValue2[A: ClassTag, A1: ClassTag, B]( clazz: Class[_], field: String, - defaults: => B)(mapTo: A => B)(mapTo1: A1 => B): B = { + defaults: => B)(mapTo: A => B)(mapTo1: A1 => B): B = { Try(clazz.getField(field)).map(_.get(null)).map { case v: A => mapTo(v) case v1: A1 => mapTo1(v1)